mlrun 1.10.0rc12__py3-none-any.whl → 1.10.0rc14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

@@ -804,25 +804,45 @@ class V3IOTSDBConnector(TSDBConnector):
804
804
  @staticmethod
805
805
  def _get_sql_query(
806
806
  *,
807
- endpoint_id: str,
808
807
  table_path: str,
808
+ endpoint_id: Optional[str] = None,
809
+ application_names: Optional[list[str]] = None,
809
810
  name: str = mm_schemas.ResultData.RESULT_NAME,
810
811
  metric_and_app_names: Optional[list[tuple[str, str]]] = None,
811
812
  columns: Optional[list[str]] = None,
813
+ group_by_columns: Optional[list[str]] = None,
812
814
  ) -> str:
813
815
  """Get the SQL query for the results/metrics table"""
816
+
817
+ if metric_and_app_names and not endpoint_id:
818
+ raise mlrun.errors.MLRunInvalidArgumentError(
819
+ "If metric_and_app_names is provided, endpoint_id must also be provided"
820
+ )
821
+
822
+ if metric_and_app_names and application_names:
823
+ raise mlrun.errors.MLRunInvalidArgumentError(
824
+ "Cannot provide both metric_and_app_names and application_names"
825
+ )
826
+
814
827
  if columns:
815
828
  selection = ",".join(columns)
816
829
  else:
817
830
  selection = "*"
818
831
 
819
832
  with StringIO() as query:
820
- query.write(
821
- f"SELECT {selection} FROM '{table_path}' "
822
- f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
823
- )
833
+ where_added = False
834
+ query.write(f"SELECT {selection} FROM '{table_path}'")
835
+ if endpoint_id:
836
+ query.write(
837
+ f" WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
838
+ )
839
+ where_added = True
824
840
  if metric_and_app_names:
825
- query.write(" AND (")
841
+ if where_added:
842
+ query.write(" AND (")
843
+ else:
844
+ query.write(" WHERE (")
845
+ where_added = True
826
846
 
827
847
  for i, (app_name, result_name) in enumerate(metric_and_app_names):
828
848
  sub_cond = (
@@ -835,6 +855,22 @@ class V3IOTSDBConnector(TSDBConnector):
835
855
 
836
856
  query.write(")")
837
857
 
858
+ if application_names:
859
+ if where_added:
860
+ query.write(" AND (")
861
+ else:
862
+ query.write(" WHERE (")
863
+ for i, app_name in enumerate(application_names):
864
+ sub_cond = f"{mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}'"
865
+ if i != 0: # not first sub condition
866
+ query.write(" OR ")
867
+ query.write(sub_cond)
868
+ query.write(")")
869
+
870
+ if group_by_columns:
871
+ query.write(" GROUP BY ")
872
+ query.write(",".join(group_by_columns))
873
+
838
874
  query.write(";")
839
875
  return query.getvalue()
840
876
 
@@ -1272,7 +1308,49 @@ class V3IOTSDBConnector(TSDBConnector):
1272
1308
  end: Optional[Union[datetime, str]] = None,
1273
1309
  application_names: Optional[Union[str, list[str]]] = None,
1274
1310
  ) -> dict[str, int]:
1275
- raise NotImplementedError
1311
+ start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
1312
+ group_by_columns = [
1313
+ mm_schemas.ApplicationEvent.APPLICATION_NAME,
1314
+ mm_schemas.ApplicationEvent.ENDPOINT_ID,
1315
+ ]
1316
+
1317
+ def get_application_endpoints_records(
1318
+ record_type: Literal["metrics", "results"],
1319
+ ):
1320
+ if record_type == "results":
1321
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
1322
+ else:
1323
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
1324
+ sql_query = self._get_sql_query(
1325
+ table_path=table_path,
1326
+ columns=[mm_schemas.WriterEvent.START_INFER_TIME],
1327
+ group_by_columns=group_by_columns,
1328
+ application_names=application_names,
1329
+ )
1330
+ return self.frames_client.read(
1331
+ backend=_TSDB_BE,
1332
+ start=start,
1333
+ end=end,
1334
+ query=sql_query,
1335
+ )
1336
+
1337
+ df_results = get_application_endpoints_records("results")
1338
+ df_metrics = get_application_endpoints_records("metrics")
1339
+
1340
+ if df_results.empty and df_metrics.empty:
1341
+ return {}
1342
+
1343
+ # Combine the two dataframes and count unique endpoints per application
1344
+ combined_df = pd.concat([df_results, df_metrics], ignore_index=True)
1345
+ if combined_df.empty:
1346
+ return {}
1347
+ combined_df.drop_duplicates(subset=group_by_columns, inplace=True)
1348
+
1349
+ grouped_df = combined_df.groupby(
1350
+ mm_schemas.WriterEvent.APPLICATION_NAME
1351
+ ).count()
1352
+
1353
+ return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
1276
1354
 
1277
1355
  def calculate_latest_metrics(
1278
1356
  self,
@@ -1282,4 +1360,93 @@ class V3IOTSDBConnector(TSDBConnector):
1282
1360
  ) -> list[
1283
1361
  Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
1284
1362
  ]:
1285
- raise NotImplementedError
1363
+ metric_list = []
1364
+ start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
1365
+
1366
+ # Get the latest results
1367
+ def get_latest_metrics_records(
1368
+ record_type: Literal["metrics", "results"],
1369
+ ) -> pd.DataFrame:
1370
+ group_by_columns = [mm_schemas.ApplicationEvent.APPLICATION_NAME]
1371
+ if record_type == "results":
1372
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
1373
+ columns = [
1374
+ f"last({mm_schemas.ResultData.RESULT_STATUS})",
1375
+ f"last({mm_schemas.ResultData.RESULT_VALUE})",
1376
+ f"last({mm_schemas.ResultData.RESULT_KIND})",
1377
+ ]
1378
+ group_by_columns += [
1379
+ mm_schemas.ResultData.RESULT_NAME,
1380
+ ]
1381
+ else:
1382
+ table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
1383
+ columns = [f"last({mm_schemas.MetricData.METRIC_VALUE})"]
1384
+ group_by_columns += [
1385
+ mm_schemas.MetricData.METRIC_NAME,
1386
+ ]
1387
+ sql_query = self._get_sql_query(
1388
+ table_path=table_path,
1389
+ columns=columns,
1390
+ group_by_columns=group_by_columns,
1391
+ application_names=application_names,
1392
+ )
1393
+
1394
+ return self.frames_client.read(
1395
+ backend=_TSDB_BE,
1396
+ start=start,
1397
+ end=end,
1398
+ query=sql_query,
1399
+ )
1400
+
1401
+ df_results = get_latest_metrics_records("results")
1402
+ df_metrics = get_latest_metrics_records("metrics")
1403
+
1404
+ if df_results.empty and df_metrics.empty:
1405
+ return metric_list
1406
+
1407
+ # Convert the results DataFrame to a list of ApplicationResultRecord
1408
+ def build_metric_objects() -> (
1409
+ list[
1410
+ Union[
1411
+ mm_schemas.ApplicationResultRecord,
1412
+ mm_schemas.ApplicationMetricRecord,
1413
+ ]
1414
+ ]
1415
+ ):
1416
+ metric_objects = []
1417
+ if not df_results.empty:
1418
+ df_results.rename(
1419
+ columns={
1420
+ f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
1421
+ f"last({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS,
1422
+ f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
1423
+ },
1424
+ inplace=True,
1425
+ )
1426
+ for _, row in df_results.iterrows():
1427
+ metric_objects.append(
1428
+ mm_schemas.ApplicationResultRecord(
1429
+ result_name=row[mm_schemas.ResultData.RESULT_NAME],
1430
+ kind=row[mm_schemas.ResultData.RESULT_KIND],
1431
+ status=row[mm_schemas.ResultData.RESULT_STATUS],
1432
+ value=row[mm_schemas.ResultData.RESULT_VALUE],
1433
+ )
1434
+ )
1435
+ if not df_metrics.empty:
1436
+ df_metrics.rename(
1437
+ columns={
1438
+ f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
1439
+ },
1440
+ inplace=True,
1441
+ )
1442
+
1443
+ for _, row in df_metrics.iterrows():
1444
+ metric_objects.append(
1445
+ mm_schemas.ApplicationMetricRecord(
1446
+ metric_name=row[mm_schemas.MetricData.METRIC_NAME],
1447
+ value=row[mm_schemas.MetricData.METRIC_VALUE],
1448
+ )
1449
+ )
1450
+ return metric_objects
1451
+
1452
+ return build_metric_objects()
@@ -1081,34 +1081,56 @@ def rerun_workflow(
1081
1081
  :param run_uid: The run UID of the original workflow to retry.
1082
1082
  :param project_name: The project name.
1083
1083
  """
1084
+ db = mlrun.get_run_db()
1084
1085
 
1085
1086
  try:
1086
- # TODO in followups: handle start and running notifications
1087
-
1088
- # Retry the pipeline - TODO: add submit-direct flag when created
1089
- db = mlrun.get_run_db()
1087
+ # Invoke the KFP retry endpoint (direct-submit mode)
1090
1088
  new_pipeline_id = db.retry_pipeline(
1091
- run_uid, project_name, submit_mode=mlrun_constants.WorkflowSubmitMode.direct
1089
+ run_id=run_uid,
1090
+ project=project_name,
1091
+ submit_mode=mlrun_constants.WorkflowSubmitMode.direct,
1092
+ )
1093
+ logger.info(
1094
+ "KFP retry submitted",
1095
+ new_pipeline_id=new_pipeline_id,
1096
+ rerun_of_workflow=run_uid,
1092
1097
  )
1093
1098
 
1094
- # Store result for observability
1095
- context.set_label(
1096
- mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id
1099
+ except mlrun.errors.MLRunHTTPError as http_exc:
1100
+ logger.error(
1101
+ "Failed calling KFP retry API",
1102
+ run_id=run_uid,
1103
+ error=err_to_str(http_exc),
1097
1104
  )
1098
- context.update_run()
1105
+ raise
1099
1106
 
1100
- context.log_result("workflow_id", new_pipeline_id)
1107
+ # Enqueue "running" notifications server-side for this RerunRunner run
1108
+ db.push_run_notifications(context.uid, project_name)
1101
1109
 
1102
- # wait for pipeline completion so monitor will push terminal notifications
1103
- wait_for_pipeline_completion(
1110
+ context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
1111
+ context.update_run()
1112
+
1113
+ context.log_result("workflow_id", new_pipeline_id)
1114
+
1115
+ try:
1116
+ pipeline = wait_for_pipeline_completion(
1104
1117
  new_pipeline_id,
1105
1118
  project=project_name,
1106
1119
  )
1107
-
1108
- # Temporary exception
1109
1120
  except Exception as exc:
1110
- context.logger.error("Failed to rerun workflow", exc=err_to_str(exc))
1111
- raise
1121
+ mlrun.utils.logger.error(
1122
+ "Failed waiting for workflow completion",
1123
+ rerun_pipeline_id=new_pipeline_id,
1124
+ exc=err_to_str(exc),
1125
+ )
1126
+ else:
1127
+ final_state = pipeline["run"]["status"]
1128
+ context.log_result("workflow_state", final_state, commit=True)
1129
+
1130
+ if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
1131
+ raise mlrun.errors.MLRunRuntimeError(
1132
+ f"Pipeline retry of {run_uid} finished in state={final_state}"
1133
+ )
1112
1134
 
1113
1135
 
1114
1136
  def load_and_run(context, *args, **kwargs):
@@ -1201,13 +1223,13 @@ def load_and_run_workflow(
1201
1223
  start_notifications = [
1202
1224
  notification
1203
1225
  for notification in context.get_notifications(unmask_secret_params=True)
1204
- if "running" in notification.when
1226
+ if mlrun.common.runtimes.constants.RunStates.running in notification.when
1205
1227
  ]
1206
1228
 
1207
1229
  # Prevent redundant notifications for run completion by ensuring that notifications are only triggered when the run
1208
1230
  # reaches the "running" state, as the server already handles the completion notifications.
1209
1231
  for notification in start_notifications:
1210
- notification.when = ["running"]
1232
+ notification.when = [mlrun.common.runtimes.constants.RunStates.running]
1211
1233
 
1212
1234
  workflow_log_message = workflow_name or workflow_path
1213
1235
  context.logger.info(
mlrun/projects/project.py CHANGED
@@ -1889,7 +1889,7 @@ class MlrunProject(ModelObj):
1889
1889
  def log_llm_prompt(
1890
1890
  self,
1891
1891
  key,
1892
- prompt_string: Optional[str] = None,
1892
+ prompt_template: Optional[list[dict]] = None,
1893
1893
  prompt_path: Optional[str] = None,
1894
1894
  prompt_legend: Optional[dict] = None,
1895
1895
  model_artifact: Union[ModelArtifact, str] = None,
@@ -1923,10 +1923,16 @@ class MlrunProject(ModelObj):
1923
1923
  )
1924
1924
 
1925
1925
  :param key: Unique key for the prompt artifact.
1926
- :param prompt_string: Raw prompt text. Mutually exclusive with `prompt_path`.
1926
+ :param prompt_template: Raw prompt list of dicts -
1927
+ [{"role": "system", "content": "You are a {profession} advisor"},
1928
+ {"role": "user", "content": "I need your help with {profession}"}]. Only the "role" and "content" keys are allowed, in any
1929
+ letter case (upper/lower); keys are converted to lower case.
1930
+ Cannot be used with `prompt_path`.
1927
1931
  :param prompt_path: Path to a file containing the prompt. Mutually exclusive with `prompt_template`.
1928
1932
  :param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
1929
- and the value is a description or explanation of what that placeholder represents.
1933
+ and the value is a dictionary holding two keys, "field" and "description". "field" names the field in
1934
+ the event that holds the value of the placeholder; if "field" is None or missing, the placeholder
1935
+ name is used instead. "description" is an explanation of what that placeholder represents.
1930
1936
  Useful for documenting and clarifying dynamic parts of the prompt.
1931
1937
  :param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
1932
1938
  :param model_configuration: Configuration dictionary for model generation parameters
@@ -1942,10 +1948,15 @@ class MlrunProject(ModelObj):
1942
1948
  :returns: The logged `LLMPromptArtifact` object.
1943
1949
  """
1944
1950
 
1951
+ if not prompt_template and not prompt_path:
1952
+ raise mlrun.errors.MLRunInvalidArgumentError(
1953
+ "Either 'prompt_template' or 'prompt_path' must be provided"
1954
+ )
1955
+
1945
1956
  llm_prompt = LLMPromptArtifact(
1946
1957
  key=key,
1947
1958
  project=self.name,
1948
- prompt_string=prompt_string,
1959
+ prompt_template=prompt_template,
1949
1960
  prompt_path=prompt_path,
1950
1961
  prompt_legend=prompt_legend,
1951
1962
  model_artifact=model_artifact,
mlrun/run.py CHANGED
@@ -895,7 +895,7 @@ def _run_pipeline(
895
895
  def retry_pipeline(
896
896
  run_id: str,
897
897
  project: str,
898
- ) -> str:
898
+ ) -> typing.Union[str, dict[str, str]]:
899
899
  """Retry a pipeline run.
900
900
 
901
901
  This function retries a previously executed pipeline run using the specified run ID. If the run is not in a
@@ -914,10 +914,33 @@ def retry_pipeline(
914
914
  "Please set the dbpath URL."
915
915
  )
916
916
 
917
- pipeline_run_id = mldb.retry_pipeline(
917
+ # Invoke retry pipeline run. Depending on the context, this call returns either:
918
+ # 1. A simple string of a workflow-id, for direct retries or non-remote workflows, or
919
+ # 2. A dict payload representing a WorkflowResponse when rerunning remote workflows.
920
+ rerun_response = mldb.retry_pipeline(
918
921
  run_id=run_id,
919
922
  project=project,
920
923
  )
924
+ if isinstance(rerun_response, str):
925
+ pipeline_run_id = rerun_response
926
+ else:
927
+ rerun_response = mlrun.common.schemas.WorkflowResponse(**rerun_response)
928
+
929
+ def _fetch_workflow_id():
930
+ rerun = mldb.read_run(rerun_response.run_id, project)
931
+ workflow_id = rerun["metadata"]["labels"].get("workflow-id")
932
+ if not workflow_id:
933
+ raise mlrun.errors.MLRunRuntimeError("workflow-id label not set yet")
934
+ return workflow_id
935
+
936
+ pipeline_run_id = mlrun.utils.helpers.retry_until_successful(
937
+ backoff=3,
938
+ timeout=int(mlrun.mlconf.workflows.timeouts.remote),
939
+ logger=logger,
940
+ verbose=False,
941
+ _function=_fetch_workflow_id,
942
+ )
943
+
921
944
  if pipeline_run_id == run_id:
922
945
  logger.info(
923
946
  f"Retried pipeline run ID={pipeline_run_id}, check UI for progress."
mlrun/serving/__init__.py CHANGED
@@ -28,6 +28,7 @@ __all__ = [
28
28
  "Model",
29
29
  "ModelSelector",
30
30
  "MonitoredStep",
31
+ "LLModel",
31
32
  ]
32
33
 
33
34
  from .routers import ModelRouter, VotingEnsemble # noqa
@@ -47,6 +48,7 @@ from .states import (
47
48
  Model,
48
49
  ModelSelector,
49
50
  MonitoredStep,
51
+ LLModel,
50
52
  ) # noqa
51
53
  from .v1_serving import MLModelServer, new_v1_model_server # noqa
52
54
  from .v2_serving import V2ModelServer # noqa
mlrun/serving/server.py CHANGED
@@ -788,6 +788,7 @@ class GraphContext:
788
788
  self.verbose = False
789
789
  self.stream = None
790
790
  self.root = None
791
+ self.executor: Optional[storey.flow.RunnableExecutor] = None
791
792
 
792
793
  if nuclio_context:
793
794
  self.logger: NuclioLogger = nuclio_context.logger