mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl
This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +6 -0
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +64 -9
- mlrun/db/nopdb.py +13 -0
- mlrun/launcher/local.py +13 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/pipelines.py +44 -24
- mlrun/projects/project.py +26 -7
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +0 -2
- mlrun/serving/server.py +122 -53
- mlrun/serving/states.py +128 -44
- mlrun/serving/system_steps.py +84 -58
- mlrun/utils/helpers.py +82 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
CHANGED

@@ -469,6 +469,7 @@ class TDEngineConnector(TSDBConnector):
         preform_agg_columns: Optional[list] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -496,6 +497,8 @@ class TDEngineConnector(TSDBConnector):
                                 if an empty list was provided The aggregation won't be performed.
         :param order_by:        The column or alias to preform ordering on the query.
         :param desc:            Whether or not to sort the results in descending order.
+        :param partition_by:    The column to partition the results by. Note that if interval is provided,
+                                `agg_funcs` must be provided as well.

         :return:    DataFrame with the provided attributes from the data collection.
         :raise:     MLRunInvalidArgumentError if query the provided table failed.
@@ -517,6 +520,7 @@ class TDEngineConnector(TSDBConnector):
             preform_agg_funcs_columns=preform_agg_columns,
             order_by=order_by,
             desc=desc,
+            partition_by=partition_by,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
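
For orientation, a hedged sketch of how the connector's own code might call `_get_records` with the new parameter (`_get_records` is internal API; the table and column names below are illustrative, not taken from `mm_schemas`):

from datetime import datetime, timedelta

# `connector` is assumed to be an already-constructed TDEngineConnector.
end = datetime.now()
start = end - timedelta(hours=6)
df = connector._get_records(
    table="app_results",         # illustrative super-table name
    start=start,
    end=end,
    interval="10m",              # aggregation window
    agg_funcs=["max"],           # per the new docstring, required with interval/partition_by
    columns=["result_status"],
    partition_by="endpoint_id",  # new in rc16: one aggregated series per partition value
)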
@@ -1205,6 +1209,39 @@ class TDEngineConnector(TSDBConnector):
             )
         )

+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ResultData.RESULT_STATUS,
+            filter_values=[
+                mm_schemas.ResultStatusApp.potential_detection.value,
+                mm_schemas.ResultStatusApp.detected.value,
+            ],
+        )
+        table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+        start, end, interval = self._prepare_aligned_start_end(start, end)
+
+        # get per time-interval x endpoint_id combination the max result status
+        df = self._get_records(
+            table=table,
+            start=start,
+            end=end,
+            interval=interval,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            filter_query=filter_query,
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+        )
+        if df.empty:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        df["_wstart"] = pd.to_datetime(df["_wstart"])
+        return self._df_to_drift_data(df)
+
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
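
The method reduces each (time window, endpoint) pair to the maximum result status within the window, then hands the frame to `_df_to_drift_data` (a shared helper that, per the file list, lands in `mlrun/model_monitoring/db/tsdb/base.py`). A hedged pandas sketch of the per-window counting that helper presumably performs, assuming the usual `ResultStatusApp` mapping (1 = potential_detection, 2 = detected):

import pandas as pd

df = pd.DataFrame(
    {
        "_wstart": pd.to_datetime(
            ["2025-01-01 00:00", "2025-01-01 00:00", "2025-01-01 00:10"]
        ),
        "max(result_status)": [1, 2, 2],  # max status per (window, endpoint)
    }
)
counts = (
    df.groupby("_wstart")["max(result_status)"]
    .agg(
        suspected=lambda s: int((s == 1).sum()),  # endpoints with potential detection
        detected=lambda s: int((s == 2).sum()),   # endpoints with confirmed detection
    )
    .reset_index()
)
print(counts)  # one row per window: suspected/detected endpoint counts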
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED

@@ -1450,3 +1450,28 @@ class V3IOTSDBConnector(TSDBConnector):
                 return metric_objects

         return build_metric_objects()
+
+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        table = mm_schemas.V3IOTSDBTables.APP_RESULTS
+        start, end, interval = self._prepare_aligned_start_end(start, end)
+
+        # get per time-interval x endpoint_id combination the max result status
+        df = self._get_records(
+            table=table,
+            start=start,
+            end=end,
+            interval=interval,
+            sliding_window_step=interval,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            agg_funcs=["max"],
+            group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+        )
+        if df.empty:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+        df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
+        df = df.reset_index(names="_wstart")
+        return self._df_to_drift_data(df)
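
Unlike the TDEngine implementation above, this variant passes no `filter_query`, so windows whose maximum status is below `potential_detection` come back from V3IO and are dropped client-side with the `>= 1` mask. A small pandas illustration of that post-filter (column name illustrative):

import pandas as pd

df = pd.DataFrame(
    {"max(result_status)": [0, 1, 2]},
    index=pd.to_datetime(["2025-01-01 00:00", "2025-01-01 00:10", "2025-01-01 00:20"]),
)
df = df[df["max(result_status)"] >= 1]  # keep only suspected/detected windows
df = df.reset_index(names="_wstart")    # mirror the diff: window start becomes a column
print(df)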
mlrun/model_monitoring/helpers.py
CHANGED

@@ -549,6 +549,10 @@ def _get_monitoring_schedules_folder_path(project: str) -> str:
     )


+def _get_monitoring_schedules_user_folder_path(out_path: str) -> str:
+    return os.path.join(out_path, mm_constants.FileTargetKind.MONITORING_SCHEDULES)
+
+
 def _get_monitoring_schedules_file_endpoint_path(
     *, project: str, endpoint_id: str
 ) -> str:
@@ -570,10 +574,7 @@ def get_monitoring_schedules_endpoint_data(
     )


-def get_monitoring_schedules_chief_data(
-    *,
-    project: str,
-) -> "DataItem":
+def get_monitoring_schedules_chief_data(*, project: str) -> "DataItem":
     """
     Get the model monitoring schedules' data item of the project's model endpoint.
     """
@@ -582,6 +583,19 @@ def get_monitoring_schedules_chief_data(
     )


+def get_monitoring_schedules_user_application_data(
+    *, out_path: str, application: str
+) -> "DataItem":
+    """
+    Get the model monitoring schedules' data item of user application runs.
+    """
+    return mlrun.datastore.store_manager.object(
+        _get_monitoring_schedules_file_user_application_path(
+            out_path=out_path, application=application
+        )
+    )
+
+
 def _get_monitoring_schedules_file_chief_path(
     *,
     project: str,
@@ -591,6 +605,14 @@ def _get_monitoring_schedules_file_chief_path(
     )


+def _get_monitoring_schedules_file_user_application_path(
+    *, out_path: str, application: str
+) -> str:
+    return os.path.join(
+        _get_monitoring_schedules_user_folder_path(out_path), f"{application}.json"
+    )
+
+
 def get_start_end(
     start: Union[datetime.datetime, None],
     end: Union[datetime.datetime, None],
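
The new helpers mirror the existing chief/endpoint path builders but root the schedules file under a run's `out_path` instead of the project folder. A hedged sketch of the resulting layout; the concrete value of `mm_constants.FileTargetKind.MONITORING_SCHEDULES` is assumed here to be "monitoring-schedules":

import os

MONITORING_SCHEDULES = "monitoring-schedules"  # assumption, see lead-in

def user_application_schedules_path(out_path: str, application: str) -> str:
    # same shape as _get_monitoring_schedules_file_user_application_path
    return os.path.join(out_path, MONITORING_SCHEDULES, f"{application}.json")

print(user_application_schedules_path("v3io:///projects/my-proj/artifacts", "my-app"))
# v3io:///projects/my-proj/artifacts/monitoring-schedules/my-app.json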
mlrun/projects/pipelines.py
CHANGED
@@ -1072,7 +1072,11 @@ def github_webhook(request):


 def rerun_workflow(
-    context: mlrun.execution.MLClientCtx,
+    context: mlrun.execution.MLClientCtx,
+    run_uid: str,
+    project_name: str,
+    original_runner_uid: str,
+    original_workflow_name: str,
 ):
     """
     Re-run a workflow by retrying a previously failed KFP pipeline.
@@ -1080,8 +1084,11 @@ def rerun_workflow(
     :param context: MLRun context.
     :param run_uid: The run UID of the original workflow to retry.
     :param project_name: The project name.
+    :param original_runner_uid: The original workflow runner UID.
+    :param original_workflow_name: The original workflow name.
     """
     db = mlrun.get_run_db()
+    new_pipeline_id = None

     try:
         # Invoke the KFP retry endpoint (direct-submit mode)
@@ -1096,6 +1103,24 @@ def rerun_workflow(
             rerun_of_workflow=run_uid,
         )

+        # Enqueue "running" notifications server-side for this RerunRunner run
+        db.push_run_notifications(context.uid, project_name)
+
+        context.set_label(
+            mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id
+        )
+        context.update_run()
+
+        context.log_result("workflow_id", new_pipeline_id)
+
+        pipeline = wait_for_pipeline_completion(
+            new_pipeline_id,
+            project=project_name,
+        )
+
+        final_state = pipeline["run"]["status"]
+        context.log_result("workflow_state", final_state, commit=True)
+
     except mlrun.errors.MLRunHTTPError as http_exc:
         logger.error(
             "Failed calling KFP retry API",
@@ -1104,33 +1129,28 @@ def rerun_workflow(
         )
         raise

-    # Enqueue "running" notifications server-side for this RerunRunner run
-    db.push_run_notifications(context.uid, project_name)
-
-    context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
-    context.update_run()
-
-    context.log_result("workflow_id", new_pipeline_id)
-
-    try:
-        pipeline = wait_for_pipeline_completion(
-            new_pipeline_id,
-            project=project_name,
-        )
     except Exception as exc:
-        logger.error(
-            "
+        logger.error(
+            "Error during rerun_workflow execution",
+            error=err_to_str(exc),
             rerun_pipeline_id=new_pipeline_id,
-            exc=err_to_str(exc),
         )
-
-    final_state = pipeline["run"]["status"]
-    context.log_result("workflow_state", final_state, commit=True)
+        raise

-
-
-
-
+    finally:
+        # Once the rerun has finished, clear the "retrying" label on the original runner
+        # so that subsequent retry requests can acquire the lock again.
+        db.set_run_retrying_status(
+            project=project_name,
+            name=original_workflow_name,
+            run_id=original_runner_uid,
+            retrying=False,
+        )
+
+    if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise mlrun.errors.MLRunRuntimeError(
+            f"Pipeline retry of {run_uid} finished in state={final_state}"
+        )


 def load_and_run(context, *args, **kwargs):
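
The net effect of the restructure: submission, labeling, and waiting now sit in a single `try`, the `except` blocks only log and re-raise, and the `finally` guarantees the retry lock on the original runner is released no matter how the rerun ends. A minimal sketch of the shape (illustrative, not mlrun's actual code):

def rerun(db, submit_retry, wait_for_completion):
    final_state = None
    try:
        pipeline_id = submit_retry()                    # submit the KFP retry
        final_state = wait_for_completion(pipeline_id)  # block until the pipeline ends
    except Exception:
        # log once, then propagate
        raise
    finally:
        # hypothetical stand-in for set_run_retrying_status(retrying=False)
        db.release_retry_lock()
    if final_state != "Succeeded":
        raise RuntimeError(f"retry finished in state={final_state}")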
mlrun/projects/project.py
CHANGED
@@ -1042,12 +1042,7 @@ class ProjectSpec(ModelObj):
                 artifact = artifact.to_dict()
             else:  # artifact is a dict
                 # imported/legacy artifacts don't have metadata,spec,status fields
-                key_field = (
-                    "key"
-                    if _is_imported_artifact(artifact)
-                    or mlrun.utils.is_legacy_artifact(artifact)
-                    else "metadata.key"
-                )
+                key_field = "key" if _is_imported_artifact(artifact) else "metadata.key"
                 key = mlrun.utils.get_in(artifact, key_field, "")
                 if not key:
                     raise ValueError(f'artifacts "{key_field}" must be specified')
@@ -5078,7 +5073,6 @@ class MlrunProject(ModelObj):
         :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used (will be removed in 1.10.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -5557,6 +5551,31 @@ class MlrunProject(ModelObj):
             **kwargs,
         )

+    def get_drift_over_time(
+        self,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+    ) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
+        """
+        Get drift counts over time for the project.
+
+        This method returns a list of tuples, each representing a time-interval (in a granularity set by the
+        duration of the given time range) and the number of suspected drifts and detected drifts in that interval.
+        For a range of 6 hours or less, the granularity is 10 minutes; for a range of 6 hours to 72 hours, the
+        granularity is 1 hour; and for a range of more than 72 hours, the granularity is 24 hours.
+
+        :param start: Start time of the range to retrieve drift counts from.
+        :param end: End time of the range to retrieve drift counts from.
+
+        :return: A ModelEndpointDriftValues object containing the drift counts over time.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.get_drift_over_time(
+            project=self.metadata.name,
+            start=start,
+            end=end,
+        )
+
     def _run_authenticated_git_action(
         self,
         action: Callable,
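
A hedged usage sketch for the new project-level API (project name illustrative; requires a reachable MLRun API with model monitoring enabled):

import datetime
import mlrun

project = mlrun.get_or_create_project("my-project")
drift = project.get_drift_over_time(
    start=datetime.datetime.now() - datetime.timedelta(hours=6),
    end=datetime.datetime.now(),
)
# With a 6-hour range, each entry covers a 10-minute window.
print(drift.values)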
mlrun/runtimes/daskjob.py
CHANGED
@@ -93,6 +93,9 @@ class DaskSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -123,6 +126,9 @@ class DaskSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.args = args
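
The same three keyword arguments are threaded through every `KubeResourceSpec` subclass in this release (the MPI, Nuclio, and Spark specs below receive identical hunks), so serving-related fields now survive spec construction on batch runtimes as well. A hedged sketch:

from mlrun.runtimes.daskjob import DaskSpec

# Values are illustrative; serialization relies on the _dict_fields additions in pod.py.
spec = DaskSpec(image="mlrun/mlrun", track_models=True, parameters={"threshold": 0.5})
assert spec.track_models is True
assert spec.to_dict().get("track_models") is True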
mlrun/runtimes/mpijob/abstract.py
CHANGED

@@ -55,6 +55,9 @@ class MPIResourceSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -85,6 +88,9 @@ class MPIResourceSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.mpi_args = mpi_args or [
             "-x",
mlrun/runtimes/mpijob/v1.py
CHANGED
@@ -50,6 +50,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -81,6 +84,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()
mlrun/runtimes/nuclio/application/application.py
CHANGED

@@ -400,8 +400,10 @@ class ApplicationRuntime(RemoteRuntime):
         # nuclio implementation detail - when providing the image and emptying out the source code and build source,
         # nuclio skips rebuilding the image and simply takes the prebuilt image
         self.spec.build.functionSourceCode = ""
+        self.spec.config.pop("spec.build.functionSourceCode", None)
         self.status.application_source = self.spec.build.source
         self.spec.build.source = ""
+        self.spec.config.pop("spec.build.source", None)

         # save the image in the status, so we won't repopulate the function source code
         self.status.container_image = image
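
Presumably `spec.config` can carry raw Nuclio overrides keyed by dotted paths, and such an override would win over the freshly emptied attribute, defeating the prebuilt-image shortcut; the paired pops clear both places:

# Paired cleanup as in the hunk above: structured field plus raw override.
# pop(..., None) avoids a KeyError when no override was ever set.
self.spec.build.functionSourceCode = ""
self.spec.config.pop("spec.build.functionSourceCode", None)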
mlrun/runtimes/nuclio/function.py
CHANGED

@@ -155,6 +155,9 @@ class NuclioSpec(KubeResourceSpec):
         state_thresholds=None,
         disable_default_http_trigger=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -185,6 +188,9 @@ class NuclioSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )

         self.base_spec = base_spec or {}
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -720,6 +720,7 @@ class ServingRuntime(RemoteRuntime):
             "track_models": self.spec.track_models,
             "default_content_type": self.spec.default_content_type,
             "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
+            # TODO: find another way to pass this (needed for local run)
             "filename": getattr(self.spec, "filename", None),
         }

@@ -788,17 +789,13 @@ class ServingRuntime(RemoteRuntime):
             monitoring_mock=self.spec.track_models,
         )

-
-
-
-
-        server.
-
-
-            self.spec.track_models,
-            server.context,
-            self.spec,
-        )
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            server.graph,
+            self.spec.track_models,
+            server.context,
+            self.spec,
+        )

         if workdir:
             os.chdir(old_workdir)
@@ -858,6 +855,7 @@ class ServingRuntime(RemoteRuntime):
             description=self.spec.description,
             workdir=self.spec.workdir,
             image_pull_secret=self.spec.image_pull_secret,
+            build=self.spec.build,
             node_name=self.spec.node_name,
             node_selector=self.spec.node_selector,
             affinity=self.spec.affinity,
@@ -868,6 +866,9 @@ class ServingRuntime(RemoteRuntime):
             security_context=self.spec.security_context,
             state_thresholds=self.spec.state_thresholds,
             serving_spec=self._get_serving_spec(),
+            track_models=self.spec.track_models,
+            parameters=self.spec.parameters,
+            graph=self.spec.graph,
         )
         job = KubejobRuntime(
             spec=spec,
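
Taken together, the spec that `ServingRuntime` hands to the derived `KubejobRuntime` now carries the build config plus the graph/monitoring fields, and the serialized serving spec includes the filename needed for local runs. An illustrative shape of that dict (values are examples, not defaults):

serving_spec = {
    "track_models": True,
    "default_content_type": "application/json",
    "model_endpoint_creation_task_name": None,
    "filename": "serving.py",  # flagged with a TODO above: needed for local run
}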
mlrun/runtimes/pod.py
CHANGED
@@ -104,6 +104,9 @@ class KubeResourceSpec(FunctionSpec):
         "security_context",
         "state_thresholds",
         "serving_spec",
+        "track_models",
+        "parameters",
+        "graph",
     ]
     _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
         "volumes",
@@ -180,6 +183,9 @@ class KubeResourceSpec(FunctionSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        track_models=None,
+        parameters=None,
+        graph=None,
     ):
         super().__init__(
             command=command,
@@ -226,6 +232,10 @@ class KubeResourceSpec(FunctionSpec):
             or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
         )
         self.serving_spec = serving_spec
+        self.track_models = track_models
+        self.parameters = parameters
+        self._graph = None
+        self.graph = graph
         # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
         # _dict_fields and doesn't have a setter.
         self._termination_grace_period_seconds = None
@@ -303,6 +313,17 @@ class KubeResourceSpec(FunctionSpec):
     def termination_grace_period_seconds(self) -> typing.Optional[int]:
         return self._termination_grace_period_seconds

+    @property
+    def graph(self):
+        """states graph, holding the serving workflow/DAG topology"""
+        return self._graph
+
+    @graph.setter
+    def graph(self, graph):
+        from ..serving.states import graph_root_setter
+
+        graph_root_setter(self, graph)
+
     def _serialize_field(
         self, struct: dict, field_name: typing.Optional[str] = None, strip: bool = False
     ) -> typing.Any:
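
The new `graph` property mirrors the serving spec: reads return the private `_graph`, and every assignment is routed through `mlrun.serving.states.graph_root_setter`, which normalizes the input before storing it. A simplified sketch of the delegation pattern (stand-in helper, not mlrun's actual normalization):

class SpecSketch:
    def __init__(self, graph=None):
        self._graph = None
        self.graph = graph  # assignment in __init__ already runs the setter

    @property
    def graph(self):
        return self._graph

    @graph.setter
    def graph(self, graph):
        # graph_root_setter would build the real topology object here
        self._graph = normalize_graph(graph) if graph is not None else None

def normalize_graph(graph):
    # hypothetical stand-in for mlrun.serving.states.graph_root_setter
    return dict(graph) if isinstance(graph, dict) else graph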
mlrun/runtimes/remotesparkjob.py
CHANGED
@@ -59,6 +59,9 @@ class RemoteSparkSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -89,6 +92,9 @@ class RemoteSparkSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.provider = provider
mlrun/runtimes/sparkjob/spark3job.py
CHANGED

@@ -169,6 +169,9 @@ class Spark3JobSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -199,6 +202,9 @@ class Spark3JobSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )

         self.driver_resources = driver_resources or {}
mlrun/runtimes/utils.py
CHANGED
@@ -445,8 +445,6 @@ def enrich_run_labels(
     labels_enrichment = {
         mlrun_constants.MLRunInternalLabels.owner: os.environ.get("V3IO_USERNAME")
         or getpass.getuser(),
-        # TODO: remove this in 1.10.0
-        mlrun_constants.MLRunInternalLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
     }

     # Resolve which label keys to enrich