mlrun 1.7.0rc58__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/formatters/feature_set.py +12 -1
- mlrun/config.py +54 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/db/httpdb.py +3 -1
- mlrun/features.py +2 -1
- mlrun/model_monitoring/applications/_application_steps.py +12 -10
- mlrun/model_monitoring/applications/evidently_base.py +1 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +30 -11
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +45 -30
- mlrun/platforms/iguazio.py +46 -26
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/pipelines.py +184 -55
- mlrun/projects/project.py +15 -0
- mlrun/runtimes/nuclio/serving.py +1 -1
- mlrun/serving/routers.py +10 -1
- mlrun/serving/states.py +4 -2
- mlrun/serving/v2_serving.py +59 -23
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/METADATA +186 -186
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/RECORD +26 -26
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc58.dist-info → mlrun-1.7.1.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py
CHANGED
@@ -984,14 +984,25 @@ def github_webhook(request):
     return {"msg": "pushed"}
 
 
-def load_and_run(
+def load_and_run(context, *args, **kwargs):
+    """
+    This function serves as an alias to `load_and_run_workflow`,
+    allowing to continue using `load_and_run` without modifying existing workflows or exported runs.
+    This approach ensures backward compatibility,
+    while directing all new calls to the updated `load_and_run_workflow` function.
+    """
+    kwargs.pop("load_only", None)
+    kwargs.pop("save", None)
+    load_and_run_workflow(context, *args, **kwargs)
+
+
+def load_and_run_workflow(
     context: mlrun.execution.MLClientCtx,
     url: str = None,
     project_name: str = "",
     init_git: bool = None,
     subpath: str = None,
     clone: bool = False,
-    save: bool = True,
     workflow_name: str = None,
     workflow_path: str = None,
     workflow_arguments: dict[str, typing.Any] = None,
@@ -1004,14 +1015,12 @@ def load_and_run(
     local: bool = None,
     schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
     cleanup_ttl: int = None,
-    load_only: bool = False,
     wait_for_completion: bool = False,
     project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
     This function loads a project from a given remote source and then runs the workflow.
-
     :param context: mlrun context.
     :param url: remote url that represents the project's source.
                 See 'mlrun.load_project()' for details
@@ -1019,7 +1028,6 @@ def load_and_run(
     :param init_git: if True, will git init the context dir
     :param subpath: project subpath (within the archive)
     :param clone: if True, always clone (delete any existing content)
-    :param save: whether to save the created project and artifact in the DB
     :param workflow_name: name of the workflow
     :param workflow_path: url to a workflow file, if not a project workflow
     :param workflow_arguments: kubeflow pipelines arguments (parameters)
@@ -1035,48 +1043,31 @@ def load_and_run(
     :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
     :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
                         workflow and all its resources are deleted)
-    :param load_only: for just loading the project, inner use.
     :param wait_for_completion: wait for workflow completion before returning
     :param project_context: project context path (used for loading the project)
     """
-    try:
-        project = mlrun.load_project(
-            context=project_context or f"./{project_name}",
-            url=url,
-            name=project_name,
-            init_git=init_git,
-            subpath=subpath,
-            clone=clone,
-            save=save,
-        )
-    except Exception as error:
-        if schedule:
-            notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
-                ["slack"]
-            )
-            url = get_ui_url(project_name, context.uid)
-            link = f"<{url}|*view workflow job details*>"
-            message = (
-                f":x: Failed to run scheduled workflow {workflow_name} in Project {project_name} !\n"
-                f"error: ```{error}```\n{link}"
-            )
-            # Sending Slack Notification without losing the original error:
-            try:
-                notification_pusher.push(
-                    message=message,
-                    severity=mlrun.common.schemas.NotificationSeverity.ERROR,
-                )
-            except Exception as exc:
-                logger.error("Failed to send slack notification", exc=err_to_str(exc))
-        raise error
-
-    context.logger.info(f"Loaded project {project.name} successfully")
+    project_context = project_context or f"./{project_name}"
+
+    # Load the project to fetch files which the runner needs, such as remote source files
+    pull_remote_project_files(
+        context=context,
+        project_context=project_context,
+        url=url,
+        project_name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        schedule=schedule,
+        workflow_name=workflow_name,
+    )
 
+    # Retrieve the project object:
+    # - If the project exists in the MLRun database, it will be loaded from there.
+    # - If it doesn't exist in the database, it will be created from the previously loaded local directory.
+    project = mlrun.get_or_create_project(
+        context=project_context or f"./{project_name}",
+        name=project_name,
+    )
 
     # extract "start" notification if exists
     start_notifications = [
@@ -1109,18 +1100,156 @@ def load_and_run(
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
 
     if wait_for_completion:
-        try:
-            run.wait_for_completion()
-        except Exception as exc:
-            mlrun.utils.logger.error(
-                "Failed waiting for workflow completion",
-                workflow=workflow_log_message,
-                exc=err_to_str(exc),
-            )
-
-        pipeline_state, _, _ = project.get_run_status(run)
-        context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-        if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
-            raise RuntimeError(
-                f"Workflow {workflow_log_message} failed, state={pipeline_state}"
-            )
+        handle_workflow_completion(
+            run=run,
+            project=project,
+            context=context,
+            workflow_log_message=workflow_log_message,
+        )
+
+
+def pull_remote_project_files(
+    context: mlrun.execution.MLClientCtx,
+    project_context: str,
+    url: str,
+    project_name: str,
+    init_git: typing.Optional[bool],
+    subpath: typing.Optional[str],
+    clone: bool,
+    schedule: typing.Optional[
+        typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger]
+    ],
+    workflow_name: typing.Optional[str],
+) -> None:
+    """
+    Load the project to clone remote files if they exist.
+    If an exception occurs during project loading, send a notification if the workflow is scheduled.
+
+    :param context: MLRun execution context.
+    :param project_context: Path to the project context.
+    :param url: URL of the project repository.
+    :param project_name: Name of the project.
+    :param init_git: Initialize a git repository.
+    :param subpath: Project subpath within the repository.
+    :param clone: Whether to clone the repository.
+    :param schedule: Schedule for running the workflow.
+    :param workflow_name: Name of the workflow to run.
+    """
+    try:
+        # Load the project to clone remote files if they exist.
+        # Using save=False to avoid overriding changes from the database if it already exists.
+        mlrun.load_project(
+            context=project_context,
+            url=url,
+            name=project_name,
+            init_git=init_git,
+            subpath=subpath,
+            clone=clone,
+            save=False,
+        )
+    except Exception as error:
+        notify_scheduled_workflow_failure(
+            schedule=schedule,
+            project_name=project_name,
+            workflow_name=workflow_name,
+            error=error,
+            context_uid=context.uid,
+        )
+        raise error
+
+
+def notify_scheduled_workflow_failure(
+    schedule,
+    project_name: str,
+    workflow_name: str,
+    error: Exception,
+    context_uid: str,
+) -> None:
+    if schedule:
+        notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
+            ["slack"]
+        )
+        url = get_ui_url(project_name, context_uid)
+        link = f"<{url}|*view workflow job details*>"
+        message = (
+            f":x: Failed to run scheduled workflow {workflow_name} "
+            f"in Project {project_name}!\n"
+            f"Error: ```{err_to_str(error)}```\n{link}"
+        )
+        # Sending Slack Notification without losing the original error:
+        try:
+            notification_pusher.push(
+                message=message,
+                severity=mlrun.common.schemas.NotificationSeverity.ERROR,
+            )
+        except Exception as exc:
+            logger.error("Failed to send slack notification", exc=err_to_str(exc))
+
+
+def handle_workflow_completion(
+    run: _PipelineRunStatus,
+    project,
+    context: mlrun.execution.MLClientCtx,
+    workflow_log_message: str,
+) -> None:
+    """
+    Handle workflow completion by waiting for it to finish and logging the final state.
+
+    :param run: Run object containing workflow execution details.
+    :param project: MLRun project object.
+    :param context: MLRun execution context.
+    :param workflow_log_message: Message used for logging.
+    """
+    try:
+        run.wait_for_completion()
+    except Exception as exc:
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            workflow=workflow_log_message,
+            exc=err_to_str(exc),
+        )
+
+    pipeline_state, _, _ = project.get_run_status(run)
+    context.log_result(key="workflow_state", value=pipeline_state, commit=True)
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise RuntimeError(
+            f"Workflow {workflow_log_message} failed, state={pipeline_state}"
+        )
+
+
+def import_remote_project(
+    context: mlrun.execution.MLClientCtx,
+    url: str = None,
+    project_name: str = "",
+    init_git: bool = None,
+    subpath: str = None,
+    clone: bool = False,
+    save: bool = True,
+    project_context: str = None,
+):
+    """
+    This function loads a project from a given remote source.
+
+    :param context: mlrun context.
+    :param url: remote url that represents the project's source.
+                See 'mlrun.load_project()' for details
+    :param project_name: project name
+    :param init_git: if True, will git init the context dir
+    :param subpath: project subpath (within the archive)
+    :param clone: if True, always clone (delete any existing content)
+    :param save: whether to save the created project and artifact in the DB
+    :param project_context: project context path (used for loading the project)
+    """
+    project = mlrun.load_project(
+        context=project_context or f"./{project_name}",
+        url=url,
+        name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        save=save,
+        sync_functions=True,
+    )
+
+    context.logger.info(f"Loaded project {project.name} successfully")
mlrun/projects/project.py
CHANGED
@@ -40,6 +40,7 @@ import requests
 import yaml
 from mlrun_pipelines.models import PipelineNodeWrapper
 
+import mlrun.common.formatters
 import mlrun.common.helpers
 import mlrun.common.runtimes.constants
 import mlrun.common.schemas.artifact
@@ -47,6 +48,7 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
 import mlrun.errors
 import mlrun.k8s_utils
+import mlrun.lists
 import mlrun.model_monitoring.applications as mm_app
 import mlrun.runtimes
 import mlrun.runtimes.nuclio.api_gateway
@@ -3799,6 +3801,9 @@ class MlrunProject(ModelObj):
         category: typing.Union[str, mlrun.common.schemas.ArtifactCategories] = None,
         tree: str = None,
         limit: int = None,
+        format_: Optional[
+            mlrun.common.formatters.ArtifactFormat
+        ] = mlrun.common.formatters.ArtifactFormat.full,
     ) -> mlrun.lists.ArtifactList:
         """List artifacts filtered by various parameters.
 
@@ -3829,6 +3834,7 @@ class MlrunProject(ModelObj):
         :param category: Return artifacts of the requested category.
         :param tree: Return artifacts of the requested tree.
         :param limit: Maximum number of artifacts to return.
+        :param format_: The format in which to return the artifacts. Default is 'full'.
         """
         db = mlrun.db.get_run_db(secrets=self._secrets)
         return db.list_artifacts(
@@ -3843,6 +3849,7 @@ class MlrunProject(ModelObj):
             kind=kind,
             category=category,
             tree=tree,
+            format_=format_,
             limit=limit,
         )
 
@@ -3856,6 +3863,10 @@ class MlrunProject(ModelObj):
         iter: int = None,
         best_iteration: bool = False,
         tree: str = None,
+        limit: int = None,
+        format_: Optional[
+            mlrun.common.formatters.ArtifactFormat
+        ] = mlrun.common.formatters.ArtifactFormat.full,
     ):
         """List models in project, filtered by various parameters.
 
@@ -3879,6 +3890,8 @@ class MlrunProject(ModelObj):
                             artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
                             from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
         :param tree: Return artifacts of the requested tree.
+        :param limit: Maximum number of artifacts to return.
+        :param format_: The format in which to return the artifacts. Default is 'full'.
         """
         db = mlrun.db.get_run_db(secrets=self._secrets)
         return db.list_artifacts(
@@ -3892,6 +3905,8 @@ class MlrunProject(ModelObj):
             best_iteration=best_iteration,
             kind="model",
             tree=tree,
+            limit=limit,
+            format_=format_,
         ).to_objects()
 
     def list_functions(self, name=None, tag=None, labels=None):
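
A hedged usage sketch of the new `format_` and `limit` parameters on `list_artifacts`; the project name is an assumption, and `ArtifactFormat.full` is the default shown in the diff:

import mlrun
import mlrun.common.formatters

# assumed example project; any existing project behaves the same way
project = mlrun.get_or_create_project("demo", context="./demo")

artifacts = project.list_artifacts(
    kind="model",
    limit=5,  # cap the number of returned artifacts
    format_=mlrun.common.formatters.ArtifactFormat.full,  # default format
)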
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -607,7 +607,7 @@ class ServingRuntime(RemoteRuntime):
     ):
         # initialize or create required streams/queues
         self.spec.graph.check_and_process_graph()
-        self.spec.graph.init_queues()
+        self.spec.graph.create_queue_streams()
         functions_in_steps = self.spec.graph.list_child_functions()
         child_functions = list(self._spec.function_refs.keys())
         for function in functions_in_steps:
mlrun/serving/routers.py
CHANGED
@@ -491,6 +491,7 @@ class VotingEnsemble(ParallelRun):
         executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
         format_response_with_col_name_flag: bool = False,
         prediction_col_name: str = "prediction",
+        shard_by_endpoint: typing.Optional[bool] = None,
         **kwargs,
     ):
         """Voting Ensemble
@@ -580,6 +581,8 @@ class VotingEnsemble(ParallelRun):
                             `{id: <id>, model_name: <name>, outputs: {..., prediction: [<predictions>], ...}}`
                             the prediction_col_name should be `prediction`.
                             by default, `prediction`
+        :param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
+                            monitoring stream. Defaults to True.
         :param kwargs: extra arguments
         """
         super().__init__(
@@ -606,6 +609,7 @@ class VotingEnsemble(ParallelRun):
         self.prediction_col_name = prediction_col_name or "prediction"
         self.format_response_with_col_name_flag = format_response_with_col_name_flag
         self.model_endpoint_uid = None
+        self.shard_by_endpoint = shard_by_endpoint
 
     def post_init(self, mode="sync"):
         server = getattr(self.context, "_server", None) or getattr(
@@ -907,7 +911,12 @@ class VotingEnsemble(ParallelRun):
         if self._model_logger and self.log_router:
             if "id" not in request:
                 request["id"] = response.body["id"]
-            self._model_logger.push(start, request, response.body)
+            partition_key = (
+                self.model_endpoint_uid if self.shard_by_endpoint is not False else None
+            )
+            self._model_logger.push(
+                start, request, response.body, partition_key=partition_key
+            )
         event.body = _update_result_body(
             self._result_path, original_body, response.body if response else None
         )
mlrun/serving/states.py
CHANGED
@@ -839,6 +839,8 @@ class QueueStep(BaseStep):
             retention_in_hours=self.retention_in_hours,
             **self.options,
         )
+        if hasattr(self._stream, "create_stream"):
+            self._stream.create_stream()
         self._set_error_handler()
 
     @property
@@ -1247,8 +1249,8 @@ class FlowStep(BaseStep):
                 links[next_step.function] = step
         return links
 
-    def init_queues(self):
-        """init/create the streams used in this flow"""
+    def create_queue_streams(self):
+        """create the streams used in this flow"""
         for step in self.get_children():
             if step.kind == StepKinds.queue:
                 step.init_object(self.context, None)
mlrun/serving/v2_serving.py
CHANGED
@@ -39,6 +39,7 @@ class V2ModelServer(StepToDict):
         protocol=None,
         input_path: str = None,
         result_path: str = None,
+        shard_by_endpoint: Optional[bool] = None,
         **kwargs,
     ):
         """base model serving class (v2), using similar API to KFServing v2 and Triton
@@ -91,6 +92,8 @@ class V2ModelServer(StepToDict):
                             this require that the event body will behave like a dict, example:
                             event: {"x": 5} , result_path="resp" means the returned response will be written
                             to event["y"] resulting in {"x": 5, "resp": <result>}
+        :param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
+                            monitoring stream. Defaults to True.
         :param kwargs: extra arguments (can be accessed using self.get_param(key))
         """
         self.name = name
@@ -119,7 +122,9 @@ class V2ModelServer(StepToDict):
         if model:
             self.model = model
             self.ready = True
+        self._versioned_model_name = None
         self.model_endpoint_uid = None
+        self.shard_by_endpoint = shard_by_endpoint
 
     def _load_and_update_state(self):
         try:
@@ -225,6 +230,23 @@ class V2ModelServer(StepToDict):
         request = self.preprocess(event_body, op)
         return self.validate(request, op)
 
+    @property
+    def versioned_model_name(self):
+        if self._versioned_model_name:
+            return self._versioned_model_name
+
+        # Generating version model value based on the model name and model version
+        if self.model_path and self.model_path.startswith("store://"):
+            # Enrich the model server with the model artifact metadata
+            self.get_model()
+            if not self.version:
+                # Enrich the model version with the model artifact tag
+                self.version = self.model_spec.tag
+                self.labels = self.model_spec.labels
+        version = self.version or "latest"
+        self._versioned_model_name = f"{self.name}:{version}"
+        return self._versioned_model_name
+
     def do_event(self, event, *args, **kwargs):
         """main model event handler method"""
         start = now_date()
@@ -232,6 +254,11 @@ class V2ModelServer(StepToDict):
         event_body = _extract_input_data(self._input_path, event.body)
         event_id = event.id
         op = event.path.strip("/")
+
+        partition_key = (
+            self.model_endpoint_uid if self.shard_by_endpoint is not False else None
+        )
+
         if event_body and isinstance(event_body, dict):
             op = op or event_body.get("operation")
             event_id = event_body.get("id", event_id)
@@ -251,7 +278,13 @@ class V2ModelServer(StepToDict):
             except Exception as exc:
                 request["id"] = event_id
                 if self._model_logger:
-                    self._model_logger.push(start, request, op=op, error=exc)
+                    self._model_logger.push(
+                        start,
+                        request,
+                        op=op,
+                        error=exc,
+                        partition_key=partition_key,
+                    )
                 raise exc
 
             response = {
@@ -288,7 +321,7 @@ class V2ModelServer(StepToDict):
             setattr(event, "terminated", True)
             event_body = {
                 "name": self.name,
-                "version": self.version,
+                "version": self.version or "",
                 "inputs": [],
                 "outputs": [],
             }
@@ -308,7 +341,13 @@ class V2ModelServer(StepToDict):
             except Exception as exc:
                 request["id"] = event_id
                 if self._model_logger:
-                    self._model_logger.push(start, request, op=op, error=exc)
+                    self._model_logger.push(
+                        start,
+                        request,
+                        op=op,
+                        error=exc,
+                        partition_key=partition_key,
+                    )
                 raise exc
 
             response = {
@@ -332,12 +371,20 @@ class V2ModelServer(StepToDict):
         if self._model_logger:
             inputs, outputs = self.logged_results(request, response, op)
             if inputs is None and outputs is None:
-                self._model_logger.push(start, request, response, op)
+                self._model_logger.push(
+                    start, request, response, op, partition_key=partition_key
+                )
             else:
                 track_request = {"id": event_id, "inputs": inputs or []}
                 track_response = {"outputs": outputs or []}
                 # TODO : check dict/list
-                self._model_logger.push(start, track_request, track_response, op)
+                self._model_logger.push(
+                    start,
+                    track_request,
+                    track_response,
+                    op,
+                    partition_key=partition_key,
+                )
         event.body = _update_result_body(self._result_path, original_body, response)
         return event
 
@@ -454,7 +501,7 @@ class _ModelLogPusher:
             base_data["labels"] = self.model.labels
         return base_data
 
-    def push(self, start, request, resp=None, op=None, error=None):
+    def push(self, start, request, resp=None, op=None, error=None, partition_key=None):
         start_str = start.isoformat(sep=" ", timespec="microseconds")
         if error:
             data = self.base_data()
@@ -465,7 +512,7 @@ class _ModelLogPusher:
             if self.verbose:
                 message = f"{message}\n{traceback.format_exc()}"
             data["error"] = message
-            self.output_stream.push([data])
+            self.output_stream.push([data], partition_key=partition_key)
             return
 
         self._sample_iter = (self._sample_iter + 1) % self.stream_sample
@@ -491,7 +538,7 @@ class _ModelLogPusher:
                 "metrics",
             ]
             data["values"] = self._batch
-            self.output_stream.push([data])
+            self.output_stream.push([data], partition_key=partition_key)
         else:
             data = self.base_data()
             data["request"] = request
@@ -501,7 +548,7 @@ class _ModelLogPusher:
             data["microsec"] = microsec
             if getattr(self.model, "metrics", None):
                 data["metrics"] = self.model.metrics
-            self.output_stream.push([data])
+            self.output_stream.push([data], partition_key=partition_key)
 
 
 def _init_endpoint_record(
@@ -531,21 +578,10 @@ def _init_endpoint_record(
         logger.error("Failed to parse function URI", exc=err_to_str(e))
         return None
 
-    # Generating version model value based on the model name and model version
-    if model.model_path and model.model_path.startswith("store://"):
-        # Enrich the model server with the model artifact metadata
-        model.get_model()
-        if not model.version:
-            # Enrich the model version with the model artifact tag
-            model.version = model.model_spec.tag
-            model.labels = model.model_spec.labels
-        versioned_model_name = f"{model.name}:{model.version}"
-    else:
-        versioned_model_name = f"{model.name}:latest"
-
     # Generating model endpoint ID based on function uri and model version
     uid = mlrun.common.model_monitoring.create_model_endpoint_uid(
-        function_uri=graph_server.function_uri, versioned_model=versioned_model_name
+        function_uri=graph_server.function_uri,
+        versioned_model=model.versioned_model_name,
     ).uid
 
     try:
@@ -568,7 +604,7 @@ def _init_endpoint_record(
         ),
         spec=mlrun.common.schemas.ModelEndpointSpec(
             function_uri=graph_server.function_uri,
-            model=versioned_model_name,
+            model=model.versioned_model_name,
             model_class=model.__class__.__name__,
             model_uri=model.model_path,
             stream_path=model.context.stream.stream_uri,
mlrun/utils/helpers.py
CHANGED
@@ -1226,14 +1226,24 @@ def datetime_to_iso(time_obj: Optional[datetime]) -> Optional[str]:
     return time_obj.isoformat()
 
 
-def enrich_datetime_with_tz_info(timestamp_string):
+def enrich_datetime_with_tz_info(timestamp_string) -> Optional[datetime]:
     if not timestamp_string:
         return timestamp_string
 
     if timestamp_string and not mlrun.utils.helpers.has_timezone(timestamp_string):
         timestamp_string += datetime.now(timezone.utc).astimezone().strftime("%z")
 
-    return datetime.strptime(timestamp_string, "%Y-%m-%d %H:%M:%S.%f%z")
+    for _format in [
+        # e.g: 2021-08-25 12:00:00.000Z
+        "%Y-%m-%d %H:%M:%S.%f%z",
+        # e.g: 2024-11-11 07:44:56+0000
+        "%Y-%m-%d %H:%M:%S%z",
+    ]:
+        try:
+            return datetime.strptime(timestamp_string, _format)
+        except ValueError as exc:
+            last_exc = exc
+    raise last_exc
 
 
 def has_timezone(timestamp):
mlrun/utils/version/version.json
CHANGED