mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -1
- mlrun/common/db/dialects.py +25 -0
- mlrun/common/schemas/background_task.py +5 -0
- mlrun/common/schemas/function.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +16 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
- mlrun/common/schemas/partition.py +13 -3
- mlrun/common/schemas/project.py +4 -0
- mlrun/common/schemas/serving.py +2 -0
- mlrun/config.py +11 -22
- mlrun/datastore/utils.py +3 -2
- mlrun/db/__init__.py +1 -0
- mlrun/db/base.py +11 -10
- mlrun/db/httpdb.py +97 -25
- mlrun/db/nopdb.py +5 -4
- mlrun/db/sql_types.py +160 -0
- mlrun/frameworks/tf_keras/__init__.py +4 -4
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
- mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
- mlrun/frameworks/tf_keras/model_handler.py +80 -9
- mlrun/frameworks/tf_keras/utils.py +12 -1
- mlrun/launcher/base.py +6 -1
- mlrun/launcher/client.py +1 -22
- mlrun/launcher/local.py +0 -4
- mlrun/model_monitoring/applications/base.py +21 -1
- mlrun/model_monitoring/applications/context.py +2 -1
- mlrun/projects/pipelines.py +35 -3
- mlrun/projects/project.py +13 -29
- mlrun/run.py +37 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/kubejob.py +0 -4
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/nuclio/function.py +0 -2
- mlrun/runtimes/nuclio/serving.py +14 -51
- mlrun/runtimes/pod.py +0 -3
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +159 -123
- mlrun/serving/states.py +215 -18
- mlrun/serving/system_steps.py +391 -0
- mlrun/serving/v2_serving.py +9 -8
- mlrun/utils/helpers.py +19 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/METADATA +22 -18
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/RECORD +52 -50
- mlrun/common/db/sql_session.py +0 -79
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py
CHANGED
|
@@ -39,7 +39,12 @@ from mlrun.utils import (
|
|
|
39
39
|
|
|
40
40
|
from ..common.helpers import parse_versioned_object_uri
|
|
41
41
|
from ..config import config
|
|
42
|
-
from ..run import
|
|
42
|
+
from ..run import (
|
|
43
|
+
_run_pipeline,
|
|
44
|
+
retry_pipeline,
|
|
45
|
+
terminate_pipeline,
|
|
46
|
+
wait_for_pipeline_completion,
|
|
47
|
+
)
|
|
43
48
|
from ..runtimes.pod import AutoMountType
|
|
44
49
|
|
|
45
50
|
|
|
@@ -696,6 +701,24 @@ class _KFPRunner(_PipelineRunner):
|
|
|
696
701
|
)
|
|
697
702
|
return run_id
|
|
698
703
|
|
|
704
|
+
@classmethod
|
|
705
|
+
def terminate(
|
|
706
|
+
cls,
|
|
707
|
+
run: "_PipelineRunStatus",
|
|
708
|
+
project: typing.Optional["mlrun.projects.MlrunProject"] = None,
|
|
709
|
+
) -> str:
|
|
710
|
+
project_name = project.metadata.name if project else ""
|
|
711
|
+
logger.info(
|
|
712
|
+
"Terminating pipeline",
|
|
713
|
+
run_id=run.run_id,
|
|
714
|
+
project=project_name,
|
|
715
|
+
)
|
|
716
|
+
run_id = terminate_pipeline(
|
|
717
|
+
run.run_id,
|
|
718
|
+
project=project_name,
|
|
719
|
+
)
|
|
720
|
+
return run_id
|
|
721
|
+
|
|
699
722
|
@staticmethod
|
|
700
723
|
def wait_for_completion(
|
|
701
724
|
run: "_PipelineRunStatus",
|
|
@@ -1130,6 +1153,7 @@ def load_and_run_workflow(
|
|
|
1130
1153
|
project = mlrun.get_or_create_project(
|
|
1131
1154
|
context=project_context or f"./{project_name}",
|
|
1132
1155
|
name=project_name,
|
|
1156
|
+
allow_cross_project=True,
|
|
1133
1157
|
)
|
|
1134
1158
|
|
|
1135
1159
|
# extract "start" notification if exists
|
|
@@ -1145,7 +1169,9 @@ def load_and_run_workflow(
|
|
|
1145
1169
|
notification.when = ["running"]
|
|
1146
1170
|
|
|
1147
1171
|
workflow_log_message = workflow_name or workflow_path
|
|
1148
|
-
context.logger.info(
|
|
1172
|
+
context.logger.info(
|
|
1173
|
+
"Running workflow from remote", workflow_log_message=workflow_log_message
|
|
1174
|
+
)
|
|
1149
1175
|
run = project.run(
|
|
1150
1176
|
name=workflow_name,
|
|
1151
1177
|
workflow_path=workflow_path,
|
|
@@ -1162,6 +1188,11 @@ def load_and_run_workflow(
|
|
|
1162
1188
|
notifications=start_notifications,
|
|
1163
1189
|
context=context,
|
|
1164
1190
|
)
|
|
1191
|
+
# Patch the current run object (the workflow-runner) with the workflow-id label
|
|
1192
|
+
context.logger.info(
|
|
1193
|
+
"Associating workflow-runner with workflow ID", run_id=run.run_id
|
|
1194
|
+
)
|
|
1195
|
+
context.set_label("workflow-id", run.run_id)
|
|
1165
1196
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1166
1197
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1167
1198
|
|
|
@@ -1215,6 +1246,7 @@ def pull_remote_project_files(
|
|
|
1215
1246
|
subpath=subpath,
|
|
1216
1247
|
clone=clone,
|
|
1217
1248
|
save=False,
|
|
1249
|
+
allow_cross_project=True,
|
|
1218
1250
|
)
|
|
1219
1251
|
except Exception as error:
|
|
1220
1252
|
notify_scheduled_workflow_failure(
|
|
@@ -1321,4 +1353,4 @@ def import_remote_project(
|
|
|
1321
1353
|
sync_functions=True,
|
|
1322
1354
|
)
|
|
1323
1355
|
|
|
1324
|
-
context.logger.info(
|
|
1356
|
+
context.logger.info("Loaded project successfully", project_name=project.name)
|
mlrun/projects/project.py
CHANGED
|
@@ -2518,7 +2518,6 @@ class MlrunProject(ModelObj):
|
|
|
2518
2518
|
|
|
2519
2519
|
def enable_model_monitoring(
|
|
2520
2520
|
self,
|
|
2521
|
-
default_controller_image: str = "mlrun/mlrun",
|
|
2522
2521
|
base_period: int = 10,
|
|
2523
2522
|
image: str = "mlrun/mlrun",
|
|
2524
2523
|
*,
|
|
@@ -2534,7 +2533,6 @@ class MlrunProject(ModelObj):
|
|
|
2534
2533
|
The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
|
|
2535
2534
|
is detected. It processes the new events into statistics that are then written to statistics databases.
|
|
2536
2535
|
|
|
2537
|
-
:param default_controller_image: Deprecated.
|
|
2538
2536
|
:param base_period: The time period in minutes in which the model monitoring controller
|
|
2539
2537
|
function is triggered. By default, the base period is 10 minutes
|
|
2540
2538
|
(which is also the minimum value for production environments).
|
|
@@ -2562,14 +2560,6 @@ class MlrunProject(ModelObj):
|
|
|
2562
2560
|
background, including the histogram data drift app if selected.
|
|
2563
2561
|
:param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
|
|
2564
2562
|
"""
|
|
2565
|
-
if default_controller_image != "mlrun/mlrun":
|
|
2566
|
-
# TODO: Remove this in 1.10.0
|
|
2567
|
-
warnings.warn(
|
|
2568
|
-
"'default_controller_image' is deprecated in 1.7.0 and will be removed in 1.10.0, "
|
|
2569
|
-
"use 'image' instead",
|
|
2570
|
-
FutureWarning,
|
|
2571
|
-
)
|
|
2572
|
-
image = default_controller_image
|
|
2573
2563
|
if base_period < 10:
|
|
2574
2564
|
logger.warn(
|
|
2575
2565
|
"enable_model_monitoring: 'base_period' < 10 minutes is not supported in production environments",
|
|
@@ -2971,19 +2961,6 @@ class MlrunProject(ModelObj):
|
|
|
2971
2961
|
mlrun.db.get_run_db().delete_function(name=name, project=self.metadata.name)
|
|
2972
2962
|
self.spec.remove_function(name)
|
|
2973
2963
|
|
|
2974
|
-
def remove_model_monitoring_function(self, name: Union[str, list[str]]):
|
|
2975
|
-
"""delete the specified model-monitoring-app function/s
|
|
2976
|
-
|
|
2977
|
-
:param name: name of the model-monitoring-function/s (under the project)
|
|
2978
|
-
"""
|
|
2979
|
-
# TODO: Remove this in 1.10.0
|
|
2980
|
-
warnings.warn(
|
|
2981
|
-
"'remove_model_monitoring_function' is deprecated in 1.7.0 and will be removed in 1.10.0. "
|
|
2982
|
-
"Please use `delete_model_monitoring_function` instead.",
|
|
2983
|
-
FutureWarning,
|
|
2984
|
-
)
|
|
2985
|
-
self.delete_model_monitoring_function(name)
|
|
2986
|
-
|
|
2987
2964
|
def delete_model_monitoring_function(self, name: Union[str, list[str]]):
|
|
2988
2965
|
"""delete the specified model-monitoring-app function/s
|
|
2989
2966
|
|
|
@@ -3848,7 +3825,8 @@ class MlrunProject(ModelObj):
|
|
|
3848
3825
|
)
|
|
3849
3826
|
|
|
3850
3827
|
The replication factor and timeout configuration might need to be adjusted according to your Confluent cluster
|
|
3851
|
-
type and settings.
|
|
3828
|
+
type and settings. Nuclio annotations for the model monitoring infrastructure and application functions are
|
|
3829
|
+
supported through ``kwargs_public={"nuclio_annotations": {...}, ...}``.
|
|
3852
3830
|
|
|
3853
3831
|
:param tsdb_profile_name: The datastore profile name of the time-series database to be used in model
|
|
3854
3832
|
monitoring. The supported profiles are:
|
|
@@ -4278,11 +4256,17 @@ class MlrunProject(ModelObj):
|
|
|
4278
4256
|
function = mlrun.new_function("mlrun--project--image--builder", kind="job")
|
|
4279
4257
|
|
|
4280
4258
|
if self.spec.source and not self.spec.load_source_on_run:
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4259
|
+
if self.spec.source.startswith("db://"):
|
|
4260
|
+
logger.debug(
|
|
4261
|
+
"Project source is 'db://', which refers to metadata stored in the MLRun DB."
|
|
4262
|
+
" Skipping source archive setup for image build"
|
|
4263
|
+
)
|
|
4264
|
+
else:
|
|
4265
|
+
function.with_source_archive(
|
|
4266
|
+
source=self.spec.source,
|
|
4267
|
+
target_dir=target_dir,
|
|
4268
|
+
pull_at_runtime=False,
|
|
4269
|
+
)
|
|
4286
4270
|
|
|
4287
4271
|
build = self.spec.build
|
|
4288
4272
|
result = self.build_function(
|
mlrun/run.py
CHANGED
|
@@ -894,7 +894,6 @@ def _run_pipeline(
|
|
|
894
894
|
def retry_pipeline(
|
|
895
895
|
run_id: str,
|
|
896
896
|
project: str,
|
|
897
|
-
namespace: Optional[str] = None,
|
|
898
897
|
) -> str:
|
|
899
898
|
"""Retry a pipeline run.
|
|
900
899
|
|
|
@@ -903,7 +902,6 @@ def retry_pipeline(
|
|
|
903
902
|
|
|
904
903
|
:param run_id: ID of the pipeline run to retry.
|
|
905
904
|
:param project: name of the project associated with the pipeline run.
|
|
906
|
-
:param namespace: Optional; Kubernetes namespace to use if not the default.
|
|
907
905
|
|
|
908
906
|
:returns: ID of the retried pipeline run or the ID of a cloned run if the original run is not retryable.
|
|
909
907
|
:raises ValueError: If access to the remote API service is not available.
|
|
@@ -918,7 +916,6 @@ def retry_pipeline(
|
|
|
918
916
|
pipeline_run_id = mldb.retry_pipeline(
|
|
919
917
|
run_id=run_id,
|
|
920
918
|
project=project,
|
|
921
|
-
namespace=namespace,
|
|
922
919
|
)
|
|
923
920
|
if pipeline_run_id == run_id:
|
|
924
921
|
logger.info(
|
|
@@ -931,6 +928,35 @@ def retry_pipeline(
|
|
|
931
928
|
return pipeline_run_id
|
|
932
929
|
|
|
933
930
|
|
|
931
|
+
def terminate_pipeline(
|
|
932
|
+
run_id: str,
|
|
933
|
+
project: str,
|
|
934
|
+
) -> str:
|
|
935
|
+
"""Terminate a pipeline run.
|
|
936
|
+
|
|
937
|
+
This function terminates a running pipeline with the specified run ID. If the run is not in a
|
|
938
|
+
terminable state, an error is raised.
|
|
939
|
+
|
|
940
|
+
:param run_id: ID of the pipeline run to terminate.
|
|
941
|
+
:param project: name of the project associated with the pipeline run.
|
|
942
|
+
|
|
943
|
+
:returns: ID of the terminate pipeline run background task.
|
|
944
|
+
:raises ValueError: If access to the remote API service is not available.
|
|
945
|
+
"""
|
|
946
|
+
mldb = mlrun.db.get_run_db()
|
|
947
|
+
if mldb.kind != "http":
|
|
948
|
+
raise ValueError(
|
|
949
|
+
"Terminating a pipeline requires access to remote API service. "
|
|
950
|
+
"Please set the dbpath URL."
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
pipeline_run_task = mldb.terminate_pipeline(
|
|
954
|
+
run_id=run_id,
|
|
955
|
+
project=project,
|
|
956
|
+
)
|
|
957
|
+
return pipeline_run_task["metadata"]["id"]
|
|
958
|
+
|
|
959
|
+
|
|
934
960
|
def wait_for_pipeline_completion(
|
|
935
961
|
run_id,
|
|
936
962
|
timeout=60 * 60,
|
|
@@ -997,7 +1023,10 @@ def wait_for_pipeline_completion(
|
|
|
997
1023
|
_wait_for_pipeline_completion,
|
|
998
1024
|
)
|
|
999
1025
|
else:
|
|
1000
|
-
client = mlrun_pipelines.utils.get_client(
|
|
1026
|
+
client = mlrun_pipelines.utils.get_client(
|
|
1027
|
+
logger=logger,
|
|
1028
|
+
namespace=namespace,
|
|
1029
|
+
)
|
|
1001
1030
|
resp = client.wait_for_run_completion(run_id, timeout)
|
|
1002
1031
|
if resp:
|
|
1003
1032
|
resp = resp.to_dict()
|
|
@@ -1058,7 +1087,10 @@ def get_pipeline(
|
|
|
1058
1087
|
)
|
|
1059
1088
|
|
|
1060
1089
|
else:
|
|
1061
|
-
client = mlrun_pipelines.utils.get_client(
|
|
1090
|
+
client = mlrun_pipelines.utils.get_client(
|
|
1091
|
+
logger=logger,
|
|
1092
|
+
namespace=namespace,
|
|
1093
|
+
)
|
|
1062
1094
|
resp = client.get_run(run_id)
|
|
1063
1095
|
if resp:
|
|
1064
1096
|
resp = resp.to_dict()
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -92,7 +92,6 @@ class DaskSpec(KubeResourceSpec):
|
|
|
92
92
|
preemption_mode=None,
|
|
93
93
|
security_context=None,
|
|
94
94
|
state_thresholds=None,
|
|
95
|
-
serving_spec=None,
|
|
96
95
|
):
|
|
97
96
|
super().__init__(
|
|
98
97
|
command=command,
|
|
@@ -122,7 +121,6 @@ class DaskSpec(KubeResourceSpec):
|
|
|
122
121
|
preemption_mode=preemption_mode,
|
|
123
122
|
security_context=security_context,
|
|
124
123
|
state_thresholds=state_thresholds,
|
|
125
|
-
serving_spec=serving_spec,
|
|
126
124
|
)
|
|
127
125
|
self.args = args
|
|
128
126
|
|
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -207,7 +207,3 @@ class KubejobRuntime(KubeResource):
|
|
|
207
207
|
raise NotImplementedError(
|
|
208
208
|
f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
|
|
209
209
|
)
|
|
210
|
-
|
|
211
|
-
@property
|
|
212
|
-
def serving_spec(self):
|
|
213
|
-
return self.spec.serving_spec
|
|
@@ -54,7 +54,6 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
54
54
|
preemption_mode=None,
|
|
55
55
|
security_context=None,
|
|
56
56
|
state_thresholds=None,
|
|
57
|
-
serving_spec=None,
|
|
58
57
|
):
|
|
59
58
|
super().__init__(
|
|
60
59
|
command=command,
|
|
@@ -84,7 +83,6 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
84
83
|
preemption_mode=preemption_mode,
|
|
85
84
|
security_context=security_context,
|
|
86
85
|
state_thresholds=state_thresholds,
|
|
87
|
-
serving_spec=serving_spec,
|
|
88
86
|
)
|
|
89
87
|
self.mpi_args = mpi_args or [
|
|
90
88
|
"-x",
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -49,7 +49,6 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
49
49
|
preemption_mode=None,
|
|
50
50
|
security_context=None,
|
|
51
51
|
state_thresholds=None,
|
|
52
|
-
serving_spec=None,
|
|
53
52
|
):
|
|
54
53
|
super().__init__(
|
|
55
54
|
command=command,
|
|
@@ -80,7 +79,6 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
80
79
|
preemption_mode=preemption_mode,
|
|
81
80
|
security_context=security_context,
|
|
82
81
|
state_thresholds=state_thresholds,
|
|
83
|
-
serving_spec=serving_spec,
|
|
84
82
|
)
|
|
85
83
|
self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()
|
|
86
84
|
|
|
@@ -154,7 +154,6 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
154
154
|
add_templated_ingress_host_mode=None,
|
|
155
155
|
state_thresholds=None,
|
|
156
156
|
disable_default_http_trigger=None,
|
|
157
|
-
serving_spec=None,
|
|
158
157
|
):
|
|
159
158
|
super().__init__(
|
|
160
159
|
command=command,
|
|
@@ -184,7 +183,6 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
184
183
|
preemption_mode=preemption_mode,
|
|
185
184
|
security_context=security_context,
|
|
186
185
|
state_thresholds=state_thresholds,
|
|
187
|
-
serving_spec=serving_spec,
|
|
188
186
|
)
|
|
189
187
|
|
|
190
188
|
self.base_spec = base_spec or {}
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import copy
|
|
15
14
|
import json
|
|
16
15
|
import os
|
|
17
16
|
import warnings
|
|
@@ -43,8 +42,6 @@ from mlrun.serving.states import (
|
|
|
43
42
|
)
|
|
44
43
|
from mlrun.utils import get_caller_globals, logger, set_paths
|
|
45
44
|
|
|
46
|
-
from .. import KubejobRuntime
|
|
47
|
-
from ..pod import KubeResourceSpec
|
|
48
45
|
from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
|
|
49
46
|
|
|
50
47
|
serving_subkind = "serving_v2"
|
|
@@ -152,7 +149,6 @@ class ServingSpec(NuclioSpec):
|
|
|
152
149
|
state_thresholds=None,
|
|
153
150
|
disable_default_http_trigger=None,
|
|
154
151
|
model_endpoint_creation_task_name=None,
|
|
155
|
-
serving_spec=None,
|
|
156
152
|
):
|
|
157
153
|
super().__init__(
|
|
158
154
|
command=command,
|
|
@@ -193,7 +189,6 @@ class ServingSpec(NuclioSpec):
|
|
|
193
189
|
service_type=service_type,
|
|
194
190
|
add_templated_ingress_host_mode=add_templated_ingress_host_mode,
|
|
195
191
|
disable_default_http_trigger=disable_default_http_trigger,
|
|
196
|
-
serving_spec=serving_spec,
|
|
197
192
|
)
|
|
198
193
|
|
|
199
194
|
self.models = models or {}
|
|
@@ -482,6 +477,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
482
477
|
state = TaskStep(
|
|
483
478
|
class_name,
|
|
484
479
|
class_args,
|
|
480
|
+
name=key,
|
|
485
481
|
handler=handler,
|
|
486
482
|
function=child_function,
|
|
487
483
|
model_endpoint_creation_strategy=creation_strategy,
|
|
@@ -707,7 +703,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
707
703
|
"track_models": self.spec.track_models,
|
|
708
704
|
"default_content_type": self.spec.default_content_type,
|
|
709
705
|
"model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
|
|
710
|
-
"filename": getattr(self.spec, "filename", None),
|
|
711
706
|
}
|
|
712
707
|
|
|
713
708
|
if self.spec.secret_sources:
|
|
@@ -716,10 +711,6 @@ class ServingRuntime(RemoteRuntime):
|
|
|
716
711
|
|
|
717
712
|
return json.dumps(serving_spec)
|
|
718
713
|
|
|
719
|
-
@property
|
|
720
|
-
def serving_spec(self):
|
|
721
|
-
return self._get_serving_spec()
|
|
722
|
-
|
|
723
714
|
def to_mock_server(
|
|
724
715
|
self,
|
|
725
716
|
namespace=None,
|
|
@@ -751,13 +742,10 @@ class ServingRuntime(RemoteRuntime):
|
|
|
751
742
|
set_paths(workdir)
|
|
752
743
|
os.chdir(workdir)
|
|
753
744
|
|
|
754
|
-
system_graph = None
|
|
755
|
-
if isinstance(self.spec.graph, RootFlowStep):
|
|
756
|
-
system_graph = add_system_steps_to_graph(copy.deepcopy(self.spec.graph))
|
|
757
745
|
server = create_graph_server(
|
|
758
746
|
parameters=self.spec.parameters,
|
|
759
747
|
load_mode=self.spec.load_mode,
|
|
760
|
-
graph=
|
|
748
|
+
graph=self.spec.graph,
|
|
761
749
|
verbose=self.verbose,
|
|
762
750
|
current_function=current_function,
|
|
763
751
|
graph_initializer=self.spec.graph_initializer,
|
|
@@ -778,6 +766,18 @@ class ServingRuntime(RemoteRuntime):
|
|
|
778
766
|
monitoring_mock=self.spec.track_models,
|
|
779
767
|
)
|
|
780
768
|
|
|
769
|
+
if (
|
|
770
|
+
isinstance(self.spec.graph, RootFlowStep)
|
|
771
|
+
and self.spec.graph.include_monitored_step()
|
|
772
|
+
):
|
|
773
|
+
server.graph = add_system_steps_to_graph(
|
|
774
|
+
server.project,
|
|
775
|
+
server.graph,
|
|
776
|
+
self.spec.track_models,
|
|
777
|
+
server.context,
|
|
778
|
+
self.spec,
|
|
779
|
+
)
|
|
780
|
+
|
|
781
781
|
if workdir:
|
|
782
782
|
os.chdir(old_workdir)
|
|
783
783
|
|
|
@@ -815,40 +815,3 @@ class ServingRuntime(RemoteRuntime):
|
|
|
815
815
|
"Turn off the mock (mock=False) and make sure Nuclio is installed for real deployment to Nuclio"
|
|
816
816
|
)
|
|
817
817
|
self._mock_server = self.to_mock_server()
|
|
818
|
-
|
|
819
|
-
def to_job(self) -> KubejobRuntime:
|
|
820
|
-
"""Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
|
|
821
|
-
if self.spec.function_refs:
|
|
822
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
823
|
-
f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
|
|
824
|
-
)
|
|
825
|
-
|
|
826
|
-
spec = KubeResourceSpec(
|
|
827
|
-
image=self.spec.image,
|
|
828
|
-
mode=self.spec.mode,
|
|
829
|
-
volumes=self.spec.volumes,
|
|
830
|
-
volume_mounts=self.spec.volume_mounts,
|
|
831
|
-
env=self.spec.env,
|
|
832
|
-
resources=self.spec.resources,
|
|
833
|
-
default_handler="mlrun.serving.server.execute_graph",
|
|
834
|
-
pythonpath=self.spec.pythonpath,
|
|
835
|
-
entry_points=self.spec.entry_points,
|
|
836
|
-
description=self.spec.description,
|
|
837
|
-
workdir=self.spec.workdir,
|
|
838
|
-
image_pull_secret=self.spec.image_pull_secret,
|
|
839
|
-
node_name=self.spec.node_name,
|
|
840
|
-
node_selector=self.spec.node_selector,
|
|
841
|
-
affinity=self.spec.affinity,
|
|
842
|
-
disable_auto_mount=self.spec.disable_auto_mount,
|
|
843
|
-
priority_class_name=self.spec.priority_class_name,
|
|
844
|
-
tolerations=self.spec.tolerations,
|
|
845
|
-
preemption_mode=self.spec.preemption_mode,
|
|
846
|
-
security_context=self.spec.security_context,
|
|
847
|
-
state_thresholds=self.spec.state_thresholds,
|
|
848
|
-
serving_spec=self._get_serving_spec(),
|
|
849
|
-
)
|
|
850
|
-
job = KubejobRuntime(
|
|
851
|
-
spec=spec,
|
|
852
|
-
metadata=self.metadata,
|
|
853
|
-
)
|
|
854
|
-
return job
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -103,7 +103,6 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
103
103
|
"preemption_mode",
|
|
104
104
|
"security_context",
|
|
105
105
|
"state_thresholds",
|
|
106
|
-
"serving_spec",
|
|
107
106
|
]
|
|
108
107
|
_default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
|
|
109
108
|
"volumes",
|
|
@@ -179,7 +178,6 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
179
178
|
preemption_mode=None,
|
|
180
179
|
security_context=None,
|
|
181
180
|
state_thresholds=None,
|
|
182
|
-
serving_spec=None,
|
|
183
181
|
):
|
|
184
182
|
super().__init__(
|
|
185
183
|
command=command,
|
|
@@ -225,7 +223,6 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
225
223
|
state_thresholds
|
|
226
224
|
or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
|
|
227
225
|
)
|
|
228
|
-
self.serving_spec = serving_spec
|
|
229
226
|
# Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
|
|
230
227
|
# _dict_fields and doesn't have a setter.
|
|
231
228
|
self._termination_grace_period_seconds = None
|
mlrun/runtimes/remotesparkjob.py
CHANGED
|
@@ -58,7 +58,6 @@ class RemoteSparkSpec(KubeResourceSpec):
|
|
|
58
58
|
preemption_mode=None,
|
|
59
59
|
security_context=None,
|
|
60
60
|
state_thresholds=None,
|
|
61
|
-
serving_spec=None,
|
|
62
61
|
):
|
|
63
62
|
super().__init__(
|
|
64
63
|
command=command,
|
|
@@ -88,7 +87,6 @@ class RemoteSparkSpec(KubeResourceSpec):
|
|
|
88
87
|
preemption_mode=preemption_mode,
|
|
89
88
|
security_context=security_context,
|
|
90
89
|
state_thresholds=state_thresholds,
|
|
91
|
-
serving_spec=serving_spec,
|
|
92
90
|
)
|
|
93
91
|
self.provider = provider
|
|
94
92
|
|
|
@@ -168,7 +168,6 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
168
168
|
executor_cores=None,
|
|
169
169
|
security_context=None,
|
|
170
170
|
state_thresholds=None,
|
|
171
|
-
serving_spec=None,
|
|
172
171
|
):
|
|
173
172
|
super().__init__(
|
|
174
173
|
command=command,
|
|
@@ -198,7 +197,6 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
198
197
|
preemption_mode=preemption_mode,
|
|
199
198
|
security_context=security_context,
|
|
200
199
|
state_thresholds=state_thresholds,
|
|
201
|
-
serving_spec=serving_spec,
|
|
202
200
|
)
|
|
203
201
|
|
|
204
202
|
self.driver_resources = driver_resources or {}
|
mlrun/serving/__init__.py
CHANGED
|
@@ -27,6 +27,7 @@ __all__ = [
|
|
|
27
27
|
"ModelRunner",
|
|
28
28
|
"Model",
|
|
29
29
|
"ModelSelector",
|
|
30
|
+
"MonitoredStep",
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
@@ -45,6 +46,7 @@ from .states import (
|
|
|
45
46
|
ModelRunner,
|
|
46
47
|
Model,
|
|
47
48
|
ModelSelector,
|
|
49
|
+
MonitoredStep,
|
|
48
50
|
) # noqa
|
|
49
51
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
50
52
|
from .v2_serving import V2ModelServer # noqa
|