mlrun 1.10.0rc6__py3-none-any.whl → 1.10.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -1
- mlrun/__main__.py +47 -4
- mlrun/artifacts/base.py +0 -27
- mlrun/artifacts/dataset.py +0 -8
- mlrun/artifacts/model.py +0 -7
- mlrun/artifacts/plots.py +0 -13
- mlrun/common/schemas/background_task.py +5 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +16 -0
- mlrun/common/schemas/project.py +4 -0
- mlrun/common/schemas/serving.py +2 -0
- mlrun/config.py +11 -22
- mlrun/datastore/utils.py +3 -1
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +73 -65
- mlrun/db/nopdb.py +0 -12
- mlrun/frameworks/tf_keras/__init__.py +4 -4
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
- mlrun/frameworks/tf_keras/model_handler.py +69 -9
- mlrun/frameworks/tf_keras/utils.py +12 -1
- mlrun/launcher/base.py +7 -0
- mlrun/launcher/client.py +2 -21
- mlrun/launcher/local.py +4 -0
- mlrun/model_monitoring/applications/_application_steps.py +23 -39
- mlrun/model_monitoring/applications/base.py +167 -32
- mlrun/model_monitoring/helpers.py +0 -3
- mlrun/projects/operations.py +11 -24
- mlrun/projects/pipelines.py +33 -3
- mlrun/projects/project.py +45 -89
- mlrun/run.py +37 -5
- mlrun/runtimes/daskjob.py +2 -0
- mlrun/runtimes/kubejob.py +5 -8
- mlrun/runtimes/mpijob/abstract.py +2 -0
- mlrun/runtimes/mpijob/v1.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +60 -5
- mlrun/runtimes/pod.py +3 -0
- mlrun/runtimes/remotesparkjob.py +2 -0
- mlrun/runtimes/sparkjob/spark3job.py +2 -0
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +253 -29
- mlrun/serving/states.py +215 -18
- mlrun/serving/system_steps.py +391 -0
- mlrun/serving/v2_serving.py +9 -8
- mlrun/utils/helpers.py +18 -4
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/METADATA +9 -9
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/RECORD +52 -51
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py
CHANGED
|
@@ -757,14 +757,7 @@ def _project_instance_from_struct(struct, name, allow_cross_project):
|
|
|
757
757
|
"3. Use different project context dir."
|
|
758
758
|
)
|
|
759
759
|
|
|
760
|
-
if allow_cross_project is None:
|
|
761
|
-
# TODO: Remove this warning in version 1.10.0 and also fix cli to support allow_cross_project
|
|
762
|
-
warnings.warn(
|
|
763
|
-
f"Project {name=} is different than specified on the context's project yaml. "
|
|
764
|
-
"This behavior is deprecated and will not be supported from version 1.10.0."
|
|
765
|
-
)
|
|
766
|
-
logger.warn(error_message)
|
|
767
|
-
elif allow_cross_project:
|
|
760
|
+
if allow_cross_project:
|
|
768
761
|
logger.debug(
|
|
769
762
|
"Project name is different than specified on the context's project yaml. Overriding.",
|
|
770
763
|
existing_name=name_from_struct,
|
|
@@ -2525,7 +2518,6 @@ class MlrunProject(ModelObj):
|
|
|
2525
2518
|
|
|
2526
2519
|
def enable_model_monitoring(
|
|
2527
2520
|
self,
|
|
2528
|
-
default_controller_image: str = "mlrun/mlrun",
|
|
2529
2521
|
base_period: int = 10,
|
|
2530
2522
|
image: str = "mlrun/mlrun",
|
|
2531
2523
|
*,
|
|
@@ -2541,7 +2533,6 @@ class MlrunProject(ModelObj):
|
|
|
2541
2533
|
The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
|
|
2542
2534
|
is detected. It processes the new events into statistics that are then written to statistics databases.
|
|
2543
2535
|
|
|
2544
|
-
:param default_controller_image: Deprecated.
|
|
2545
2536
|
:param base_period: The time period in minutes in which the model monitoring controller
|
|
2546
2537
|
function is triggered. By default, the base period is 10 minutes
|
|
2547
2538
|
(which is also the minimum value for production environments).
|
|
@@ -2569,14 +2560,6 @@ class MlrunProject(ModelObj):
|
|
|
2569
2560
|
background, including the histogram data drift app if selected.
|
|
2570
2561
|
:param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
|
|
2571
2562
|
"""
|
|
2572
|
-
if default_controller_image != "mlrun/mlrun":
|
|
2573
|
-
# TODO: Remove this in 1.10.0
|
|
2574
|
-
warnings.warn(
|
|
2575
|
-
"'default_controller_image' is deprecated in 1.7.0 and will be removed in 1.10.0, "
|
|
2576
|
-
"use 'image' instead",
|
|
2577
|
-
FutureWarning,
|
|
2578
|
-
)
|
|
2579
|
-
image = default_controller_image
|
|
2580
2563
|
if base_period < 10:
|
|
2581
2564
|
logger.warn(
|
|
2582
2565
|
"enable_model_monitoring: 'base_period' < 10 minutes is not supported in production environments",
|
|
@@ -3855,7 +3838,8 @@ class MlrunProject(ModelObj):
|
|
|
3855
3838
|
)
|
|
3856
3839
|
|
|
3857
3840
|
The replication factor and timeout configuration might need to be adjusted according to your Confluent cluster
|
|
3858
|
-
type and settings.
|
|
3841
|
+
type and settings. Nuclio annotations for the model monitoring infrastructure and application functions are
|
|
3842
|
+
supported through ``kwargs_public={"nuclio_annotations": {...}, ...}``.
|
|
3859
3843
|
|
|
3860
3844
|
:param tsdb_profile_name: The datastore profile name of the time-series database to be used in model
|
|
3861
3845
|
monitoring. The supported profiles are:
|
|
@@ -4111,7 +4095,7 @@ class MlrunProject(ModelObj):
|
|
|
4111
4095
|
requirements: Optional[typing.Union[str, list[str]]] = None,
|
|
4112
4096
|
mlrun_version_specifier: Optional[str] = None,
|
|
4113
4097
|
builder_env: Optional[dict] = None,
|
|
4114
|
-
overwrite_build_params: bool = False,
|
|
4098
|
+
overwrite_build_params: bool = True,
|
|
4115
4099
|
requirements_file: Optional[str] = None,
|
|
4116
4100
|
extra_args: Optional[str] = None,
|
|
4117
4101
|
force_build: bool = False,
|
|
@@ -4167,7 +4151,7 @@ class MlrunProject(ModelObj):
|
|
|
4167
4151
|
commands: Optional[list] = None,
|
|
4168
4152
|
secret_name: Optional[str] = None,
|
|
4169
4153
|
requirements: Optional[typing.Union[str, list[str]]] = None,
|
|
4170
|
-
overwrite_build_params: bool = False,
|
|
4154
|
+
overwrite_build_params: bool = True,
|
|
4171
4155
|
requirements_file: Optional[str] = None,
|
|
4172
4156
|
builder_env: Optional[dict] = None,
|
|
4173
4157
|
extra_args: Optional[str] = None,
|
|
@@ -4197,12 +4181,6 @@ class MlrunProject(ModelObj):
|
|
|
4197
4181
|
:param source_code_target_dir: Path on the image where source code would be extracted
|
|
4198
4182
|
(by default `/home/mlrun_code`)
|
|
4199
4183
|
"""
|
|
4200
|
-
if not overwrite_build_params:
|
|
4201
|
-
# TODO: change overwrite_build_params default to True in 1.10.0
|
|
4202
|
-
warnings.warn(
|
|
4203
|
-
"The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.10.0.",
|
|
4204
|
-
mlrun.utils.OverwriteBuildParamsWarning,
|
|
4205
|
-
)
|
|
4206
4184
|
default_image_name = mlrun.mlconf.default_project_image_name.format(
|
|
4207
4185
|
name=self.name
|
|
4208
4186
|
)
|
|
@@ -4236,7 +4214,7 @@ class MlrunProject(ModelObj):
|
|
|
4236
4214
|
requirements: Optional[typing.Union[str, list[str]]] = None,
|
|
4237
4215
|
mlrun_version_specifier: Optional[str] = None,
|
|
4238
4216
|
builder_env: Optional[dict] = None,
|
|
4239
|
-
overwrite_build_params: bool = False,
|
|
4217
|
+
overwrite_build_params: bool = True,
|
|
4240
4218
|
requirements_file: Optional[str] = None,
|
|
4241
4219
|
extra_args: Optional[str] = None,
|
|
4242
4220
|
target_dir: Optional[str] = None,
|
|
@@ -4276,60 +4254,53 @@ class MlrunProject(ModelObj):
|
|
|
4276
4254
|
base_image=base_image,
|
|
4277
4255
|
)
|
|
4278
4256
|
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
|
|
4287
|
-
|
|
4288
|
-
|
|
4289
|
-
|
|
4290
|
-
)
|
|
4291
|
-
|
|
4292
|
-
self.build_config(
|
|
4293
|
-
image=image,
|
|
4294
|
-
set_as_default=set_as_default,
|
|
4295
|
-
base_image=base_image,
|
|
4296
|
-
commands=commands,
|
|
4297
|
-
secret_name=secret_name,
|
|
4298
|
-
with_mlrun=with_mlrun,
|
|
4299
|
-
requirements=requirements,
|
|
4300
|
-
requirements_file=requirements_file,
|
|
4301
|
-
overwrite_build_params=overwrite_build_params,
|
|
4302
|
-
)
|
|
4257
|
+
self.build_config(
|
|
4258
|
+
image=image,
|
|
4259
|
+
set_as_default=set_as_default,
|
|
4260
|
+
base_image=base_image,
|
|
4261
|
+
commands=commands,
|
|
4262
|
+
secret_name=secret_name,
|
|
4263
|
+
with_mlrun=with_mlrun,
|
|
4264
|
+
requirements=requirements,
|
|
4265
|
+
requirements_file=requirements_file,
|
|
4266
|
+
overwrite_build_params=overwrite_build_params,
|
|
4267
|
+
)
|
|
4303
4268
|
|
|
4304
|
-
|
|
4269
|
+
function = mlrun.new_function("mlrun--project--image--builder", kind="job")
|
|
4305
4270
|
|
|
4306
|
-
|
|
4271
|
+
if self.spec.source and not self.spec.load_source_on_run:
|
|
4272
|
+
if self.spec.source.startswith("db://"):
|
|
4273
|
+
logger.debug(
|
|
4274
|
+
"Project source is 'db://', which refers to metadata stored in the MLRun DB."
|
|
4275
|
+
" Skipping source archive setup for image build"
|
|
4276
|
+
)
|
|
4277
|
+
else:
|
|
4307
4278
|
function.with_source_archive(
|
|
4308
4279
|
source=self.spec.source,
|
|
4309
4280
|
target_dir=target_dir,
|
|
4310
4281
|
pull_at_runtime=False,
|
|
4311
4282
|
)
|
|
4312
4283
|
|
|
4313
|
-
|
|
4314
|
-
|
|
4315
|
-
|
|
4316
|
-
|
|
4317
|
-
|
|
4318
|
-
|
|
4319
|
-
|
|
4320
|
-
|
|
4321
|
-
|
|
4322
|
-
|
|
4323
|
-
|
|
4324
|
-
|
|
4325
|
-
|
|
4326
|
-
|
|
4327
|
-
|
|
4284
|
+
build = self.spec.build
|
|
4285
|
+
result = self.build_function(
|
|
4286
|
+
function=function,
|
|
4287
|
+
with_mlrun=build.with_mlrun,
|
|
4288
|
+
image=build.image,
|
|
4289
|
+
base_image=build.base_image,
|
|
4290
|
+
commands=build.commands,
|
|
4291
|
+
secret_name=build.secret,
|
|
4292
|
+
requirements=build.requirements,
|
|
4293
|
+
overwrite_build_params=overwrite_build_params,
|
|
4294
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
4295
|
+
builder_env=builder_env,
|
|
4296
|
+
extra_args=extra_args,
|
|
4297
|
+
force_build=True,
|
|
4298
|
+
)
|
|
4328
4299
|
|
|
4329
|
-
|
|
4330
|
-
|
|
4331
|
-
|
|
4332
|
-
|
|
4300
|
+
# Get the enriched target dir from the function
|
|
4301
|
+
self.spec.build.source_code_target_dir = (
|
|
4302
|
+
function.spec.build.source_code_target_dir
|
|
4303
|
+
)
|
|
4333
4304
|
|
|
4334
4305
|
try:
|
|
4335
4306
|
mlrun.db.get_run_db(secrets=self._secrets).delete_function(
|
|
@@ -5015,9 +4986,6 @@ class MlrunProject(ModelObj):
|
|
|
5015
4986
|
name: Optional[str] = None,
|
|
5016
4987
|
uid: Optional[Union[str, list[str]]] = None,
|
|
5017
4988
|
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
5018
|
-
state: Optional[
|
|
5019
|
-
mlrun.common.runtimes.constants.RunStates
|
|
5020
|
-
] = None, # Backward compatibility
|
|
5021
4989
|
states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
|
|
5022
4990
|
sort: bool = True,
|
|
5023
4991
|
iter: bool = False,
|
|
@@ -5061,7 +5029,6 @@ class MlrunProject(ModelObj):
|
|
|
5061
5029
|
- A comma-separated string formatted as `"label1=value1,label2"` to match entities with
|
|
5062
5030
|
the specified key-value pairs or key existence.
|
|
5063
5031
|
|
|
5064
|
-
:param state: Deprecated - List only runs whose state is specified.
|
|
5065
5032
|
:param states: List only runs whose state is one of the provided states.
|
|
5066
5033
|
:param sort: Whether to sort the result according to their start time. Otherwise, results will be
|
|
5067
5034
|
returned by their internal order in the DB (order will not be guaranteed).
|
|
@@ -5075,24 +5042,13 @@ class MlrunProject(ModelObj):
|
|
|
5075
5042
|
:param end_time_from: Filter by run end time in ``[end_time_from, end_time_to]``.
|
|
5076
5043
|
:param end_time_to: Filter by run end time in ``[end_time_from, end_time_to]``.
|
|
5077
5044
|
"""
|
|
5078
|
-
if state:
|
|
5079
|
-
# TODO: Remove this in 1.10.0
|
|
5080
|
-
warnings.warn(
|
|
5081
|
-
"'state' is deprecated in 1.7.0 and will be removed in 1.10.0. Use 'states' instead.",
|
|
5082
|
-
FutureWarning,
|
|
5083
|
-
)
|
|
5084
|
-
|
|
5085
5045
|
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
5086
5046
|
return db.list_runs(
|
|
5087
5047
|
name,
|
|
5088
5048
|
uid,
|
|
5089
5049
|
self.metadata.name,
|
|
5090
5050
|
labels=labels,
|
|
5091
|
-
states=(
|
|
5092
|
-
mlrun.utils.helpers.as_list(state)
|
|
5093
|
-
if state is not None
|
|
5094
|
-
else states or None
|
|
5095
|
-
),
|
|
5051
|
+
states=states or None,
|
|
5096
5052
|
sort=sort,
|
|
5097
5053
|
iter=iter,
|
|
5098
5054
|
start_time_from=start_time_from,
|
mlrun/run.py
CHANGED
|
@@ -894,7 +894,6 @@ def _run_pipeline(
|
|
|
894
894
|
def retry_pipeline(
|
|
895
895
|
run_id: str,
|
|
896
896
|
project: str,
|
|
897
|
-
namespace: Optional[str] = None,
|
|
898
897
|
) -> str:
|
|
899
898
|
"""Retry a pipeline run.
|
|
900
899
|
|
|
@@ -903,7 +902,6 @@ def retry_pipeline(
|
|
|
903
902
|
|
|
904
903
|
:param run_id: ID of the pipeline run to retry.
|
|
905
904
|
:param project: name of the project associated with the pipeline run.
|
|
906
|
-
:param namespace: Optional; Kubernetes namespace to use if not the default.
|
|
907
905
|
|
|
908
906
|
:returns: ID of the retried pipeline run or the ID of a cloned run if the original run is not retryable.
|
|
909
907
|
:raises ValueError: If access to the remote API service is not available.
|
|
@@ -918,7 +916,6 @@ def retry_pipeline(
|
|
|
918
916
|
pipeline_run_id = mldb.retry_pipeline(
|
|
919
917
|
run_id=run_id,
|
|
920
918
|
project=project,
|
|
921
|
-
namespace=namespace,
|
|
922
919
|
)
|
|
923
920
|
if pipeline_run_id == run_id:
|
|
924
921
|
logger.info(
|
|
@@ -931,6 +928,35 @@ def retry_pipeline(
|
|
|
931
928
|
return pipeline_run_id
|
|
932
929
|
|
|
933
930
|
|
|
931
|
+
def terminate_pipeline(
|
|
932
|
+
run_id: str,
|
|
933
|
+
project: str,
|
|
934
|
+
) -> str:
|
|
935
|
+
"""Terminate a pipeline run.
|
|
936
|
+
|
|
937
|
+
This function terminates a running pipeline with the specified run ID. If the run is not in a
|
|
938
|
+
terminable state, an error is raised.
|
|
939
|
+
|
|
940
|
+
:param run_id: ID of the pipeline run to terminate.
|
|
941
|
+
:param project: name of the project associated with the pipeline run.
|
|
942
|
+
|
|
943
|
+
:returns: ID of the terminate pipeline run background task.
|
|
944
|
+
:raises ValueError: If access to the remote API service is not available.
|
|
945
|
+
"""
|
|
946
|
+
mldb = mlrun.db.get_run_db()
|
|
947
|
+
if mldb.kind != "http":
|
|
948
|
+
raise ValueError(
|
|
949
|
+
"Terminating a pipeline requires access to remote API service. "
|
|
950
|
+
"Please set the dbpath URL."
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
pipeline_run_task = mldb.terminate_pipeline(
|
|
954
|
+
run_id=run_id,
|
|
955
|
+
project=project,
|
|
956
|
+
)
|
|
957
|
+
return pipeline_run_task["metadata"]["id"]
|
|
958
|
+
|
|
959
|
+
|
|
934
960
|
def wait_for_pipeline_completion(
|
|
935
961
|
run_id,
|
|
936
962
|
timeout=60 * 60,
|
|
@@ -997,7 +1023,10 @@ def wait_for_pipeline_completion(
|
|
|
997
1023
|
_wait_for_pipeline_completion,
|
|
998
1024
|
)
|
|
999
1025
|
else:
|
|
1000
|
-
client = mlrun_pipelines.utils.get_client(
|
|
1026
|
+
client = mlrun_pipelines.utils.get_client(
|
|
1027
|
+
logger=logger,
|
|
1028
|
+
namespace=namespace,
|
|
1029
|
+
)
|
|
1001
1030
|
resp = client.wait_for_run_completion(run_id, timeout)
|
|
1002
1031
|
if resp:
|
|
1003
1032
|
resp = resp.to_dict()
|
|
@@ -1058,7 +1087,10 @@ def get_pipeline(
|
|
|
1058
1087
|
)
|
|
1059
1088
|
|
|
1060
1089
|
else:
|
|
1061
|
-
client = mlrun_pipelines.utils.get_client(
|
|
1090
|
+
client = mlrun_pipelines.utils.get_client(
|
|
1091
|
+
logger=logger,
|
|
1092
|
+
namespace=namespace,
|
|
1093
|
+
)
|
|
1062
1094
|
resp = client.get_run(run_id)
|
|
1063
1095
|
if resp:
|
|
1064
1096
|
resp = resp.to_dict()
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -92,6 +92,7 @@ class DaskSpec(KubeResourceSpec):
|
|
|
92
92
|
preemption_mode=None,
|
|
93
93
|
security_context=None,
|
|
94
94
|
state_thresholds=None,
|
|
95
|
+
serving_spec=None,
|
|
95
96
|
):
|
|
96
97
|
super().__init__(
|
|
97
98
|
command=command,
|
|
@@ -121,6 +122,7 @@ class DaskSpec(KubeResourceSpec):
|
|
|
121
122
|
preemption_mode=preemption_mode,
|
|
122
123
|
security_context=security_context,
|
|
123
124
|
state_thresholds=state_thresholds,
|
|
125
|
+
serving_spec=serving_spec,
|
|
124
126
|
)
|
|
125
127
|
self.args = args
|
|
126
128
|
|
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import typing
|
|
15
|
-
import warnings
|
|
16
15
|
|
|
17
16
|
import mlrun.common.schemas
|
|
18
17
|
import mlrun.db
|
|
@@ -83,7 +82,7 @@ class KubejobRuntime(KubeResource):
|
|
|
83
82
|
with_mlrun=None,
|
|
84
83
|
auto_build=None,
|
|
85
84
|
requirements=None,
|
|
86
|
-
overwrite=False,
|
|
85
|
+
overwrite=True,
|
|
87
86
|
prepare_image_for_deploy=True,
|
|
88
87
|
requirements_file=None,
|
|
89
88
|
builder_env=None,
|
|
@@ -113,12 +112,6 @@ class KubejobRuntime(KubeResource):
|
|
|
113
112
|
:param builder_env: Kaniko builder pod env vars dict (for config/credentials)
|
|
114
113
|
e.g. builder_env={"GIT_TOKEN": token}
|
|
115
114
|
"""
|
|
116
|
-
if not overwrite:
|
|
117
|
-
# TODO: change overwrite default to True in 1.10.0
|
|
118
|
-
warnings.warn(
|
|
119
|
-
"The `overwrite` parameter default will change from 'False' to 'True' in 1.10.0.",
|
|
120
|
-
mlrun.utils.OverwriteBuildParamsWarning,
|
|
121
|
-
)
|
|
122
115
|
image = mlrun.utils.helpers.remove_image_protocol_prefix(image)
|
|
123
116
|
self.spec.build.build_config(
|
|
124
117
|
image=image,
|
|
@@ -214,3 +207,7 @@ class KubejobRuntime(KubeResource):
|
|
|
214
207
|
raise NotImplementedError(
|
|
215
208
|
f"Running a {self.kind} function from the client is not supported. Use .run() to submit the job to the API."
|
|
216
209
|
)
|
|
210
|
+
|
|
211
|
+
@property
|
|
212
|
+
def serving_spec(self):
|
|
213
|
+
return self.spec.serving_spec
|
|
@@ -54,6 +54,7 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
54
54
|
preemption_mode=None,
|
|
55
55
|
security_context=None,
|
|
56
56
|
state_thresholds=None,
|
|
57
|
+
serving_spec=None,
|
|
57
58
|
):
|
|
58
59
|
super().__init__(
|
|
59
60
|
command=command,
|
|
@@ -83,6 +84,7 @@ class MPIResourceSpec(KubeResourceSpec):
|
|
|
83
84
|
preemption_mode=preemption_mode,
|
|
84
85
|
security_context=security_context,
|
|
85
86
|
state_thresholds=state_thresholds,
|
|
87
|
+
serving_spec=serving_spec,
|
|
86
88
|
)
|
|
87
89
|
self.mpi_args = mpi_args or [
|
|
88
90
|
"-x",
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -49,6 +49,7 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
49
49
|
preemption_mode=None,
|
|
50
50
|
security_context=None,
|
|
51
51
|
state_thresholds=None,
|
|
52
|
+
serving_spec=None,
|
|
52
53
|
):
|
|
53
54
|
super().__init__(
|
|
54
55
|
command=command,
|
|
@@ -79,6 +80,7 @@ class MPIV1ResourceSpec(MPIResourceSpec):
|
|
|
79
80
|
preemption_mode=preemption_mode,
|
|
80
81
|
security_context=security_context,
|
|
81
82
|
state_thresholds=state_thresholds,
|
|
83
|
+
serving_spec=serving_spec,
|
|
82
84
|
)
|
|
83
85
|
self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()
|
|
84
86
|
|
|
@@ -154,6 +154,7 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
154
154
|
add_templated_ingress_host_mode=None,
|
|
155
155
|
state_thresholds=None,
|
|
156
156
|
disable_default_http_trigger=None,
|
|
157
|
+
serving_spec=None,
|
|
157
158
|
):
|
|
158
159
|
super().__init__(
|
|
159
160
|
command=command,
|
|
@@ -183,6 +184,7 @@ class NuclioSpec(KubeResourceSpec):
|
|
|
183
184
|
preemption_mode=preemption_mode,
|
|
184
185
|
security_context=security_context,
|
|
185
186
|
state_thresholds=state_thresholds,
|
|
187
|
+
serving_spec=serving_spec,
|
|
186
188
|
)
|
|
187
189
|
|
|
188
190
|
self.base_spec = base_spec or {}
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import copy
|
|
15
14
|
import json
|
|
16
15
|
import os
|
|
17
16
|
import warnings
|
|
@@ -43,6 +42,8 @@ from mlrun.serving.states import (
|
|
|
43
42
|
)
|
|
44
43
|
from mlrun.utils import get_caller_globals, logger, set_paths
|
|
45
44
|
|
|
45
|
+
from .. import KubejobRuntime
|
|
46
|
+
from ..pod import KubeResourceSpec
|
|
46
47
|
from .function import NuclioSpec, RemoteRuntime, min_nuclio_versions
|
|
47
48
|
|
|
48
49
|
serving_subkind = "serving_v2"
|
|
@@ -150,6 +151,7 @@ class ServingSpec(NuclioSpec):
|
|
|
150
151
|
state_thresholds=None,
|
|
151
152
|
disable_default_http_trigger=None,
|
|
152
153
|
model_endpoint_creation_task_name=None,
|
|
154
|
+
serving_spec=None,
|
|
153
155
|
):
|
|
154
156
|
super().__init__(
|
|
155
157
|
command=command,
|
|
@@ -190,6 +192,7 @@ class ServingSpec(NuclioSpec):
|
|
|
190
192
|
service_type=service_type,
|
|
191
193
|
add_templated_ingress_host_mode=add_templated_ingress_host_mode,
|
|
192
194
|
disable_default_http_trigger=disable_default_http_trigger,
|
|
195
|
+
serving_spec=serving_spec,
|
|
193
196
|
)
|
|
194
197
|
|
|
195
198
|
self.models = models or {}
|
|
@@ -478,6 +481,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
478
481
|
state = TaskStep(
|
|
479
482
|
class_name,
|
|
480
483
|
class_args,
|
|
484
|
+
name=key,
|
|
481
485
|
handler=handler,
|
|
482
486
|
function=child_function,
|
|
483
487
|
model_endpoint_creation_strategy=creation_strategy,
|
|
@@ -703,6 +707,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
703
707
|
"track_models": self.spec.track_models,
|
|
704
708
|
"default_content_type": self.spec.default_content_type,
|
|
705
709
|
"model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
|
|
710
|
+
"filename": getattr(self.spec, "filename", None),
|
|
706
711
|
}
|
|
707
712
|
|
|
708
713
|
if self.spec.secret_sources:
|
|
@@ -711,6 +716,10 @@ class ServingRuntime(RemoteRuntime):
|
|
|
711
716
|
|
|
712
717
|
return json.dumps(serving_spec)
|
|
713
718
|
|
|
719
|
+
@property
|
|
720
|
+
def serving_spec(self):
|
|
721
|
+
return self._get_serving_spec()
|
|
722
|
+
|
|
714
723
|
def to_mock_server(
|
|
715
724
|
self,
|
|
716
725
|
namespace=None,
|
|
@@ -742,13 +751,10 @@ class ServingRuntime(RemoteRuntime):
|
|
|
742
751
|
set_paths(workdir)
|
|
743
752
|
os.chdir(workdir)
|
|
744
753
|
|
|
745
|
-
system_graph = None
|
|
746
|
-
if isinstance(self.spec.graph, RootFlowStep):
|
|
747
|
-
system_graph = add_system_steps_to_graph(copy.deepcopy(self.spec.graph))
|
|
748
754
|
server = create_graph_server(
|
|
749
755
|
parameters=self.spec.parameters,
|
|
750
756
|
load_mode=self.spec.load_mode,
|
|
751
|
-
graph=
|
|
757
|
+
graph=self.spec.graph,
|
|
752
758
|
verbose=self.verbose,
|
|
753
759
|
current_function=current_function,
|
|
754
760
|
graph_initializer=self.spec.graph_initializer,
|
|
@@ -769,6 +775,18 @@ class ServingRuntime(RemoteRuntime):
|
|
|
769
775
|
monitoring_mock=self.spec.track_models,
|
|
770
776
|
)
|
|
771
777
|
|
|
778
|
+
if (
|
|
779
|
+
isinstance(self.spec.graph, RootFlowStep)
|
|
780
|
+
and self.spec.graph.include_monitored_step()
|
|
781
|
+
):
|
|
782
|
+
server.graph = add_system_steps_to_graph(
|
|
783
|
+
server.project,
|
|
784
|
+
server.graph,
|
|
785
|
+
self.spec.track_models,
|
|
786
|
+
server.context,
|
|
787
|
+
self.spec,
|
|
788
|
+
)
|
|
789
|
+
|
|
772
790
|
if workdir:
|
|
773
791
|
os.chdir(old_workdir)
|
|
774
792
|
|
|
@@ -806,3 +824,40 @@ class ServingRuntime(RemoteRuntime):
|
|
|
806
824
|
"Turn off the mock (mock=False) and make sure Nuclio is installed for real deployment to Nuclio"
|
|
807
825
|
)
|
|
808
826
|
self._mock_server = self.to_mock_server()
|
|
827
|
+
|
|
828
|
+
def to_job(self) -> KubejobRuntime:
|
|
829
|
+
"""Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
|
|
830
|
+
if self.spec.function_refs:
|
|
831
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
832
|
+
f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
|
|
833
|
+
)
|
|
834
|
+
|
|
835
|
+
spec = KubeResourceSpec(
|
|
836
|
+
image=self.spec.image,
|
|
837
|
+
mode=self.spec.mode,
|
|
838
|
+
volumes=self.spec.volumes,
|
|
839
|
+
volume_mounts=self.spec.volume_mounts,
|
|
840
|
+
env=self.spec.env,
|
|
841
|
+
resources=self.spec.resources,
|
|
842
|
+
default_handler="mlrun.serving.server.execute_graph",
|
|
843
|
+
pythonpath=self.spec.pythonpath,
|
|
844
|
+
entry_points=self.spec.entry_points,
|
|
845
|
+
description=self.spec.description,
|
|
846
|
+
workdir=self.spec.workdir,
|
|
847
|
+
image_pull_secret=self.spec.image_pull_secret,
|
|
848
|
+
node_name=self.spec.node_name,
|
|
849
|
+
node_selector=self.spec.node_selector,
|
|
850
|
+
affinity=self.spec.affinity,
|
|
851
|
+
disable_auto_mount=self.spec.disable_auto_mount,
|
|
852
|
+
priority_class_name=self.spec.priority_class_name,
|
|
853
|
+
tolerations=self.spec.tolerations,
|
|
854
|
+
preemption_mode=self.spec.preemption_mode,
|
|
855
|
+
security_context=self.spec.security_context,
|
|
856
|
+
state_thresholds=self.spec.state_thresholds,
|
|
857
|
+
serving_spec=self._get_serving_spec(),
|
|
858
|
+
)
|
|
859
|
+
job = KubejobRuntime(
|
|
860
|
+
spec=spec,
|
|
861
|
+
metadata=self.metadata,
|
|
862
|
+
)
|
|
863
|
+
return job
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -103,6 +103,7 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
103
103
|
"preemption_mode",
|
|
104
104
|
"security_context",
|
|
105
105
|
"state_thresholds",
|
|
106
|
+
"serving_spec",
|
|
106
107
|
]
|
|
107
108
|
_default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
|
|
108
109
|
"volumes",
|
|
@@ -178,6 +179,7 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
178
179
|
preemption_mode=None,
|
|
179
180
|
security_context=None,
|
|
180
181
|
state_thresholds=None,
|
|
182
|
+
serving_spec=None,
|
|
181
183
|
):
|
|
182
184
|
super().__init__(
|
|
183
185
|
command=command,
|
|
@@ -223,6 +225,7 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
223
225
|
state_thresholds
|
|
224
226
|
or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
|
|
225
227
|
)
|
|
228
|
+
self.serving_spec = serving_spec
|
|
226
229
|
# Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
|
|
227
230
|
# _dict_fields and doesn't have a setter.
|
|
228
231
|
self._termination_grace_period_seconds = None
|
mlrun/runtimes/remotesparkjob.py
CHANGED
|
@@ -58,6 +58,7 @@ class RemoteSparkSpec(KubeResourceSpec):
|
|
|
58
58
|
preemption_mode=None,
|
|
59
59
|
security_context=None,
|
|
60
60
|
state_thresholds=None,
|
|
61
|
+
serving_spec=None,
|
|
61
62
|
):
|
|
62
63
|
super().__init__(
|
|
63
64
|
command=command,
|
|
@@ -87,6 +88,7 @@ class RemoteSparkSpec(KubeResourceSpec):
|
|
|
87
88
|
preemption_mode=preemption_mode,
|
|
88
89
|
security_context=security_context,
|
|
89
90
|
state_thresholds=state_thresholds,
|
|
91
|
+
serving_spec=serving_spec,
|
|
90
92
|
)
|
|
91
93
|
self.provider = provider
|
|
92
94
|
|
|
@@ -168,6 +168,7 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
168
168
|
executor_cores=None,
|
|
169
169
|
security_context=None,
|
|
170
170
|
state_thresholds=None,
|
|
171
|
+
serving_spec=None,
|
|
171
172
|
):
|
|
172
173
|
super().__init__(
|
|
173
174
|
command=command,
|
|
@@ -197,6 +198,7 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
197
198
|
preemption_mode=preemption_mode,
|
|
198
199
|
security_context=security_context,
|
|
199
200
|
state_thresholds=state_thresholds,
|
|
201
|
+
serving_spec=serving_spec,
|
|
200
202
|
)
|
|
201
203
|
|
|
202
204
|
self.driver_resources = driver_resources or {}
|
mlrun/serving/__init__.py
CHANGED
|
@@ -27,6 +27,7 @@ __all__ = [
|
|
|
27
27
|
"ModelRunner",
|
|
28
28
|
"Model",
|
|
29
29
|
"ModelSelector",
|
|
30
|
+
"MonitoredStep",
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
@@ -45,6 +46,7 @@ from .states import (
|
|
|
45
46
|
ModelRunner,
|
|
46
47
|
Model,
|
|
47
48
|
ModelSelector,
|
|
49
|
+
MonitoredStep,
|
|
48
50
|
) # noqa
|
|
49
51
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
50
52
|
from .v2_serving import V2ModelServer # noqa
|