mlrun 1.6.0rc35__py3-none-any.whl → 1.7.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +3 -3
- mlrun/api/schemas/__init__.py +1 -1
- mlrun/artifacts/base.py +11 -6
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/model.py +30 -24
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/db/sql_session.py +5 -3
- mlrun/common/helpers.py +1 -2
- mlrun/common/schemas/artifact.py +3 -3
- mlrun/common/schemas/auth.py +3 -3
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +1 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +7 -7
- mlrun/common/schemas/function.py +1 -1
- mlrun/common/schemas/hub.py +4 -9
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/grafana.py +4 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +14 -15
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/project.py +3 -3
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/config.py +8 -4
- mlrun/data_types/to_pandas.py +1 -3
- mlrun/datastore/base.py +0 -28
- mlrun/datastore/datastore_profile.py +9 -9
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -11
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/targets.py +31 -31
- mlrun/datastore/utils.py +4 -6
- mlrun/datastore/v3io.py +70 -46
- mlrun/db/base.py +22 -23
- mlrun/db/httpdb.py +34 -34
- mlrun/db/nopdb.py +19 -19
- mlrun/errors.py +1 -1
- mlrun/execution.py +4 -4
- mlrun/feature_store/api.py +20 -21
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +28 -32
- mlrun/feature_store/feature_vector.py +24 -27
- mlrun/feature_store/retrieval/base.py +7 -7
- mlrun/feature_store/retrieval/conversion.py +2 -4
- mlrun/feature_store/steps.py +7 -15
- mlrun/features.py +5 -7
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +16 -35
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +7 -7
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +32 -32
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +9 -9
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +2 -2
- mlrun/frameworks/pytorch/__init__.py +16 -16
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +12 -12
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +5 -5
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +14 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +7 -9
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +12 -12
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +4 -5
- mlrun/kfpops.py +2 -2
- mlrun/launcher/base.py +10 -10
- mlrun/launcher/local.py +8 -8
- mlrun/launcher/remote.py +7 -7
- mlrun/lists.py +3 -4
- mlrun/model.py +205 -55
- mlrun/model_monitoring/api.py +21 -24
- mlrun/model_monitoring/application.py +4 -4
- mlrun/model_monitoring/batch.py +17 -17
- mlrun/model_monitoring/controller.py +2 -1
- mlrun/model_monitoring/features_drift_table.py +44 -31
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +11 -13
- mlrun/model_monitoring/stores/model_endpoint_store.py +9 -11
- mlrun/model_monitoring/stores/models/__init__.py +2 -2
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +11 -13
- mlrun/model_monitoring/stream_processing.py +16 -34
- mlrun/model_monitoring/tracking_policy.py +2 -1
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +18 -23
- mlrun/package/utils/_formatter.py +4 -4
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/other.py +1 -2
- mlrun/projects/operations.py +5 -5
- mlrun/projects/pipelines.py +9 -9
- mlrun/projects/project.py +58 -46
- mlrun/render.py +1 -1
- mlrun/run.py +9 -9
- mlrun/runtimes/__init__.py +7 -4
- mlrun/runtimes/base.py +20 -23
- mlrun/runtimes/constants.py +5 -5
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +1 -2
- mlrun/runtimes/nuclio/__init__.py +20 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +15 -16
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +13 -12
- mlrun/runtimes/pod.py +95 -48
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +50 -33
- mlrun/runtimes/utils.py +1 -2
- mlrun/secrets.py +3 -3
- mlrun/serving/remote.py +0 -4
- mlrun/serving/routers.py +6 -6
- mlrun/serving/server.py +4 -4
- mlrun/serving/states.py +29 -0
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +50 -8
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +37 -119
- mlrun/utils/http.py +1 -4
- mlrun/utils/logger.py +49 -14
- mlrun/utils/notifications/notification/__init__.py +3 -3
- mlrun/utils/notifications/notification/base.py +2 -2
- mlrun/utils/notifications/notification/ipython.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +8 -14
- mlrun/utils/retryer.py +207 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.0rc35.dist-info → mlrun-1.7.0rc2.dist-info}/METADATA +9 -9
- mlrun-1.7.0rc2.dist-info/RECORD +315 -0
- mlrun-1.6.0rc35.dist-info/RECORD +0 -313
- {mlrun-1.6.0rc35.dist-info → mlrun-1.7.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc35.dist-info → mlrun-1.7.0rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc35.dist-info → mlrun-1.7.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc35.dist-info → mlrun-1.7.0rc2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/daskjob.py
CHANGED
@@ -16,7 +16,7 @@ import inspect
 import socket
 import time
 from os import environ
-from typing import Callable,
+from typing import Callable, Optional, Union

 import mlrun.common.schemas
 import mlrun.errors
@@ -461,7 +461,7 @@ class DaskCluster(KubejobRuntime):

     def set_state_thresholds(
         self,
-        state_thresholds:
+        state_thresholds: dict[str, str],
         patch: bool = True,
     ):
         raise NotImplementedError(
@@ -477,23 +477,23 @@ class DaskCluster(KubejobRuntime):
         name: Optional[str] = "",
         project: Optional[str] = "",
         params: Optional[dict] = None,
-        inputs: Optional[
+        inputs: Optional[dict[str, str]] = None,
         out_path: Optional[str] = "",
         workdir: Optional[str] = "",
         artifact_path: Optional[str] = "",
         watch: Optional[bool] = True,
         schedule: Optional[Union[str, mlrun.common.schemas.ScheduleCronTrigger]] = None,
-        hyperparams: Optional[
+        hyperparams: Optional[dict[str, list]] = None,
         hyper_param_options: Optional[mlrun.model.HyperParamOptions] = None,
         verbose: Optional[bool] = None,
         scrape_metrics: Optional[bool] = None,
         local: Optional[bool] = False,
         local_code_path: Optional[str] = None,
         auto_build: Optional[bool] = None,
-        param_file_secrets: Optional[
-        notifications: Optional[
-        returns: Optional[
-        state_thresholds: Optional[
+        param_file_secrets: Optional[dict[str, str]] = None,
+        notifications: Optional[list[mlrun.model.Notification]] = None,
+        returns: Optional[list[Union[str, dict[str, str]]]] = None,
+        state_thresholds: Optional[dict[str, int]] = None,
         **launcher_kwargs,
     ) -> RunObject:
         if state_thresholds:
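The recurring change in these signature hunks, here and in the runtime files below, is a move from typing-module generics to the builtin generics accepted since Python 3.9 (PEP 585). A minimal before/after sketch, assuming the truncated removed lines used the capitalized typing generics; the helper names are invented for illustration:

from typing import Dict, List, Optional, Union


# pre-1.7 style (presumed; the removed lines are truncated in this diff)
def run_old(
    inputs: Optional[Dict[str, str]] = None,
    returns: Optional[List[Union[str, Dict[str, str]]]] = None,
): ...


# 1.7.0rc2 style: builtin dict/list used as generic types (PEP 585, Python >= 3.9)
def run_new(
    inputs: Optional[dict[str, str]] = None,
    returns: Optional[list[Union[str, dict[str, str]]]] = None,
): ...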
mlrun/runtimes/databricks_job/databricks_cancel_task.py
CHANGED
@@ -33,7 +33,7 @@ def main():
            "The Databricks credentials path does not exist."
            " Please manually cancel the job from the Databricks environment."
        )
-    with open(credentials_path
+    with open(credentials_path) as yaml_file:
        loaded_data = yaml.safe_load(yaml_file)
        # use for flat yaml only
        for key, value in loaded_data.items():
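The only change here drops the explicit file mode, relying on open() defaulting to text-mode read. A self-contained sketch of the flat-YAML loading pattern this hunk shows; the function name and the export of each pair into the environment are illustrative assumptions, not taken from the diff:

import os

import yaml


def load_flat_yaml(credentials_path: str) -> dict:
    # flat YAML only: a single mapping of scalar keys to scalar values
    with open(credentials_path) as yaml_file:
        loaded_data = yaml.safe_load(yaml_file)
    for key, value in loaded_data.items():
        # illustrative use of the pairs; the real task consumes them as credentials
        os.environ[key] = str(value)
    return loaded_data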
mlrun/runtimes/databricks_job/databricks_runtime.py
CHANGED
@@ -14,7 +14,7 @@

 from ast import FunctionDef, parse, unparse
 from base64 import b64decode, b64encode
-from typing import Callable,
+from typing import Callable, Optional, Union

 import mlrun
 import mlrun.runtimes.kubejob as kubejob
@@ -215,23 +215,23 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
         name: Optional[str] = "",
         project: Optional[str] = "",
         params: Optional[dict] = None,
-        inputs: Optional[
+        inputs: Optional[dict[str, str]] = None,
         out_path: Optional[str] = "",
         workdir: Optional[str] = "",
         artifact_path: Optional[str] = "",
         watch: Optional[bool] = True,
         schedule: Optional[Union[str, mlrun.common.schemas.ScheduleCronTrigger]] = None,
-        hyperparams: Optional[
+        hyperparams: Optional[dict[str, list]] = None,
         hyper_param_options: Optional[HyperParamOptions] = None,
         verbose: Optional[bool] = None,
         scrape_metrics: Optional[bool] = None,
         local: Optional[bool] = False,
         local_code_path: Optional[str] = None,
         auto_build: Optional[bool] = None,
-        param_file_secrets: Optional[
-        notifications: Optional[
-        returns: Optional[
-        state_thresholds: Optional[
+        param_file_secrets: Optional[dict[str, str]] = None,
+        notifications: Optional[list[mlrun.model.Notification]] = None,
+        returns: Optional[list[Union[str, dict[str, str]]]] = None,
+        state_thresholds: Optional[dict[str, int]] = None,
         **launcher_kwargs,
     ) -> RunObject:
         if local:
mlrun/runtimes/function_reference.py
CHANGED
@@ -111,7 +111,7 @@ class FunctionReference(ModelObj):
             code = self.code
             if kind == mlrun.runtimes.RuntimeKinds.serving:
                 code = code + mlrun_footer.format(
-                    mlrun.runtimes.serving.serving_subkind
+                    mlrun.runtimes.nuclio.serving.serving_subkind
                 )
             func = mlrun.new_function(
                 self.name, kind=kind, image=self.image or default_image
mlrun/runtimes/local.py
CHANGED

mlrun/runtimes/mpijob/abstract.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 import abc
 import os
-import typing

 from mlrun.config import config
 from mlrun.runtimes.kubejob import KubejobRuntime
@@ -206,7 +205,7 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):

         self.set_envs(horovod_autotune_settings)

-    def set_mpi_args(self, args:
+    def set_mpi_args(self, args: list[str]) -> None:
         """Sets the runtime's mpi arguments to args.

         Parameters
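For context, a brief usage sketch of the retyped method; the function name, image, and argument values are illustrative, while mlrun.new_function with kind="mpijob" is the standard way to obtain an MPI runtime:

import mlrun

mpijob = mlrun.new_function("trainer", kind="mpijob", image="mlrun/mlrun")
# args is now annotated as list[str]; the runtime stores them as its MPI launcher arguments
mpijob.set_mpi_args(["-x", "NCCL_DEBUG=INFO"])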
mlrun/runtimes/nuclio/__init__.py
ADDED
@@ -0,0 +1,20 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .serving import ServingRuntime, new_v2_model_server  # noqa
+from .nuclio import nuclio_init_hook  # noqa
+from .function import (
+    min_nuclio_versions,
+    RemoteRuntime,
+)  # noqa
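The new subpackage makes the Nuclio-related runtimes importable from their relocated modules while re-exporting the public names at the package level. A small sketch of the resulting import paths, using only names declared in the __init__ above or shown in the function_reference.py hunk:

# re-exported by the new mlrun/runtimes/nuclio/__init__.py
from mlrun.runtimes.nuclio import RemoteRuntime, ServingRuntime, min_nuclio_versions

# deeper paths move with the modules, e.g. the serving subkind referenced in function_reference.py
from mlrun.runtimes.nuclio.serving import serving_subkind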
mlrun/runtimes/{function.py → nuclio/function.py}
RENAMED
@@ -34,23 +34,22 @@ import mlrun.k8s_utils
 import mlrun.utils
 import mlrun.utils.helpers
 from mlrun.common.schemas import AuthInfo
-
-from
-from
-from
-from
-from
-from ..platforms.iguazio import (
+from mlrun.config import config as mlconf
+from mlrun.errors import err_to_str
+from mlrun.kfpops import deploy_op
+from mlrun.lists import RunList
+from mlrun.model import RunObject
+from mlrun.platforms.iguazio import (
     VolumeMount,
     mount_v3io,
     parse_path,
     split_path,
     v3io_cred,
 )
-from
-from .
-from .
-from .utils import
+from mlrun.runtimes.base import FunctionStatus, RunError
+from mlrun.runtimes.pod import KubeResource, KubeResourceSpec
+from mlrun.runtimes.utils import get_item_name, log_std
+from mlrun.utils import get_in, logger, update_in


 def validate_nuclio_version_compatibility(*min_versions):
@@ -386,7 +385,7 @@ class RemoteRuntime(KubeResource):
         workers: typing.Optional[int] = 8,
         port: typing.Optional[int] = None,
         host: typing.Optional[str] = None,
-        paths: typing.Optional[
+        paths: typing.Optional[list[str]] = None,
         canary: typing.Optional[float] = None,
         secret: typing.Optional[str] = None,
         worker_timeout: typing.Optional[int] = None,
@@ -629,9 +628,9 @@ class RemoteRuntime(KubeResource):
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
-        node_selector: typing.Optional[
+        node_selector: typing.Optional[dict[str, str]] = None,
         affinity: typing.Optional[client.V1Affinity] = None,
-        tolerations: typing.Optional[
+        tolerations: typing.Optional[list[client.V1Toleration]] = None,
     ):
         """k8s node selection attributes"""
         if tolerations and not validate_nuclio_version_compatibility("1.7.5"):
@@ -683,7 +682,7 @@ class RemoteRuntime(KubeResource):

     def set_state_thresholds(
         self,
-        state_thresholds:
+        state_thresholds: dict[str, int],
         patch: bool = True,
     ):
         raise NotImplementedError(
@@ -716,7 +715,7 @@ class RemoteRuntime(KubeResource):
         raise_on_exception=True,
         resolve_address=True,
         auth_info: AuthInfo = None,
-    ) ->
+    ) -> tuple[str, str, typing.Optional[float]]:
         if dashboard:
             (
                 state,
mlrun/runtimes/{nuclio.py → nuclio/nuclio.py}
RENAMED
@@ -17,13 +17,13 @@ import os
 import socket

 import mlrun.db
+from mlrun.errors import err_to_str
+from mlrun.execution import MLClientCtx
+from mlrun.model import RunTemplate
+from mlrun.runtimes.local import get_func_arg
+from mlrun.serving.server import v2_serving_init
+from mlrun.serving.v1_serving import nuclio_serving_init

-from ..errors import err_to_str
-from ..execution import MLClientCtx
-from ..model import RunTemplate
-from ..serving.server import v2_serving_init
-from ..serving.v1_serving import nuclio_serving_init
-from .local import get_func_arg
 from .serving import serving_subkind

mlrun/runtimes/{serving.py → nuclio/serving.py}
RENAMED
@@ -15,20 +15,20 @@
 import json
 import os
 from copy import deepcopy
-from typing import
+from typing import Union

 import nuclio
 from nuclio import KafkaTrigger

 import mlrun
 import mlrun.common.schemas
+from mlrun.datastore import parse_kafka_url
+from mlrun.model import ObjectList
 from mlrun.model_monitoring.tracking_policy import TrackingPolicy
-
-from
-from
-from
-from ..serving.server import GraphServer, create_graph_server
-from ..serving.states import (
+from mlrun.runtimes.function_reference import FunctionReference
+from mlrun.secrets import SecretsStore
+from mlrun.serving.server import GraphServer, create_graph_server
+from mlrun.serving.states import (
     RootFlowStep,
     RouterStep,
     StepKinds,
@@ -37,9 +37,9 @@ from ..serving.states import (
     new_remote_endpoint,
     params_to_step,
 )
-from
+from mlrun.utils import get_caller_globals, logger, set_paths
+
 from .function import NuclioSpec, RemoteRuntime
-from .function_reference import FunctionReference

 serving_subkind = "serving_v2"

@@ -216,12 +216,12 @@ class ServingSpec(NuclioSpec):
         graph_root_setter(self, graph)

     @property
-    def function_refs(self) ->
+    def function_refs(self) -> list[FunctionReference]:
         """function references, list of optional child function refs"""
         return self._function_refs

     @function_refs.setter
-    def function_refs(self, function_refs:
+    def function_refs(self, function_refs: list[FunctionReference]):
         self._function_refs = ObjectList.from_list(FunctionReference, function_refs)


@@ -309,7 +309,8 @@ class ServingRuntime(RemoteRuntime):
         stream_args: dict = None,
         tracking_policy: Union[TrackingPolicy, dict] = None,
     ):
-        """
+        """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
+        and analyze performance.

         :param stream_path:  Path/url of the tracking stream e.g. v3io:///users/mike/mystream
                              you can use the "dummy://" path for test/simulation.
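The expanded docstring in the last hunk belongs to the serving runtime's model-tracking setup; the method name is not visible in the hunk, so this sketch assumes it is ServingRuntime.set_tracking, with the "dummy://" stream path the docstring recommends for test/simulation:

import mlrun

# A serving function created the usual way; ServingRuntime is what the hunks above patch.
fn = mlrun.new_function("serving-demo", kind="serving", image="mlrun/mlrun")
# Assumed method name (not shown in the hunk): enable monitoring of deployed models,
# using the "dummy://" tracking stream for test/simulation.
fn.set_tracking(stream_path="dummy://")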
mlrun/runtimes/pod.py
CHANGED
@@ -105,6 +105,50 @@ class KubeResourceSpec(FunctionSpec):
         "security_context",
         "state_thresholds",
     ]
+    _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
+        "volumes",
+        "volume_mounts",
+        "resources",
+        "replicas",
+        "image_pull_policy",
+        "service_account",
+        "image_pull_secret",
+        "node_name",
+        "node_selector",
+        "affinity",
+        "priority_class_name",
+        "tolerations",
+        "preemption_mode",
+        "security_context",
+    ]
+    _k8s_fields_to_serialize = [
+        "volumes",
+        "volume_mounts",
+        "resources",
+        "env",
+        "image_pull_policy",
+        "service_account",
+        "image_pull_secret",
+        "node_name",
+        "node_selector",
+        "affinity",
+        "tolerations",
+        "security_context",
+    ]
+    _fields_to_serialize = FunctionSpec._fields_to_serialize + _k8s_fields_to_serialize
+    _fields_to_enrich = FunctionSpec._fields_to_enrich + [
+        "env",  # Removing sensitive data from env
+    ]
+    _fields_to_skip_validation = FunctionSpec._fields_to_skip_validation + [
+        # TODO: affinity, tolerations and node_selector are skipped due to preemption mode transitions.
+        # Preemption mode 'none' depends on the previous mode while the default mode may enrich these values.
+        # When we allow 'None' values for these attributes we get their true values and they will undo the default
+        # enrichment when creating the runtime from dict.
+        # The enrichment should move to the server side and then this can be removed.
+        "affinity",
+        "tolerations",
+        "node_selector",
+    ]

     def __init__(
         self,
@@ -222,7 +266,7 @@ class KubeResourceSpec(FunctionSpec):
         self._affinity = transform_attribute_to_k8s_class_instance("affinity", affinity)

     @property
-    def tolerations(self) ->
+    def tolerations(self) -> list[k8s_client.V1Toleration]:
         return self._tolerations

     @tolerations.setter
@@ -264,15 +308,42 @@ class KubeResourceSpec(FunctionSpec):
     def termination_grace_period_seconds(self) -> typing.Optional[int]:
         return self._termination_grace_period_seconds

-    def
-
-
-
-
-
-
-
-
+    def _serialize_field(
+        self, struct: dict, field_name: str = None, strip: bool = False
+    ) -> typing.Any:
+        """
+        Serialize a field to a dict, list, or primitive type.
+        If field_name is in _k8s_fields_to_serialize, we will apply k8s serialization
+        """
+        k8s_api = k8s_client.ApiClient()
+        if field_name in self._k8s_fields_to_serialize:
+            return k8s_api.sanitize_for_serialization(getattr(self, field_name))
+        return super()._serialize_field(struct, field_name, strip)
+
+    def _enrich_field(
+        self, struct: dict, field_name: str = None, strip: bool = False
+    ) -> typing.Any:
+        k8s_api = k8s_client.ApiClient()
+        if strip:
+            if field_name == "env":
+                # We first try to pull from struct because the field might have been already serialized and if not,
+                # we pull from self
+                envs = struct.get(field_name, None) or getattr(self, field_name, None)
+                if envs:
+                    serialized_envs = k8s_api.sanitize_for_serialization(envs)
+                    for env in serialized_envs:
+                        if env["name"].startswith("V3IO_"):
+                            env["value"] = ""
+                    return serialized_envs
+        return super()._enrich_field(struct=struct, field_name=field_name, strip=strip)
+
+    def _apply_enrichment_before_to_dict_completion(
+        self, struct: dict, strip: bool = False
+    ):
+        if strip:
+            # Reset this, since mounts and env variables were cleared.
+            struct["disable_auto_mount"] = False
+        return super()._apply_enrichment_before_to_dict_completion(struct, strip)

     def update_vols_and_mounts(
         self, volumes, volume_mounts, volume_mounts_field_name="_volume_mounts"
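The monolithic to_dict override that previously lived on KubeResource (removed in the -927,28 hunk further down) is replaced by the class-level field lists added above plus these per-field hooks: _serialize_field, _enrich_field, and _apply_enrichment_before_to_dict_completion. The following standalone sketch illustrates the pattern only; its class and field names are invented and it is not mlrun's actual base implementation:

class SpecBase:
    # Class-level lists drive generic to_dict behavior; subclasses only declare fields.
    _fields_to_strip: list = []        # dropped entirely when strip=True
    _fields_to_serialize: list = []    # passed through _serialize_field
    _fields_to_enrich: list = []       # passed through _enrich_field

    def _serialize_field(self, struct, field_name, strip):
        return struct.get(field_name)

    def _enrich_field(self, struct, field_name, strip):
        return struct.get(field_name)

    def to_dict(self, strip: bool = False) -> dict:
        struct = dict(vars(self))
        for name in self._fields_to_serialize:
            struct[name] = self._serialize_field(struct, name, strip)
        for name in self._fields_to_enrich:
            struct[name] = self._enrich_field(struct, name, strip)
        if strip:
            for name in self._fields_to_strip:
                struct.pop(name, None)
        return struct


class EnvAwareSpec(SpecBase):
    _fields_to_strip = ["volumes"]
    _fields_to_enrich = ["env"]

    def __init__(self):
        self.env = [{"name": "V3IO_ACCESS_KEY", "value": "secret"}]
        self.volumes = ["vol-a"]

    # Override the hook only for the fields that need care, as KubeResourceSpec does for "env".
    def _enrich_field(self, struct, field_name, strip):
        value = super()._enrich_field(struct, field_name, strip)
        if strip and field_name == "env":
            return [
                {**env, "value": ""} if env["name"].startswith("V3IO_") else env
                for env in value
            ]
        return value


print(EnvAwareSpec().to_dict(strip=True))
# -> {'env': [{'name': 'V3IO_ACCESS_KEY', 'value': ''}]}

The benefit of the declarative approach is that subclasses list the fields they need stripped, serialized, or scrubbed instead of re-implementing the whole to_dict walk.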
@@ -455,7 +526,7 @@ class KubeResourceSpec(FunctionSpec):
             return {}
         return resources

-    def _merge_node_selector(self, node_selector:
+    def _merge_node_selector(self, node_selector: dict[str, str]):
         if not node_selector:
             return

@@ -464,7 +535,7 @@ class KubeResourceSpec(FunctionSpec):

     def _merge_tolerations(
         self,
-        tolerations:
+        tolerations: list[k8s_client.V1Toleration],
         tolerations_field_name: str,
     ):
         if not tolerations:
@@ -649,7 +720,7 @@ class KubeResourceSpec(FunctionSpec):

     def _merge_node_selector_term_to_node_affinity(
         self,
-        node_selector_terms:
+        node_selector_terms: list[k8s_client.V1NodeSelectorTerm],
         affinity_field_name: str,
     ):
         if not node_selector_terms:
@@ -694,7 +765,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_affinity_node_selector_requirement(
         self,
-        node_selector_requirements:
+        node_selector_requirements: list[k8s_client.V1NodeSelectorRequirement],
         affinity_field_name: str = "affinity",
     ):
         """
@@ -749,20 +820,18 @@ class KubeResourceSpec(FunctionSpec):

     @staticmethod
     def _prune_node_selector_requirements_from_node_selector_terms(
-        node_selector_terms:
-        node_selector_requirements_to_prune:
-
-        ],
-    ) -> typing.List[k8s_client.V1NodeSelectorTerm]:
+        node_selector_terms: list[k8s_client.V1NodeSelectorTerm],
+        node_selector_requirements_to_prune: list[k8s_client.V1NodeSelectorRequirement],
+    ) -> list[k8s_client.V1NodeSelectorTerm]:
         """
         Goes over each expression in all the terms provided and removes the expressions if it matches
         one of the requirements provided to remove

         :return: New list of terms without the provided node selector requirements
         """
-        new_node_selector_terms:
+        new_node_selector_terms: list[k8s_client.V1NodeSelectorTerm] = []
         for term in node_selector_terms:
-            new_node_selector_requirements:
+            new_node_selector_requirements: list[
                 k8s_client.V1NodeSelectorRequirement
             ] = []
             for node_selector_requirement in term.match_expressions:
@@ -791,7 +860,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_tolerations(
         self,
-        tolerations:
+        tolerations: list[k8s_client.V1Toleration],
         tolerations_field_name: str = "tolerations",
     ):
         """
@@ -820,7 +889,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_node_selector(
         self,
-        node_selector:
+        node_selector: dict[str, str],
         node_selector_field_name: str,
     ):
         """
@@ -927,28 +996,6 @@ class KubeResource(BaseRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)

-    def to_dict(self, fields=None, exclude=None, strip=False):
-        struct = super().to_dict(fields, exclude, strip=strip)
-        api = k8s_client.ApiClient()
-        struct = api.sanitize_for_serialization(struct)
-        if strip:
-            spec = struct["spec"]
-            for attr in [
-                "volumes",
-                "volume_mounts",
-                "driver_volume_mounts",
-                "executor_volume_mounts",
-            ]:
-                if attr in spec:
-                    del spec[attr]
-            if "env" in spec and spec["env"]:
-                for ev in spec["env"]:
-                    if ev["name"].startswith("V3IO_"):
-                        ev["value"] = ""
-            # Reset this, since mounts and env variables were cleared.
-            spec["disable_auto_mount"] = False
-        return struct
-
     def apply(self, modify):
         """
         Apply a modifier to the runtime which is used to change the runtimes k8s object's spec.
@@ -1065,7 +1112,7 @@ class KubeResource(BaseRuntime):

     def set_state_thresholds(
         self,
-        state_thresholds:
+        state_thresholds: dict[str, str],
         patch: bool = True,
     ):
         """
@@ -1126,9 +1173,9 @@ class KubeResource(BaseRuntime):
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
-        node_selector: typing.Optional[
+        node_selector: typing.Optional[dict[str, str]] = None,
         affinity: typing.Optional[k8s_client.V1Affinity] = None,
-        tolerations: typing.Optional[
+        tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
         Enables to control on which k8s node the job will run