mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mlrun might be problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +134 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +133 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc8.dist-info/METADATA +0 -272
- mlrun-1.6.4rc8.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
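Note on the runtime reorganization visible in the file list: the Nuclio-related runtimes were moved into a dedicated mlrun/runtimes/nuclio/ package (function.py, nuclio.py and serving.py were relocated, and an API-gateway module plus an application runtime were added). A minimal import sketch of what the move implies follows; only the module paths come from this diff, while the class names are assumptions used for illustration.

# mlrun 1.6.x module locations (for comparison)
# from mlrun.runtimes.function import RemoteRuntime      # assumed class name
# from mlrun.runtimes.serving import ServingRuntime      # assumed class name

# mlrun 1.7.0, after the move under the nuclio package
from mlrun.runtimes.nuclio.function import RemoteRuntime  # assumed class name
from mlrun.runtimes.nuclio.serving import ServingRuntime  # assumed class name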
mlrun/runtimes/pod.py  CHANGED

@@ -15,12 +15,14 @@ import copy
 import inspect
 import os
 import re
+import time
 import typing
 from enum import Enum

 import dotenv
-import kfp.dsl
 import kubernetes.client as k8s_client
+import mlrun_pipelines.mounts
+from mlrun_pipelines.mixins import KfpAdapterMixin

 import mlrun.errors
 import mlrun.utils.regex
@@ -36,11 +38,11 @@ from ..k8s_utils import (
     generate_preemptible_nodes_affinity_terms,
     generate_preemptible_nodes_anti_affinity_terms,
     generate_preemptible_tolerations,
+    validate_node_selectors,
 )
 from ..utils import logger, update_in
 from .base import BaseRuntime, FunctionSpec, spec_fields
 from .utils import (
-    apply_kfp,
     get_gpu_from_resource_requirement,
     get_item_name,
     set_named_item,
@@ -105,6 +107,50 @@ class KubeResourceSpec(FunctionSpec):
         "security_context",
         "state_thresholds",
     ]
+    _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
+        "volumes",
+        "volume_mounts",
+        "resources",
+        "replicas",
+        "image_pull_policy",
+        "service_account",
+        "image_pull_secret",
+        "node_name",
+        "node_selector",
+        "affinity",
+        "priority_class_name",
+        "tolerations",
+        "preemption_mode",
+        "security_context",
+    ]
+    _k8s_fields_to_serialize = [
+        "volumes",
+        "volume_mounts",
+        "resources",
+        "env",
+        "image_pull_policy",
+        "service_account",
+        "image_pull_secret",
+        "node_name",
+        "node_selector",
+        "affinity",
+        "tolerations",
+        "security_context",
+    ]
+    _fields_to_serialize = FunctionSpec._fields_to_serialize + _k8s_fields_to_serialize
+    _fields_to_enrich = FunctionSpec._fields_to_enrich + [
+        "env",  # Removing sensitive data from env
+    ]
+    _fields_to_skip_validation = FunctionSpec._fields_to_skip_validation + [
+        # TODO: affinity, tolerations and node_selector are skipped due to preemption mode transitions.
+        # Preemption mode 'none' depends on the previous mode while the default mode may enrich these values.
+        # When we allow 'None' values for these attributes we get their true values and they will undo the default
+        # enrichment when creating the runtime from dict.
+        # The enrichment should move to the server side and then this can be removed.
+        "affinity",
+        "tolerations",
+        "node_selector",
+    ]

     def __init__(
         self,
@@ -170,9 +216,7 @@ class KubeResourceSpec(FunctionSpec):
             image_pull_secret or mlrun.mlconf.function.spec.image_pull_secret.default
         )
         self.node_name = node_name
-        self.node_selector = (
-            node_selector or mlrun.mlconf.get_default_function_node_selector()
-        )
+        self.node_selector = node_selector or {}
         self._affinity = affinity
         self.priority_class_name = (
             priority_class_name or mlrun.mlconf.default_function_priority_class_name
@@ -222,7 +266,7 @@ class KubeResourceSpec(FunctionSpec):
         self._affinity = transform_attribute_to_k8s_class_instance("affinity", affinity)

     @property
-    def tolerations(self) -> typing.List[k8s_client.V1Toleration]:
+    def tolerations(self) -> list[k8s_client.V1Toleration]:
         return self._tolerations

     @tolerations.setter
@@ -264,15 +308,42 @@ class KubeResourceSpec(FunctionSpec):
     def termination_grace_period_seconds(self) -> typing.Optional[int]:
         return self._termination_grace_period_seconds

-    def
-
-
-
-
-
-
-
-
+    def _serialize_field(
+        self, struct: dict, field_name: str = None, strip: bool = False
+    ) -> typing.Any:
+        """
+        Serialize a field to a dict, list, or primitive type.
+        If field_name is in _k8s_fields_to_serialize, we will apply k8s serialization
+        """
+        k8s_api = k8s_client.ApiClient()
+        if field_name in self._k8s_fields_to_serialize:
+            return k8s_api.sanitize_for_serialization(getattr(self, field_name))
+        return super()._serialize_field(struct, field_name, strip)
+
+    def _enrich_field(
+        self, struct: dict, field_name: str = None, strip: bool = False
+    ) -> typing.Any:
+        k8s_api = k8s_client.ApiClient()
+        if strip:
+            if field_name == "env":
+                # We first try to pull from struct because the field might have been already serialized and if not,
+                # we pull from self
+                envs = struct.get(field_name, None) or getattr(self, field_name, None)
+                if envs:
+                    serialized_envs = k8s_api.sanitize_for_serialization(envs)
+                    for env in serialized_envs:
+                        if env["name"].startswith("V3IO_"):
+                            env["value"] = ""
+                    return serialized_envs
+        return super()._enrich_field(struct=struct, field_name=field_name, strip=strip)
+
+    def _apply_enrichment_before_to_dict_completion(
+        self, struct: dict, strip: bool = False
+    ):
+        if strip:
+            # Reset this, since mounts and env variables were cleared.
+            struct["disable_auto_mount"] = False
+        return super()._apply_enrichment_before_to_dict_completion(struct, strip)

     def update_vols_and_mounts(
         self, volumes, volume_mounts, volume_mounts_field_name="_volume_mounts"
@@ -455,16 +526,18 @@ class KubeResourceSpec(FunctionSpec):
             return {}
         return resources

-    def _merge_node_selector(self, node_selector: typing.Dict[str, str]):
+    def _merge_node_selector(self, node_selector: dict[str, str]):
         if not node_selector:
             return

         # merge node selectors - precedence to existing node selector
-        self.node_selector =
+        self.node_selector = mlrun.utils.helpers.merge_dicts_with_precedence(
+            node_selector, self.node_selector
+        )

     def _merge_tolerations(
         self,
-        tolerations: typing.List[k8s_client.V1Toleration],
+        tolerations: list[k8s_client.V1Toleration],
         tolerations_field_name: str,
     ):
         if not tolerations:
@@ -649,7 +722,7 @@ class KubeResourceSpec(FunctionSpec):

     def _merge_node_selector_term_to_node_affinity(
         self,
-        node_selector_terms: typing.List[k8s_client.V1NodeSelectorTerm],
+        node_selector_terms: list[k8s_client.V1NodeSelectorTerm],
         affinity_field_name: str,
     ):
         if not node_selector_terms:
@@ -694,7 +767,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_affinity_node_selector_requirement(
         self,
-        node_selector_requirements: typing.List[k8s_client.V1NodeSelectorRequirement],
+        node_selector_requirements: list[k8s_client.V1NodeSelectorRequirement],
         affinity_field_name: str = "affinity",
     ):
         """
@@ -749,20 +822,18 @@ class KubeResourceSpec(FunctionSpec):

     @staticmethod
     def _prune_node_selector_requirements_from_node_selector_terms(
-        node_selector_terms: typing.List[k8s_client.V1NodeSelectorTerm],
-        node_selector_requirements_to_prune: typing.List[
-            k8s_client.V1NodeSelectorRequirement
-        ],
-    ) -> typing.List[k8s_client.V1NodeSelectorTerm]:
+        node_selector_terms: list[k8s_client.V1NodeSelectorTerm],
+        node_selector_requirements_to_prune: list[k8s_client.V1NodeSelectorRequirement],
+    ) -> list[k8s_client.V1NodeSelectorTerm]:
         """
         Goes over each expression in all the terms provided and removes the expressions if it matches
         one of the requirements provided to remove

         :return: New list of terms without the provided node selector requirements
         """
-        new_node_selector_terms: typing.List[k8s_client.V1NodeSelectorTerm] = []
+        new_node_selector_terms: list[k8s_client.V1NodeSelectorTerm] = []
         for term in node_selector_terms:
-            new_node_selector_requirements: typing.List[
+            new_node_selector_requirements: list[
                 k8s_client.V1NodeSelectorRequirement
             ] = []
             for node_selector_requirement in term.match_expressions:
@@ -791,7 +862,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_tolerations(
         self,
-        tolerations: typing.List[k8s_client.V1Toleration],
+        tolerations: list[k8s_client.V1Toleration],
         tolerations_field_name: str = "tolerations",
     ):
         """
@@ -820,7 +891,7 @@ class KubeResourceSpec(FunctionSpec):

     def _prune_node_selector(
         self,
-        node_selector: typing.Dict[str, str],
+        node_selector: dict[str, str],
         node_selector_field_name: str,
     ):
         """
@@ -865,12 +936,12 @@ class AutoMountType(str, Enum):
     @classmethod
     def all_mount_modifiers(cls):
         return [
-
-
-
-
-
-
+            mlrun_pipelines.mounts.v3io_cred.__name__,
+            mlrun_pipelines.mounts.mount_v3io.__name__,
+            mlrun_pipelines.mounts.mount_pvc.__name__,
+            mlrun_pipelines.mounts.auto_mount.__name__,
+            mlrun_pipelines.mounts.mount_s3.__name__,
+            mlrun_pipelines.mounts.set_env_variables.__name__,
         ]

     @classmethod
@@ -887,27 +958,27 @@ class AutoMountType(str, Enum):
     def _get_auto_modifier():
         # If we're running on Iguazio - use v3io_cred
         if mlconf.igz_version != "":
-            return
+            return mlrun_pipelines.mounts.v3io_cred
         # Else, either pvc mount if it's configured or do nothing otherwise
         pvc_configured = (
             "MLRUN_PVC_MOUNT" in os.environ
             or "pvc_name" in mlconf.get_storage_auto_mount_params()
         )
-        return
+        return mlrun_pipelines.mounts.mount_pvc if pvc_configured else None

     def get_modifier(self):
         return {
             AutoMountType.none: None,
-            AutoMountType.v3io_credentials:
-            AutoMountType.v3io_fuse:
-            AutoMountType.pvc:
+            AutoMountType.v3io_credentials: mlrun_pipelines.mounts.v3io_cred,
+            AutoMountType.v3io_fuse: mlrun_pipelines.mounts.mount_v3io,
+            AutoMountType.pvc: mlrun_pipelines.mounts.mount_pvc,
             AutoMountType.auto: self._get_auto_modifier(),
-            AutoMountType.s3:
-            AutoMountType.env:
+            AutoMountType.s3: mlrun_pipelines.mounts.mount_s3,
+            AutoMountType.env: mlrun_pipelines.mounts.set_env_variables,
         }[self]


-class KubeResource(BaseRuntime):
+class KubeResource(BaseRuntime, KfpAdapterMixin):
     """
     A parent class for runtimes that generate k8s resources when executing.
     """
@@ -916,7 +987,7 @@ class KubeResource(BaseRuntime):
     _is_nested = True

     def __init__(self, spec=None, metadata=None):
-        super().__init__(metadata, spec)
+        super().__init__(metadata=metadata, spec=spec)
         self.verbose = False

     @property
@@ -927,48 +998,6 @@ class KubeResource(BaseRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)

-    def to_dict(self, fields=None, exclude=None, strip=False):
-        struct = super().to_dict(fields, exclude, strip=strip)
-        api = k8s_client.ApiClient()
-        struct = api.sanitize_for_serialization(struct)
-        if strip:
-            spec = struct["spec"]
-            for attr in [
-                "volumes",
-                "volume_mounts",
-                "driver_volume_mounts",
-                "executor_volume_mounts",
-            ]:
-                if attr in spec:
-                    del spec[attr]
-            if "env" in spec and spec["env"]:
-                for ev in spec["env"]:
-                    if ev["name"].startswith("V3IO_"):
-                        ev["value"] = ""
-            # Reset this, since mounts and env variables were cleared.
-            spec["disable_auto_mount"] = False
-        return struct
-
-    def apply(self, modify):
-        """
-        Apply a modifier to the runtime which is used to change the runtimes k8s object's spec.
-        Modifiers can be either KFP modifiers or MLRun modifiers (which are compatible with KFP). All modifiers accept
-        a `kfp.dsl.ContainerOp` object, apply some changes on its spec and return it so modifiers can be chained
-        one after the other.
-
-        :param modify: a modifier runnable object
-        :return: the runtime (self) after the modifications
-        """
-
-        # Kubeflow pipeline have a hook to add the component to the DAG on ContainerOp init
-        # we remove the hook to suppress kubeflow op registration and return it after the apply()
-        old_op_handler = kfp.dsl._container_op._register_op_handler
-        kfp.dsl._container_op._register_op_handler = lambda x: self.metadata.name
-        cop = kfp.dsl.ContainerOp("name", "image")
-        kfp.dsl._container_op._register_op_handler = old_op_handler
-
-        return apply_kfp(modify, cop, self)
-
     def set_env_from_secret(self, name, secret=None, secret_key=None):
         """set pod environment var from secret"""
         secret_key = secret_key or name
@@ -1065,7 +1094,7 @@ class KubeResource(BaseRuntime):

     def set_state_thresholds(
         self,
-        state_thresholds: typing.Dict[str, str],
+        state_thresholds: dict[str, str],
         patch: bool = True,
     ):
         """
@@ -1078,12 +1107,12 @@ class KubeResource(BaseRuntime):

         :param state_thresholds: A dictionary of state to threshold. The supported states are:

-
-
-
-
-            See mlrun.mlconf.function.spec.state_thresholds for the default thresholds.
+            * pending_scheduled - The pod/crd is scheduled on a node but not yet running
+            * pending_not_scheduled - The pod/crd is not yet scheduled on a node
+            * executing - The pod/crd started and is running
+            * image_pull_backoff - The pod/crd is in image pull backoff

+        See :code:`mlrun.mlconf.function.spec.state_thresholds` for the default thresholds.
         :param patch: Whether to merge the given thresholds with the existing thresholds (True, default)
                       or override them (False)
         """
@@ -1126,9 +1155,9 @@ class KubeResource(BaseRuntime):
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
-        node_selector: typing.Optional[typing.Dict[str, str]] = None,
+        node_selector: typing.Optional[dict[str, str]] = None,
         affinity: typing.Optional[k8s_client.V1Affinity] = None,
-        tolerations: typing.Optional[typing.List[k8s_client.V1Toleration]] = None,
+        tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
         Enables to control on which k8s node the job will run
@@ -1146,9 +1175,10 @@ class KubeResource(BaseRuntime):
         """
         if node_name:
             self.spec.node_name = node_name
-        if node_selector:
+        if node_selector is not None:
+            validate_node_selectors(node_selectors=node_selector, raise_on_error=False)
             self.spec.node_selector = node_selector
-        if affinity:
+        if affinity is not None:
             self.spec.affinity = affinity
         if tolerations is not None:
             self.spec.tolerations = tolerations
@@ -1204,9 +1234,9 @@ class KubeResource(BaseRuntime):
             from kubernetes import client as k8s_client

             security_context = k8s_client.V1SecurityContext(
-
-
-
+                run_as_user=1000,
+                run_as_group=3000,
+            )
             function.with_security_context(security_context)

         More info:
@@ -1265,6 +1295,156 @@ class KubeResource(BaseRuntime):

         self.spec.validate_service_account(allowed_service_accounts)

+    def _configure_mlrun_build_with_source(
+        self, source, workdir=None, handler=None, pull_at_runtime=True, target_dir=None
+    ):
+        mlrun.utils.helpers.validate_builder_source(source, pull_at_runtime, workdir)
+
+        self.spec.build.source = source
+        if handler:
+            self.spec.default_handler = handler
+        if workdir:
+            self.spec.workdir = workdir
+        if target_dir:
+            self.spec.build.source_code_target_dir = target_dir
+
+        self.spec.build.load_source_on_run = pull_at_runtime
+        if (
+            self.spec.build.base_image
+            and not self.spec.build.commands
+            and pull_at_runtime
+            and not self.spec.image
+        ):
+            # if we load source from repo and don't need a full build use the base_image as the image
+            self.spec.image = self.spec.build.base_image
+        elif not pull_at_runtime:
+            # clear the image so build will not be skipped
+            self.spec.build.base_image = self.spec.build.base_image or self.spec.image
+            self.spec.image = ""
+
+    def _resolve_build_with_mlrun(self, with_mlrun: typing.Optional[bool] = None):
+        build = self.spec.build
+        if with_mlrun is None:
+            if build.with_mlrun is not None:
+                with_mlrun = build.with_mlrun
+            else:
+                with_mlrun = build.base_image and not (
+                    build.base_image.startswith("mlrun/")
+                    or "/mlrun/" in build.base_image
+                )
+        if (
+            not build.source
+            and not build.commands
+            and not build.requirements
+            and not build.extra
+            and with_mlrun
+        ):
+            logger.info(
+                "Running build to add mlrun package, set "
+                "with_mlrun=False to skip if its already in the image"
+            )
+        return with_mlrun
+
+    def _build_image(
+        self,
+        builder_env: dict,
+        force_build: bool,
+        mlrun_version_specifier: typing.Optional[bool],
+        show_on_failure: bool,
+        skip_deployed: bool,
+        watch: bool,
+        is_kfp: bool,
+        with_mlrun: typing.Optional[bool],
+    ):
+        # When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
+        # is actually done. (when a pipelines pod exits, the pipeline step marked as done)
+        if is_kfp:
+            watch = True
+
+        if skip_deployed and self.requires_build() and not self.is_deployed():
+            logger.warning(
+                f"Even though {skip_deployed=}, the build might be triggered due to the function's configuration. "
+                "See requires_build() and is_deployed() for reasoning."
+            )
+
+        db = self._get_db()
+        data = db.remote_builder(
+            self,
+            with_mlrun,
+            mlrun_version_specifier,
+            skip_deployed,
+            builder_env=builder_env,
+            force_build=force_build,
+        )
+        self.status = data["data"].get("status", None)
+        self.spec.image = mlrun.utils.get_in(
+            data, "data.spec.image"
+        ) or mlrun.utils.get_in(data, "data.spec.build.image")
+        self.spec.build.base_image = self.spec.build.base_image or mlrun.utils.get_in(
+            data, "data.spec.build.base_image"
+        )
+        # Get the source target dir in case it was enriched due to loading source
+        self.spec.build.source_code_target_dir = mlrun.utils.get_in(
+            data, "data.spec.build.source_code_target_dir"
+        ) or mlrun.utils.get_in(data, "data.spec.clone_target_dir")
+        ready = data.get("ready", False)
+        if not ready:
+            logger.info(
+                f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
+            )
+        if watch and not ready:
+            state = self._build_watch(
+                watch=watch,
+                show_on_failure=show_on_failure,
+            )
+            ready = state == "ready"
+            self.status.state = state
+
+        if watch and not ready:
+            raise mlrun.errors.MLRunRuntimeError("Deploy failed")
+        return ready
+
+    def _build_watch(
+        self,
+        watch: bool = True,
+        logs: bool = True,
+        show_on_failure: bool = False,
+    ):
+        db = self._get_db()
+        offset = 0
+        try:
+            text, _ = db.get_builder_status(self, 0, logs=logs)
+        except mlrun.db.RunDBError:
+            raise ValueError("function or build process not found")
+
+        def print_log(text):
+            if text and (
+                not show_on_failure
+                or self.status.state == mlrun.common.schemas.FunctionState.error
+            ):
+                print(text, end="")

+        print_log(text)
+        offset += len(text)
+        if watch:
+            while self.status.state in [
+                mlrun.common.schemas.FunctionState.pending,
+                mlrun.common.schemas.FunctionState.running,
+            ]:
+                time.sleep(2)
+                if show_on_failure:
+                    text = ""
+                    db.get_builder_status(self, 0, logs=False)
+                    if self.status.state == mlrun.common.schemas.FunctionState.error:
+                        # re-read the full log on failure
+                        text, _ = db.get_builder_status(self, offset, logs=logs)
+                else:
+                    text, _ = db.get_builder_status(self, offset, logs=logs)
+                print_log(text)
+                offset += len(text)
+
+        return self.status.state
+

 def _resolve_if_type_sanitized(attribute_name, attribute):
     attribute_config = sanitized_attributes[attribute_name]
@@ -1344,7 +1524,7 @@ def get_sanitized_attribute(spec, attribute_name: str):

     # check if attribute of type dict, and then check if type is sanitized
     if isinstance(attribute, dict):
-        if attribute_config["not_sanitized_class"]
+        if not isinstance(attribute_config["not_sanitized_class"], dict):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"expected to be of type {attribute_config.get('not_sanitized_class')} but got dict"
             )
@@ -1354,7 +1534,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
     elif isinstance(attribute, list) and not isinstance(
         attribute[0], attribute_config["sub_attribute_type"]
     ):
-        if attribute_config["not_sanitized_class"]
+        if not isinstance(attribute_config["not_sanitized_class"], list):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"expected to be of type {attribute_config.get('not_sanitized_class')} but got list"
             )
mlrun/runtimes/remotesparkjob.py  CHANGED

@@ -15,11 +15,11 @@ import re
 from subprocess import run

 import kubernetes.client
+from mlrun_pipelines.mounts import mount_v3io, mount_v3iod

 import mlrun.errors
 from mlrun.config import config

-from ..platforms.iguazio import mount_v3io, mount_v3iod
 from .kubejob import KubejobRuntime
 from .pod import KubeResourceSpec

@@ -92,7 +92,7 @@ class RemoteSparkSpec(KubeResourceSpec):
         self.provider = provider


-class RemoteSparkProviders(object):
+class RemoteSparkProviders:
     iguazio = "iguazio"


@@ -102,16 +102,13 @@ class RemoteSparkRuntime(KubejobRuntime):

     @classmethod
     def deploy_default_image(cls):
-
-        from mlrun.run import new_function
-
-        sj = new_function(
+        sj = mlrun.new_function(
             kind="remote-spark", name="remote-spark-default-image-deploy-temp"
         )
         sj.spec.build.image = cls.default_image
         sj.with_spark_service(spark_service="dummy-spark")
         sj.deploy()
-        get_run_db().delete_function(name=sj.metadata.name)
+        mlrun.get_run_db().delete_function(name=sj.metadata.name)

     def is_deployed(self):
         if (
@@ -130,14 +127,20 @@ class RemoteSparkRuntime(KubejobRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", RemoteSparkSpec)

-    def with_spark_service(self, spark_service, provider=RemoteSparkProviders.iguazio):
+    def with_spark_service(
+        self,
+        spark_service,
+        provider=RemoteSparkProviders.iguazio,
+        with_v3io_mount=True,
+    ):
         """Attach spark service to function"""
         self.spec.provider = provider
         if provider == RemoteSparkProviders.iguazio:
             self.spec.env.append(
                 {"name": "MLRUN_SPARK_CLIENT_IGZ_SPARK", "value": "true"}
             )
-            self.apply(mount_v3io())
+            if with_v3io_mount:
+                self.apply(mount_v3io())
             self.apply(
                 mount_v3iod(
                     namespace=config.namespace,