mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +134 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +133 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc8.dist-info/METADATA +0 -272
- mlrun-1.6.4rc8.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
|
@@ -11,19 +11,19 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
import typing
|
|
16
15
|
|
|
17
16
|
import kubernetes.client
|
|
17
|
+
from mlrun_pipelines.mounts import mount_v3io, mount_v3iod
|
|
18
18
|
|
|
19
19
|
import mlrun.common.schemas.function
|
|
20
20
|
import mlrun.errors
|
|
21
|
+
import mlrun.k8s_utils
|
|
21
22
|
import mlrun.runtimes.pod
|
|
22
23
|
from mlrun.config import config
|
|
23
24
|
|
|
24
25
|
from ...execution import MLClientCtx
|
|
25
26
|
from ...model import RunObject
|
|
26
|
-
from ...platforms.iguazio import mount_v3io, mount_v3iod
|
|
27
27
|
from ...utils import update_in, verify_field_regex
|
|
28
28
|
from ..kubejob import KubejobRuntime
|
|
29
29
|
from ..pod import KubeResourceSpec
|
|
@@ -69,6 +69,48 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
69
69
|
"driver_cores",
|
|
70
70
|
"executor_cores",
|
|
71
71
|
]
|
|
72
|
+
_default_fields_to_strip = KubeResourceSpec._default_fields_to_strip + [
|
|
73
|
+
"driver_node_selector",
|
|
74
|
+
"executor_node_selector",
|
|
75
|
+
"driver_tolerations",
|
|
76
|
+
"executor_tolerations",
|
|
77
|
+
"driver_affinity",
|
|
78
|
+
"executor_affinity",
|
|
79
|
+
"driver_volume_mounts",
|
|
80
|
+
"executor_volume_mounts",
|
|
81
|
+
"driver_cores",
|
|
82
|
+
"executor_cores",
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
__k8s_fields_to_serialize = [
|
|
86
|
+
"driver_volume_mounts",
|
|
87
|
+
"executor_volume_mounts",
|
|
88
|
+
"driver_node_selector",
|
|
89
|
+
"executor_node_selector",
|
|
90
|
+
"executor_affinity",
|
|
91
|
+
"executor_tolerations",
|
|
92
|
+
"driver_affinity",
|
|
93
|
+
"driver_tolerations",
|
|
94
|
+
]
|
|
95
|
+
_k8s_fields_to_serialize = (
|
|
96
|
+
KubeResourceSpec._k8s_fields_to_serialize + __k8s_fields_to_serialize
|
|
97
|
+
)
|
|
98
|
+
_fields_to_serialize = (
|
|
99
|
+
KubeResourceSpec._fields_to_serialize + __k8s_fields_to_serialize
|
|
100
|
+
)
|
|
101
|
+
_fields_to_skip_validation = KubeResourceSpec._fields_to_skip_validation + [
|
|
102
|
+
# TODO: affinity, tolerations and node_selector are skipped due to preemption mode transitions.
|
|
103
|
+
# Preemption mode 'none' depends on the previous mode while the default mode may enrich these values.
|
|
104
|
+
# When we allow 'None' values for these attributes we get their true values and they will undo the default
|
|
105
|
+
# enrichment when creating the runtime from dict.
|
|
106
|
+
# The enrichment should move to the server side and then this can be removed.
|
|
107
|
+
"driver_node_selector",
|
|
108
|
+
"executor_node_selector",
|
|
109
|
+
"executor_affinity",
|
|
110
|
+
"executor_tolerations",
|
|
111
|
+
"driver_affinity",
|
|
112
|
+
"driver_tolerations",
|
|
113
|
+
]
|
|
72
114
|
|
|
73
115
|
def __init__(
|
|
74
116
|
self,
|
|
@@ -189,26 +231,8 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
189
231
|
self.driver_cores = driver_cores
|
|
190
232
|
self.executor_cores = executor_cores
|
|
191
233
|
|
|
192
|
-
def to_dict(self, fields=None, exclude=None):
|
|
193
|
-
exclude = exclude or []
|
|
194
|
-
_exclude = [
|
|
195
|
-
"affinity",
|
|
196
|
-
"tolerations",
|
|
197
|
-
"security_context",
|
|
198
|
-
"executor_affinity",
|
|
199
|
-
"executor_tolerations",
|
|
200
|
-
"driver_affinity",
|
|
201
|
-
"driver_tolerations",
|
|
202
|
-
]
|
|
203
|
-
struct = super().to_dict(fields, exclude=list(set(exclude + _exclude)))
|
|
204
|
-
api = kubernetes.client.ApiClient()
|
|
205
|
-
for field in _exclude:
|
|
206
|
-
if field not in exclude:
|
|
207
|
-
struct[field] = api.sanitize_for_serialization(getattr(self, field))
|
|
208
|
-
return struct
|
|
209
|
-
|
|
210
234
|
@property
|
|
211
|
-
def executor_tolerations(self) ->
|
|
235
|
+
def executor_tolerations(self) -> list[kubernetes.client.V1Toleration]:
|
|
212
236
|
return self._executor_tolerations
|
|
213
237
|
|
|
214
238
|
@executor_tolerations.setter
|
|
@@ -220,7 +244,7 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
220
244
|
)
|
|
221
245
|
|
|
222
246
|
@property
|
|
223
|
-
def driver_tolerations(self) ->
|
|
247
|
+
def driver_tolerations(self) -> list[kubernetes.client.V1Toleration]:
|
|
224
248
|
return self._driver_tolerations
|
|
225
249
|
|
|
226
250
|
@driver_tolerations.setter
|
|
@@ -428,7 +452,7 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
428
452
|
class Spark3Runtime(KubejobRuntime):
|
|
429
453
|
group = "sparkoperator.k8s.io"
|
|
430
454
|
version = "v1beta2"
|
|
431
|
-
apiVersion = group + "/" + version
|
|
455
|
+
apiVersion = group + "/" + version # noqa: N815
|
|
432
456
|
kind = "spark"
|
|
433
457
|
plural = "sparkapplications"
|
|
434
458
|
|
|
@@ -461,11 +485,9 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
461
485
|
def with_node_selection(
|
|
462
486
|
self,
|
|
463
487
|
node_name: typing.Optional[str] = None,
|
|
464
|
-
node_selector: typing.Optional[
|
|
488
|
+
node_selector: typing.Optional[dict[str, str]] = None,
|
|
465
489
|
affinity: typing.Optional[kubernetes.client.V1Affinity] = None,
|
|
466
|
-
tolerations: typing.Optional[
|
|
467
|
-
typing.List[kubernetes.client.V1Toleration]
|
|
468
|
-
] = None,
|
|
490
|
+
tolerations: typing.Optional[list[kubernetes.client.V1Toleration]] = None,
|
|
469
491
|
):
|
|
470
492
|
if node_name:
|
|
471
493
|
raise NotImplementedError(
|
|
@@ -484,22 +506,18 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
484
506
|
raise NotImplementedError(
|
|
485
507
|
"Setting node name is not supported for spark runtime"
|
|
486
508
|
)
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
)
|
|
493
|
-
super().with_node_selection(node_name, node_selector, affinity, tolerations)
|
|
509
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
510
|
+
self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
|
|
511
|
+
self.with_executor_node_selection(
|
|
512
|
+
node_name, node_selector, affinity, tolerations
|
|
513
|
+
)
|
|
494
514
|
|
|
495
515
|
def with_driver_node_selection(
|
|
496
516
|
self,
|
|
497
517
|
node_name: typing.Optional[str] = None,
|
|
498
|
-
node_selector: typing.Optional[
|
|
518
|
+
node_selector: typing.Optional[dict[str, str]] = None,
|
|
499
519
|
affinity: typing.Optional[kubernetes.client.V1Affinity] = None,
|
|
500
|
-
tolerations: typing.Optional[
|
|
501
|
-
typing.List[kubernetes.client.V1Toleration]
|
|
502
|
-
] = None,
|
|
520
|
+
tolerations: typing.Optional[list[kubernetes.client.V1Toleration]] = None,
|
|
503
521
|
):
|
|
504
522
|
"""
|
|
505
523
|
Enables control of which k8s node the spark executor will run on.
|
|
@@ -518,21 +536,20 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
518
536
|
raise NotImplementedError(
|
|
519
537
|
"Setting node name is not supported for spark runtime"
|
|
520
538
|
)
|
|
521
|
-
if affinity:
|
|
539
|
+
if affinity is not None:
|
|
522
540
|
self.spec.driver_affinity = affinity
|
|
523
|
-
if node_selector:
|
|
541
|
+
if node_selector is not None:
|
|
542
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
524
543
|
self.spec.driver_node_selector = node_selector
|
|
525
|
-
if tolerations:
|
|
544
|
+
if tolerations is not None:
|
|
526
545
|
self.spec.driver_tolerations = tolerations
|
|
527
546
|
|
|
528
547
|
def with_executor_node_selection(
|
|
529
548
|
self,
|
|
530
549
|
node_name: typing.Optional[str] = None,
|
|
531
|
-
node_selector: typing.Optional[
|
|
550
|
+
node_selector: typing.Optional[dict[str, str]] = None,
|
|
532
551
|
affinity: typing.Optional[kubernetes.client.V1Affinity] = None,
|
|
533
|
-
tolerations: typing.Optional[
|
|
534
|
-
typing.List[kubernetes.client.V1Toleration]
|
|
535
|
-
] = None,
|
|
552
|
+
tolerations: typing.Optional[list[kubernetes.client.V1Toleration]] = None,
|
|
536
553
|
):
|
|
537
554
|
"""
|
|
538
555
|
Enables control of which k8s node the spark executor will run on.
|
|
@@ -551,11 +568,12 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
551
568
|
raise NotImplementedError(
|
|
552
569
|
"Setting node name is not supported for spark runtime"
|
|
553
570
|
)
|
|
554
|
-
if affinity:
|
|
571
|
+
if affinity is not None:
|
|
555
572
|
self.spec.executor_affinity = affinity
|
|
556
|
-
if node_selector:
|
|
573
|
+
if node_selector is not None:
|
|
574
|
+
mlrun.k8s_utils.validate_node_selectors(node_selector, raise_on_error=False)
|
|
557
575
|
self.spec.executor_node_selector = node_selector
|
|
558
|
-
if tolerations:
|
|
576
|
+
if tolerations is not None:
|
|
559
577
|
self.spec.executor_tolerations = tolerations
|
|
560
578
|
|
|
561
579
|
def with_preemption_mode(
|
|
@@ -794,9 +812,7 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
794
812
|
|
|
795
813
|
@classmethod
|
|
796
814
|
def deploy_default_image(cls, with_gpu=False):
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
815
|
+
sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
800
816
|
sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
|
|
801
817
|
|
|
802
818
|
# setting required resources
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -16,22 +16,21 @@ import hashlib
|
|
|
16
16
|
import json
|
|
17
17
|
import os
|
|
18
18
|
import re
|
|
19
|
-
import typing
|
|
20
19
|
from io import StringIO
|
|
21
20
|
from sys import stderr
|
|
22
21
|
|
|
23
22
|
import pandas as pd
|
|
24
|
-
from kubernetes import client
|
|
25
23
|
|
|
26
24
|
import mlrun
|
|
27
25
|
import mlrun.common.constants
|
|
26
|
+
import mlrun.common.constants as mlrun_constants
|
|
28
27
|
import mlrun.common.schemas
|
|
29
28
|
import mlrun.utils.regex
|
|
30
29
|
from mlrun.artifacts import TableArtifact
|
|
30
|
+
from mlrun.common.runtimes.constants import RunLabels
|
|
31
31
|
from mlrun.config import config
|
|
32
32
|
from mlrun.errors import err_to_str
|
|
33
33
|
from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
|
|
34
|
-
from mlrun.runtimes.constants import RunLabels
|
|
35
34
|
from mlrun.runtimes.generators import selector
|
|
36
35
|
from mlrun.utils import get_in, helpers, logger, verify_field_regex
|
|
37
36
|
|
|
@@ -40,9 +39,6 @@ class RunError(Exception):
|
|
|
40
39
|
pass
|
|
41
40
|
|
|
42
41
|
|
|
43
|
-
mlrun_key = "mlrun/"
|
|
44
|
-
|
|
45
|
-
|
|
46
42
|
class _ContextStore:
|
|
47
43
|
def __init__(self):
|
|
48
44
|
self._context = None
|
|
@@ -281,43 +277,6 @@ def get_item_name(item, attr="name"):
|
|
|
281
277
|
return getattr(item, attr, None)
|
|
282
278
|
|
|
283
279
|
|
|
284
|
-
def apply_kfp(modify, cop, runtime):
|
|
285
|
-
modify(cop)
|
|
286
|
-
|
|
287
|
-
# Have to do it here to avoid circular dependencies
|
|
288
|
-
from .pod import AutoMountType
|
|
289
|
-
|
|
290
|
-
if AutoMountType.is_auto_modifier(modify):
|
|
291
|
-
runtime.spec.disable_auto_mount = True
|
|
292
|
-
|
|
293
|
-
api = client.ApiClient()
|
|
294
|
-
for k, v in cop.pod_labels.items():
|
|
295
|
-
runtime.metadata.labels[k] = v
|
|
296
|
-
for k, v in cop.pod_annotations.items():
|
|
297
|
-
runtime.metadata.annotations[k] = v
|
|
298
|
-
if cop.container.env:
|
|
299
|
-
env_names = [
|
|
300
|
-
e.name if hasattr(e, "name") else e["name"] for e in runtime.spec.env
|
|
301
|
-
]
|
|
302
|
-
for e in api.sanitize_for_serialization(cop.container.env):
|
|
303
|
-
name = e["name"]
|
|
304
|
-
if name in env_names:
|
|
305
|
-
runtime.spec.env[env_names.index(name)] = e
|
|
306
|
-
else:
|
|
307
|
-
runtime.spec.env.append(e)
|
|
308
|
-
env_names.append(name)
|
|
309
|
-
cop.container.env.clear()
|
|
310
|
-
|
|
311
|
-
if cop.volumes and cop.container.volume_mounts:
|
|
312
|
-
vols = api.sanitize_for_serialization(cop.volumes)
|
|
313
|
-
mounts = api.sanitize_for_serialization(cop.container.volume_mounts)
|
|
314
|
-
runtime.spec.update_vols_and_mounts(vols, mounts)
|
|
315
|
-
cop.volumes.clear()
|
|
316
|
-
cop.container.volume_mounts.clear()
|
|
317
|
-
|
|
318
|
-
return runtime
|
|
319
|
-
|
|
320
|
-
|
|
321
280
|
def verify_limits(
|
|
322
281
|
resources_field_name,
|
|
323
282
|
mem=None,
|
|
@@ -411,41 +370,13 @@ def generate_resources(mem=None, cpu=None, gpus=None, gpu_type="nvidia.com/gpu")
|
|
|
411
370
|
|
|
412
371
|
|
|
413
372
|
def get_func_selector(project, name=None, tag=None):
|
|
414
|
-
s = [f"{
|
|
373
|
+
s = [f"{mlrun_constants.MLRunInternalLabels.project}={project}"]
|
|
415
374
|
if name:
|
|
416
|
-
s.append(f"{
|
|
417
|
-
s.append(f"{
|
|
375
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.function}={name}")
|
|
376
|
+
s.append(f"{mlrun_constants.MLRunInternalLabels.tag}={tag or 'latest'}")
|
|
418
377
|
return s
|
|
419
378
|
|
|
420
379
|
|
|
421
|
-
class k8s_resource:
|
|
422
|
-
kind = ""
|
|
423
|
-
per_run = False
|
|
424
|
-
per_function = False
|
|
425
|
-
k8client = None
|
|
426
|
-
|
|
427
|
-
def deploy_function(self, function):
|
|
428
|
-
pass
|
|
429
|
-
|
|
430
|
-
def release_function(self, function):
|
|
431
|
-
pass
|
|
432
|
-
|
|
433
|
-
def submit_run(self, function, runobj):
|
|
434
|
-
pass
|
|
435
|
-
|
|
436
|
-
def get_object(self, name, namespace=None):
|
|
437
|
-
return None
|
|
438
|
-
|
|
439
|
-
def get_status(self, name, namespace=None):
|
|
440
|
-
return None
|
|
441
|
-
|
|
442
|
-
def del_object(self, name, namespace=None):
|
|
443
|
-
pass
|
|
444
|
-
|
|
445
|
-
def get_pods(self, name, namespace=None, master=False):
|
|
446
|
-
return {}
|
|
447
|
-
|
|
448
|
-
|
|
449
380
|
def enrich_function_from_dict(function, function_dict):
|
|
450
381
|
override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
|
|
451
382
|
for attribute in [
|
|
@@ -501,10 +432,11 @@ def enrich_function_from_dict(function, function_dict):
|
|
|
501
432
|
|
|
502
433
|
def enrich_run_labels(
|
|
503
434
|
labels: dict,
|
|
504
|
-
labels_to_enrich:
|
|
435
|
+
labels_to_enrich: list[RunLabels] = None,
|
|
505
436
|
):
|
|
506
437
|
labels_enrichment = {
|
|
507
438
|
RunLabels.owner: os.environ.get("V3IO_USERNAME") or getpass.getuser(),
|
|
439
|
+
# TODO: remove this in 1.9.0
|
|
508
440
|
RunLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
|
|
509
441
|
}
|
|
510
442
|
labels_to_enrich = labels_to_enrich or RunLabels.all()
|
|
@@ -513,3 +445,37 @@ def enrich_run_labels(
|
|
|
513
445
|
if label.value not in labels and enrichment:
|
|
514
446
|
labels[label.value] = enrichment
|
|
515
447
|
return labels
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def resolve_node_selectors(
|
|
451
|
+
project_node_selector: dict, instance_node_selector: dict
|
|
452
|
+
) -> dict:
|
|
453
|
+
config_node_selector = mlrun.mlconf.get_default_function_node_selector()
|
|
454
|
+
if project_node_selector or config_node_selector:
|
|
455
|
+
mlrun.utils.logger.debug(
|
|
456
|
+
"Enriching node selector from project and mlrun config",
|
|
457
|
+
project_node_selector=project_node_selector,
|
|
458
|
+
config_node_selector=config_node_selector,
|
|
459
|
+
)
|
|
460
|
+
return mlrun.utils.helpers.merge_dicts_with_precedence(
|
|
461
|
+
config_node_selector,
|
|
462
|
+
project_node_selector,
|
|
463
|
+
instance_node_selector,
|
|
464
|
+
)
|
|
465
|
+
return instance_node_selector
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int):
|
|
469
|
+
"""
|
|
470
|
+
Set gateway proxy connect/read/send timeout annotations
|
|
471
|
+
:param annotations: The annotations to enrich
|
|
472
|
+
:param gateway_timeout: The timeout to set
|
|
473
|
+
"""
|
|
474
|
+
if not gateway_timeout:
|
|
475
|
+
return
|
|
476
|
+
gateway_timeout_str = str(gateway_timeout)
|
|
477
|
+
annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
|
|
478
|
+
gateway_timeout_str
|
|
479
|
+
)
|
|
480
|
+
annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = gateway_timeout_str
|
|
481
|
+
annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = gateway_timeout_str
|
mlrun/secrets.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from ast import literal_eval
|
|
16
16
|
from os import environ, getenv
|
|
17
|
-
from typing import Callable,
|
|
17
|
+
from typing import Callable, Optional, Union
|
|
18
18
|
|
|
19
19
|
from .utils import AzureVaultStore, list2dict
|
|
20
20
|
|
|
@@ -148,7 +148,7 @@ class SecretsStore:
|
|
|
148
148
|
|
|
149
149
|
def get_secret_or_env(
|
|
150
150
|
key: str,
|
|
151
|
-
secret_provider: Union[
|
|
151
|
+
secret_provider: Union[dict, SecretsStore, Callable, None] = None,
|
|
152
152
|
default: Optional[str] = None,
|
|
153
153
|
prefix: Optional[str] = None,
|
|
154
154
|
) -> str:
|
|
@@ -163,15 +163,19 @@ def get_secret_or_env(
|
|
|
163
163
|
|
|
164
164
|
Example::
|
|
165
165
|
|
|
166
|
-
secrets = {
|
|
166
|
+
secrets = {"KEY1": "VALUE1"}
|
|
167
167
|
secret = get_secret_or_env("KEY1", secret_provider=secrets)
|
|
168
168
|
|
|
169
|
+
|
|
169
170
|
# Using a function to retrieve a secret
|
|
170
171
|
def my_secret_provider(key):
|
|
171
172
|
# some internal logic to retrieve secret
|
|
172
173
|
return value
|
|
173
174
|
|
|
174
|
-
|
|
175
|
+
|
|
176
|
+
secret = get_secret_or_env(
|
|
177
|
+
"KEY1", secret_provider=my_secret_provider, default="TOO-MANY-SECRETS"
|
|
178
|
+
)
|
|
175
179
|
|
|
176
180
|
:param key: Secret key to look for
|
|
177
181
|
:param secret_provider: Dictionary, callable or `SecretsStore` to extract the secret value from. If using a
|
|
@@ -185,7 +189,7 @@ def get_secret_or_env(
|
|
|
185
189
|
|
|
186
190
|
value = None
|
|
187
191
|
if secret_provider:
|
|
188
|
-
if isinstance(secret_provider, (
|
|
192
|
+
if isinstance(secret_provider, (dict, SecretsStore)):
|
|
189
193
|
value = secret_provider.get(key)
|
|
190
194
|
else:
|
|
191
195
|
value = secret_provider(key)
|
mlrun/serving/__init__.py
CHANGED
|
@@ -22,10 +22,17 @@ __all__ = [
|
|
|
22
22
|
"RouterStep",
|
|
23
23
|
"QueueStep",
|
|
24
24
|
"ErrorStep",
|
|
25
|
+
"MonitoringApplicationStep",
|
|
25
26
|
]
|
|
26
27
|
|
|
27
28
|
from .routers import ModelRouter, VotingEnsemble # noqa
|
|
28
29
|
from .server import GraphContext, GraphServer, create_graph_server # noqa
|
|
29
|
-
from .states import
|
|
30
|
+
from .states import (
|
|
31
|
+
ErrorStep,
|
|
32
|
+
QueueStep,
|
|
33
|
+
RouterStep,
|
|
34
|
+
TaskStep,
|
|
35
|
+
MonitoringApplicationStep,
|
|
36
|
+
) # noqa
|
|
30
37
|
from .v1_serving import MLModelServer, new_v1_model_server # noqa
|
|
31
38
|
from .v2_serving import V2ModelServer # noqa
|
mlrun/serving/remote.py
CHANGED
|
@@ -37,8 +37,6 @@ default_backoff_factor = 1
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class RemoteStep(storey.SendToHttp):
|
|
40
|
-
"""class for calling remote endpoints"""
|
|
41
|
-
|
|
42
40
|
def __init__(
|
|
43
41
|
self,
|
|
44
42
|
url: str,
|
|
@@ -174,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
174
172
|
if not self._session:
|
|
175
173
|
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
176
174
|
self.retries,
|
|
177
|
-
self.backoff_factor
|
|
178
|
-
or mlrun.config.config.http_retry_defaults.backoff_factor,
|
|
175
|
+
self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
|
|
179
176
|
retry_on_exception=False,
|
|
180
177
|
retry_on_status=self.retries > 0,
|
|
181
178
|
retry_on_post=True,
|
|
@@ -187,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
187
184
|
resp = self._session.request(
|
|
188
185
|
method,
|
|
189
186
|
url,
|
|
190
|
-
verify=mlrun.
|
|
187
|
+
verify=mlrun.mlconf.httpdb.http.verify,
|
|
191
188
|
headers=headers,
|
|
192
189
|
data=body,
|
|
193
190
|
timeout=self.timeout,
|
|
@@ -242,8 +239,6 @@ class RemoteStep(storey.SendToHttp):
|
|
|
242
239
|
|
|
243
240
|
|
|
244
241
|
class BatchHttpRequests(_ConcurrentJobExecution):
|
|
245
|
-
"""class for calling remote endpoints in parallel"""
|
|
246
|
-
|
|
247
242
|
def __init__(
|
|
248
243
|
self,
|
|
249
244
|
url: str = None,
|