mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -12,16 +12,15 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import time
|
|
16
15
|
import warnings
|
|
17
16
|
|
|
17
|
+
from mlrun_pipelines.common.ops import build_op
|
|
18
|
+
|
|
18
19
|
import mlrun.common.schemas
|
|
19
20
|
import mlrun.db
|
|
20
21
|
import mlrun.errors
|
|
21
22
|
|
|
22
|
-
from ..kfpops import build_op
|
|
23
23
|
from ..model import RunObject
|
|
24
|
-
from ..utils import get_in, logger
|
|
25
24
|
from .pod import KubeResource
|
|
26
25
|
|
|
27
26
|
|
|
@@ -65,29 +64,13 @@ class KubejobRuntime(KubeResource):
|
|
|
65
64
|
:param pull_at_runtime: load the archive into the container at job runtime vs on build/deploy
|
|
66
65
|
:param target_dir: target dir on runtime pod or repo clone / archive extraction
|
|
67
66
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if target_dir:
|
|
76
|
-
self.spec.build.source_code_target_dir = target_dir
|
|
77
|
-
|
|
78
|
-
self.spec.build.load_source_on_run = pull_at_runtime
|
|
79
|
-
if (
|
|
80
|
-
self.spec.build.base_image
|
|
81
|
-
and not self.spec.build.commands
|
|
82
|
-
and pull_at_runtime
|
|
83
|
-
and not self.spec.image
|
|
84
|
-
):
|
|
85
|
-
# if we load source from repo and don't need a full build use the base_image as the image
|
|
86
|
-
self.spec.image = self.spec.build.base_image
|
|
87
|
-
elif not pull_at_runtime:
|
|
88
|
-
# clear the image so build will not be skipped
|
|
89
|
-
self.spec.build.base_image = self.spec.build.base_image or self.spec.image
|
|
90
|
-
self.spec.image = ""
|
|
67
|
+
self._configure_mlrun_build_with_source(
|
|
68
|
+
source=source,
|
|
69
|
+
workdir=workdir,
|
|
70
|
+
handler=handler,
|
|
71
|
+
pull_at_runtime=pull_at_runtime,
|
|
72
|
+
target_dir=target_dir,
|
|
73
|
+
)
|
|
91
74
|
|
|
92
75
|
def build_config(
|
|
93
76
|
self,
|
|
@@ -169,116 +152,39 @@ class KubejobRuntime(KubeResource):
|
|
|
169
152
|
show_on_failure: bool = False,
|
|
170
153
|
force_build: bool = False,
|
|
171
154
|
) -> bool:
|
|
172
|
-
"""
|
|
155
|
+
"""Deploy function, build container with dependencies
|
|
173
156
|
|
|
174
|
-
:param watch:
|
|
175
|
-
:param with_mlrun:
|
|
176
|
-
:param skip_deployed:
|
|
177
|
-
:param is_kfp:
|
|
178
|
-
:param mlrun_version_specifier:
|
|
157
|
+
:param watch: Wait for the deploy to complete (and print build logs)
|
|
158
|
+
:param with_mlrun: Add the current mlrun package to the container build
|
|
159
|
+
:param skip_deployed: Skip the build if we already have an image for the function
|
|
160
|
+
:param is_kfp: Deploy as part of a kfp pipeline
|
|
161
|
+
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
179
162
|
:param builder_env: Kaniko builder pod env vars dict (for config/credentials)
|
|
180
163
|
e.g. builder_env={"GIT_TOKEN": token}
|
|
181
|
-
:param show_on_failure:
|
|
182
|
-
:param force_build:
|
|
164
|
+
:param show_on_failure: Show logs only in case of build failure
|
|
165
|
+
:param force_build: Set True for force building the image, even when no changes were made
|
|
183
166
|
|
|
184
167
|
:return: True if the function is ready (deployed)
|
|
185
168
|
"""
|
|
186
169
|
|
|
187
170
|
build = self.spec.build
|
|
171
|
+
with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
|
|
188
172
|
|
|
189
|
-
if with_mlrun is None:
|
|
190
|
-
if build.with_mlrun is not None:
|
|
191
|
-
with_mlrun = build.with_mlrun
|
|
192
|
-
else:
|
|
193
|
-
with_mlrun = build.base_image and not (
|
|
194
|
-
build.base_image.startswith("mlrun/")
|
|
195
|
-
or "/mlrun/" in build.base_image
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
if (
|
|
199
|
-
not build.source
|
|
200
|
-
and not build.commands
|
|
201
|
-
and not build.requirements
|
|
202
|
-
and not build.extra
|
|
203
|
-
and with_mlrun
|
|
204
|
-
):
|
|
205
|
-
logger.info(
|
|
206
|
-
"Running build to add mlrun package, set "
|
|
207
|
-
"with_mlrun=False to skip if its already in the image"
|
|
208
|
-
)
|
|
209
173
|
self.status.state = ""
|
|
210
174
|
if build.base_image:
|
|
211
175
|
# clear the image so build will not be skipped
|
|
212
176
|
self.spec.image = ""
|
|
213
177
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
with_mlrun,
|
|
225
|
-
mlrun_version_specifier,
|
|
226
|
-
skip_deployed,
|
|
227
|
-
builder_env=builder_env,
|
|
228
|
-
force_build=force_build,
|
|
229
|
-
)
|
|
230
|
-
self.status = data["data"].get("status", None)
|
|
231
|
-
self.spec.image = get_in(data, "data.spec.image")
|
|
232
|
-
self.spec.build.base_image = self.spec.build.base_image or get_in(
|
|
233
|
-
data, "data.spec.build.base_image"
|
|
234
|
-
)
|
|
235
|
-
# Get the source target dir in case it was enriched due to loading source
|
|
236
|
-
self.spec.build.source_code_target_dir = get_in(
|
|
237
|
-
data, "data.spec.build.source_code_target_dir"
|
|
238
|
-
) or get_in(data, "data.spec.clone_target_dir")
|
|
239
|
-
ready = data.get("ready", False)
|
|
240
|
-
if not ready:
|
|
241
|
-
logger.info(
|
|
242
|
-
f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
|
|
243
|
-
)
|
|
244
|
-
if watch and not ready:
|
|
245
|
-
state = self._build_watch(watch, show_on_failure=show_on_failure)
|
|
246
|
-
ready = state == "ready"
|
|
247
|
-
self.status.state = state
|
|
248
|
-
|
|
249
|
-
if watch and not ready:
|
|
250
|
-
raise mlrun.errors.MLRunRuntimeError("Deploy failed")
|
|
251
|
-
return ready
|
|
252
|
-
|
|
253
|
-
def _build_watch(self, watch=True, logs=True, show_on_failure=False):
|
|
254
|
-
db = self._get_db()
|
|
255
|
-
offset = 0
|
|
256
|
-
try:
|
|
257
|
-
text, _ = db.get_builder_status(self, 0, logs=logs)
|
|
258
|
-
except mlrun.db.RunDBError:
|
|
259
|
-
raise ValueError("function or build process not found")
|
|
260
|
-
|
|
261
|
-
def print_log(text):
|
|
262
|
-
if text and (not show_on_failure or self.status.state == "error"):
|
|
263
|
-
print(text, end="")
|
|
264
|
-
|
|
265
|
-
print_log(text)
|
|
266
|
-
offset += len(text)
|
|
267
|
-
if watch:
|
|
268
|
-
while self.status.state in ["pending", "running"]:
|
|
269
|
-
time.sleep(2)
|
|
270
|
-
if show_on_failure:
|
|
271
|
-
text = ""
|
|
272
|
-
db.get_builder_status(self, 0, logs=False)
|
|
273
|
-
if self.status.state == "error":
|
|
274
|
-
# re-read the full log on failure
|
|
275
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
276
|
-
else:
|
|
277
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
278
|
-
print_log(text)
|
|
279
|
-
offset += len(text)
|
|
280
|
-
|
|
281
|
-
return self.status.state
|
|
178
|
+
return self._build_image(
|
|
179
|
+
builder_env=builder_env,
|
|
180
|
+
force_build=force_build,
|
|
181
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
182
|
+
show_on_failure=show_on_failure,
|
|
183
|
+
skip_deployed=skip_deployed,
|
|
184
|
+
watch=watch,
|
|
185
|
+
is_kfp=is_kfp,
|
|
186
|
+
with_mlrun=with_mlrun,
|
|
187
|
+
)
|
|
282
188
|
|
|
283
189
|
def deploy_step(
|
|
284
190
|
self,
|
mlrun/runtimes/local.py
CHANGED
|
@@ -33,6 +33,7 @@ from sys import executable
|
|
|
33
33
|
from nuclio import Event
|
|
34
34
|
|
|
35
35
|
import mlrun
|
|
36
|
+
import mlrun.common.constants as mlrun_constants
|
|
36
37
|
from mlrun.lists import RunList
|
|
37
38
|
|
|
38
39
|
from ..errors import err_to_str
|
|
@@ -257,7 +258,8 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
257
258
|
set_paths(os.path.realpath("."))
|
|
258
259
|
|
|
259
260
|
if (
|
|
260
|
-
runobj.metadata.labels.get(
|
|
261
|
+
runobj.metadata.labels.get(mlrun_constants.MLRunInternalLabels.kind)
|
|
262
|
+
== RemoteSparkRuntime.kind
|
|
261
263
|
and environ["MLRUN_SPARK_CLIENT_IGZ_SPARK"] == "true"
|
|
262
264
|
):
|
|
263
265
|
from mlrun.runtimes.remotesparkjob import igz_spark_pre_hook
|
|
@@ -382,6 +384,7 @@ def load_module(file_name, handler, context):
|
|
|
382
384
|
if spec is None:
|
|
383
385
|
raise RunError(f"Cannot import from {file_name!r}")
|
|
384
386
|
module = imputil.module_from_spec(spec)
|
|
387
|
+
sys.modules[mod_name] = module
|
|
385
388
|
spec.loader.exec_module(module)
|
|
386
389
|
|
|
387
390
|
class_args = {}
|
|
@@ -436,7 +439,7 @@ def run_exec(cmd, args, env=None, cwd=None):
|
|
|
436
439
|
return out, err
|
|
437
440
|
|
|
438
441
|
|
|
439
|
-
class _DupStdout
|
|
442
|
+
class _DupStdout:
|
|
440
443
|
def __init__(self):
|
|
441
444
|
self.terminal = sys.stdout
|
|
442
445
|
self.buf = StringIO()
|
|
@@ -493,7 +496,7 @@ def exec_from_params(handler, runobj: RunObject, context: MLClientCtx, cwd=None)
|
|
|
493
496
|
logger.warning("Run was aborted", err=err_to_str(exc))
|
|
494
497
|
# Run was aborted, the state run state is updated by the abort job, no need to commit again
|
|
495
498
|
context.set_state(
|
|
496
|
-
mlrun.runtimes.constants.RunStates.aborted, commit=False
|
|
499
|
+
mlrun.common.runtimes.constants.RunStates.aborted, commit=False
|
|
497
500
|
)
|
|
498
501
|
commit = False
|
|
499
502
|
except Exception as exc:
|
|
@@ -21,28 +21,8 @@ from mlrun.config import config
|
|
|
21
21
|
from .. import MPIJobCRDVersions
|
|
22
22
|
from .abstract import AbstractMPIJobRuntime
|
|
23
23
|
from .v1 import MpiRuntimeV1
|
|
24
|
-
from .v1alpha1 import MpiRuntimeV1Alpha1
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
def _resolve_mpijob_crd_version():
|
|
28
27
|
# config is expected to get enriched from the API through the client-spec
|
|
29
28
|
return config.mpijob_crd_version or MPIJobCRDVersions.default()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class MpiRuntimeContainer(containers.DeclarativeContainer):
|
|
33
|
-
resolver = providers.Callable(
|
|
34
|
-
_resolve_mpijob_crd_version,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
selector = providers.Selector(
|
|
38
|
-
resolver,
|
|
39
|
-
v1=providers.Object(MpiRuntimeV1),
|
|
40
|
-
v1alpha1=providers.Object(MpiRuntimeV1Alpha1),
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
# An empty selector to be overriden by the API
|
|
44
|
-
handler_selector = providers.Selector(
|
|
45
|
-
resolver,
|
|
46
|
-
v1=providers.Object(None),
|
|
47
|
-
v1alpha1=providers.Object(None),
|
|
48
|
-
)
|
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import abc
|
|
15
15
|
import os
|
|
16
|
-
import typing
|
|
17
16
|
|
|
18
17
|
from mlrun.config import config
|
|
19
18
|
from mlrun.runtimes.kubejob import KubejobRuntime
|
|
@@ -206,7 +205,7 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
|
|
|
206
205
|
|
|
207
206
|
self.set_envs(horovod_autotune_settings)
|
|
208
207
|
|
|
209
|
-
def set_mpi_args(self, args:
|
|
208
|
+
def set_mpi_args(self, args: list[str]) -> None:
|
|
210
209
|
"""Sets the runtime's mpi arguments to args.
|
|
211
210
|
|
|
212
211
|
Parameters
|
|
@@ -224,14 +223,14 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
|
|
|
224
223
|
```
|
|
225
224
|
# Define the wanted MPI arguments
|
|
226
225
|
mpi_args = []
|
|
227
|
-
mpi_args.append(
|
|
228
|
-
mpi_args.append(
|
|
229
|
-
mpi_args.append(
|
|
230
|
-
mpi_args.append(
|
|
231
|
-
mpi_args.append(
|
|
232
|
-
mpi_args.append(
|
|
233
|
-
mpi_args.append(
|
|
234
|
-
mpi_args.append(
|
|
226
|
+
mpi_args.append("-x")
|
|
227
|
+
mpi_args.append("NCCL_DEBUG=INFO")
|
|
228
|
+
mpi_args.append("-x")
|
|
229
|
+
mpi_args.append("NCCL_SOCKET_NTHREADS=2")
|
|
230
|
+
mpi_args.append("-x")
|
|
231
|
+
mpi_args.append("NCCL_NSOCKS_PERTHREAD=8")
|
|
232
|
+
mpi_args.append("-x")
|
|
233
|
+
mpi_args.append("NCCL_MIN_NCHANNELS=4")
|
|
235
234
|
|
|
236
235
|
# Set the MPI arguments in the function
|
|
237
236
|
fn.set_mpi_args(mpi_args)
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
14
|
+
from mlrun.common.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
15
15
|
from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime, MPIResourceSpec
|
|
16
16
|
|
|
17
17
|
|
|
@@ -12,12 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from
|
|
16
|
-
|
|
17
|
-
from .
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class ModelEndpointsTable(Base, ModelEndpointsBaseTable):
|
|
23
|
-
pass
|
|
15
|
+
from .serving import ServingRuntime, new_v2_model_server # noqa
|
|
16
|
+
from .nuclio import nuclio_init_hook # noqa
|
|
17
|
+
from .function import (
|
|
18
|
+
min_nuclio_versions,
|
|
19
|
+
RemoteRuntime,
|
|
20
|
+
) # noqa
|
|
21
|
+
from .api_gateway import APIGateway
|