mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/runtimes/__init__.py
CHANGED
|
@@ -30,19 +30,22 @@ __all__ = [
|
|
|
30
30
|
|
|
31
31
|
from mlrun.runtimes.utils import resolve_spark_operator_version
|
|
32
32
|
|
|
33
|
+
from ..common.runtimes.constants import MPIJobCRDVersions
|
|
33
34
|
from .base import BaseRuntime, RunError, RuntimeClassMode # noqa
|
|
34
|
-
from .constants import MPIJobCRDVersions
|
|
35
35
|
from .daskjob import DaskCluster # noqa
|
|
36
36
|
from .databricks_job.databricks_runtime import DatabricksRuntime
|
|
37
37
|
from .kubejob import KubejobRuntime, KubeResource # noqa
|
|
38
38
|
from .local import HandlerRuntime, LocalRuntime # noqa
|
|
39
|
-
from .mpijob import
|
|
39
|
+
from .mpijob import MpiRuntimeV1 # noqa
|
|
40
40
|
from .nuclio import (
|
|
41
41
|
RemoteRuntime,
|
|
42
42
|
ServingRuntime,
|
|
43
43
|
new_v2_model_server,
|
|
44
44
|
nuclio_init_hook,
|
|
45
45
|
)
|
|
46
|
+
from .nuclio.api_gateway import APIGateway
|
|
47
|
+
from .nuclio.application import ApplicationRuntime
|
|
48
|
+
from .nuclio.serving import serving_subkind
|
|
46
49
|
from .remotesparkjob import RemoteSparkRuntime
|
|
47
50
|
from .sparkjob import Spark3Runtime
|
|
48
51
|
|
|
@@ -101,6 +104,7 @@ class RuntimeKinds:
|
|
|
101
104
|
local = "local"
|
|
102
105
|
handler = "handler"
|
|
103
106
|
databricks = "databricks"
|
|
107
|
+
application = "application"
|
|
104
108
|
|
|
105
109
|
@staticmethod
|
|
106
110
|
def all():
|
|
@@ -115,6 +119,7 @@ class RuntimeKinds:
|
|
|
115
119
|
RuntimeKinds.mpijob,
|
|
116
120
|
RuntimeKinds.local,
|
|
117
121
|
RuntimeKinds.databricks,
|
|
122
|
+
RuntimeKinds.application,
|
|
118
123
|
]
|
|
119
124
|
|
|
120
125
|
@staticmethod
|
|
@@ -147,6 +152,23 @@ class RuntimeKinds:
|
|
|
147
152
|
RuntimeKinds.remote,
|
|
148
153
|
RuntimeKinds.nuclio,
|
|
149
154
|
RuntimeKinds.serving,
|
|
155
|
+
RuntimeKinds.application,
|
|
156
|
+
]
|
|
157
|
+
|
|
158
|
+
@staticmethod
|
|
159
|
+
def pure_nuclio_deployed_runtimes():
|
|
160
|
+
return [
|
|
161
|
+
RuntimeKinds.remote,
|
|
162
|
+
RuntimeKinds.nuclio,
|
|
163
|
+
RuntimeKinds.serving,
|
|
164
|
+
]
|
|
165
|
+
|
|
166
|
+
@staticmethod
|
|
167
|
+
def handlerless_runtimes():
|
|
168
|
+
return [
|
|
169
|
+
RuntimeKinds.serving,
|
|
170
|
+
# Application runtime handler is internal reverse proxy
|
|
171
|
+
RuntimeKinds.application,
|
|
150
172
|
]
|
|
151
173
|
|
|
152
174
|
@staticmethod
|
|
@@ -211,10 +233,39 @@ class RuntimeKinds:
|
|
|
211
233
|
# both spark and remote spark uses different mechanism for assigning images
|
|
212
234
|
return kind not in [RuntimeKinds.spark, RuntimeKinds.remotespark]
|
|
213
235
|
|
|
236
|
+
@staticmethod
|
|
237
|
+
def resolve_nuclio_runtime(kind: str, sub_kind: str):
|
|
238
|
+
kind = kind.split(":")[0]
|
|
239
|
+
if kind not in RuntimeKinds.nuclio_runtimes():
|
|
240
|
+
raise ValueError(
|
|
241
|
+
f"Kind {kind} is not a nuclio runtime, available runtimes are {RuntimeKinds.nuclio_runtimes()}"
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
if sub_kind == serving_subkind:
|
|
245
|
+
return ServingRuntime()
|
|
246
|
+
|
|
247
|
+
if kind == RuntimeKinds.application:
|
|
248
|
+
return ApplicationRuntime()
|
|
249
|
+
|
|
250
|
+
runtime = RemoteRuntime()
|
|
251
|
+
runtime.spec.function_kind = sub_kind
|
|
252
|
+
return runtime
|
|
253
|
+
|
|
254
|
+
@staticmethod
|
|
255
|
+
def resolve_nuclio_sub_kind(kind):
|
|
256
|
+
is_nuclio = kind.startswith("nuclio")
|
|
257
|
+
sub_kind = kind[kind.find(":") + 1 :] if is_nuclio and ":" in kind else None
|
|
258
|
+
if kind == RuntimeKinds.serving:
|
|
259
|
+
is_nuclio = True
|
|
260
|
+
sub_kind = serving_subkind
|
|
261
|
+
elif kind == RuntimeKinds.application:
|
|
262
|
+
is_nuclio = True
|
|
263
|
+
return is_nuclio, sub_kind
|
|
264
|
+
|
|
214
265
|
|
|
215
266
|
def get_runtime_class(kind: str):
|
|
216
267
|
if kind == RuntimeKinds.mpijob:
|
|
217
|
-
return
|
|
268
|
+
return MpiRuntimeV1
|
|
218
269
|
|
|
219
270
|
if kind == RuntimeKinds.spark:
|
|
220
271
|
return Spark3Runtime
|
|
@@ -228,6 +279,7 @@ def get_runtime_class(kind: str):
|
|
|
228
279
|
RuntimeKinds.local: LocalRuntime,
|
|
229
280
|
RuntimeKinds.remotespark: RemoteSparkRuntime,
|
|
230
281
|
RuntimeKinds.databricks: DatabricksRuntime,
|
|
282
|
+
RuntimeKinds.application: ApplicationRuntime,
|
|
231
283
|
}
|
|
232
284
|
|
|
233
285
|
return kind_runtime_map[kind]
|
mlrun/runtimes/base.py
CHANGED
|
@@ -21,8 +21,11 @@ from os import environ
|
|
|
21
21
|
from typing import Callable, Optional, Union
|
|
22
22
|
|
|
23
23
|
import requests.exceptions
|
|
24
|
+
from mlrun_pipelines.common.ops import mlrun_op
|
|
24
25
|
from nuclio.build import mlrun_footer
|
|
25
26
|
|
|
27
|
+
import mlrun.common.constants
|
|
28
|
+
import mlrun.common.constants as mlrun_constants
|
|
26
29
|
import mlrun.common.schemas
|
|
27
30
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
28
31
|
import mlrun.db
|
|
@@ -36,7 +39,6 @@ from mlrun.utils.helpers import generate_object_uri, verify_field_regex
|
|
|
36
39
|
from ..config import config
|
|
37
40
|
from ..datastore import store_manager
|
|
38
41
|
from ..errors import err_to_str
|
|
39
|
-
from ..kfpops import mlrun_op
|
|
40
42
|
from ..lists import RunList
|
|
41
43
|
from ..model import BaseMetadata, HyperParamOptions, ImageBuilder, ModelObj, RunObject
|
|
42
44
|
from ..utils import (
|
|
@@ -138,20 +140,24 @@ class FunctionSpec(ModelObj):
|
|
|
138
140
|
|
|
139
141
|
@property
|
|
140
142
|
def clone_target_dir(self):
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
143
|
+
# TODO: remove this property in 1.9.0
|
|
144
|
+
if self.build.source_code_target_dir:
|
|
145
|
+
warnings.warn(
|
|
146
|
+
"The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.9.0. "
|
|
147
|
+
"Use spec.build.source_code_target_dir instead.",
|
|
148
|
+
FutureWarning,
|
|
149
|
+
)
|
|
146
150
|
return self.build.source_code_target_dir
|
|
147
151
|
|
|
148
152
|
@clone_target_dir.setter
|
|
149
153
|
def clone_target_dir(self, clone_target_dir):
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
154
|
+
# TODO: remove this property in 1.9.0
|
|
155
|
+
if clone_target_dir:
|
|
156
|
+
warnings.warn(
|
|
157
|
+
"The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.9.0. "
|
|
158
|
+
"Use spec.build.source_code_target_dir instead.",
|
|
159
|
+
FutureWarning,
|
|
160
|
+
)
|
|
155
161
|
self.build.source_code_target_dir = clone_target_dir
|
|
156
162
|
|
|
157
163
|
def enrich_function_preemption_spec(self):
|
|
@@ -464,11 +470,11 @@ class BaseRuntime(ModelObj):
|
|
|
464
470
|
def _store_function(self, runspec, meta, db):
|
|
465
471
|
meta.labels["kind"] = self.kind
|
|
466
472
|
mlrun.runtimes.utils.enrich_run_labels(
|
|
467
|
-
meta.labels, [mlrun.runtimes.constants.RunLabels.owner]
|
|
473
|
+
meta.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
|
|
468
474
|
)
|
|
469
475
|
if runspec.spec.output_path:
|
|
470
476
|
runspec.spec.output_path = runspec.spec.output_path.replace(
|
|
471
|
-
"{{run.user}}", meta.labels[
|
|
477
|
+
"{{run.user}}", meta.labels[mlrun_constants.MLRunInternalLabels.owner]
|
|
472
478
|
)
|
|
473
479
|
|
|
474
480
|
if db and self.kind != "handler":
|
|
@@ -575,9 +581,9 @@ class BaseRuntime(ModelObj):
|
|
|
575
581
|
|
|
576
582
|
elif (
|
|
577
583
|
not was_none
|
|
578
|
-
and last_state != mlrun.runtimes.constants.RunStates.completed
|
|
584
|
+
and last_state != mlrun.common.runtimes.constants.RunStates.completed
|
|
579
585
|
and last_state
|
|
580
|
-
not in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
|
|
586
|
+
not in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
581
587
|
):
|
|
582
588
|
try:
|
|
583
589
|
runtime_cls = mlrun.runtimes.get_runtime_class(kind)
|
|
@@ -630,7 +636,9 @@ class BaseRuntime(ModelObj):
|
|
|
630
636
|
image = image or self.spec.image or ""
|
|
631
637
|
|
|
632
638
|
image = enrich_image_url(image, client_version, client_python_version)
|
|
633
|
-
if not image.startswith(
|
|
639
|
+
if not image.startswith(
|
|
640
|
+
mlrun.common.constants.IMAGE_NAME_ENRICH_REGISTRY_PREFIX
|
|
641
|
+
):
|
|
634
642
|
return image
|
|
635
643
|
registry, repository = get_parsed_docker_registry()
|
|
636
644
|
if registry:
|
|
@@ -700,11 +708,11 @@ class BaseRuntime(ModelObj):
|
|
|
700
708
|
"key": "the_key".
|
|
701
709
|
:param auto_build: when set to True and the function require build it will be built on the first
|
|
702
710
|
function run, use only if you dont plan on changing the build config between runs
|
|
703
|
-
:return:
|
|
711
|
+
:return: mlrun_pipelines.models.PipelineNodeWrapper
|
|
704
712
|
"""
|
|
705
713
|
|
|
706
714
|
# if the function contain KFP PipelineParams (futures) pass the full spec to the
|
|
707
|
-
#
|
|
715
|
+
# PipelineNodeWrapper this way KFP will substitute the params with previous step outputs
|
|
708
716
|
if use_db and not self._has_pipeline_param():
|
|
709
717
|
# if the same function is built as part of the pipeline we do not use the versioned function
|
|
710
718
|
# rather the latest function w the same tag so we can pick up the updated image/status
|
|
@@ -782,7 +790,7 @@ class BaseRuntime(ModelObj):
|
|
|
782
790
|
requirements: Optional[list[str]] = None,
|
|
783
791
|
overwrite: bool = False,
|
|
784
792
|
prepare_image_for_deploy: bool = True,
|
|
785
|
-
requirements_file: str = "",
|
|
793
|
+
requirements_file: Optional[str] = "",
|
|
786
794
|
):
|
|
787
795
|
"""add package requirements from file or list to build spec.
|
|
788
796
|
|
|
@@ -836,6 +844,12 @@ class BaseRuntime(ModelObj):
|
|
|
836
844
|
or (build.source and not build.load_source_on_run)
|
|
837
845
|
)
|
|
838
846
|
|
|
847
|
+
def enrich_runtime_spec(
|
|
848
|
+
self,
|
|
849
|
+
project_node_selector: dict[str, str],
|
|
850
|
+
):
|
|
851
|
+
pass
|
|
852
|
+
|
|
839
853
|
def prepare_image_for_deploy(self):
|
|
840
854
|
"""
|
|
841
855
|
if a function has a 'spec.image' it is considered to be deployed,
|
|
@@ -99,7 +99,7 @@ def save_credentials(
|
|
|
99
99
|
credentials["DATABRICKS_CLUSTER_ID"] = cluster_id
|
|
100
100
|
|
|
101
101
|
with open(credentials_path, "w") as yaml_file:
|
|
102
|
-
yaml.
|
|
102
|
+
yaml.safe_dump(credentials, yaml_file, default_flow_style=False)
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
def run_mlrun_databricks_job(
|
mlrun/runtimes/funcdoc.py
CHANGED
|
@@ -16,8 +16,6 @@ import ast
|
|
|
16
16
|
import inspect
|
|
17
17
|
import re
|
|
18
18
|
|
|
19
|
-
from deprecated import deprecated
|
|
20
|
-
|
|
21
19
|
from mlrun.model import FunctionEntrypoint
|
|
22
20
|
|
|
23
21
|
|
|
@@ -73,32 +71,6 @@ def func_dict(
|
|
|
73
71
|
}
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
# TODO: remove in 1.7.0
|
|
77
|
-
@deprecated(
|
|
78
|
-
version="1.5.0",
|
|
79
|
-
reason="'func_info' is deprecated and will be removed in 1.7.0, use 'ast_func_info' instead",
|
|
80
|
-
category=FutureWarning,
|
|
81
|
-
)
|
|
82
|
-
def func_info(fn) -> dict:
|
|
83
|
-
sig = inspect.signature(fn)
|
|
84
|
-
doc = inspect.getdoc(fn) or ""
|
|
85
|
-
|
|
86
|
-
out = func_dict(
|
|
87
|
-
name=fn.__name__,
|
|
88
|
-
doc=doc,
|
|
89
|
-
params=[inspect_param(p) for p in sig.parameters.values()],
|
|
90
|
-
returns=param_dict(
|
|
91
|
-
type=type_name(sig.return_annotation, empty_is_none=True), default=None
|
|
92
|
-
),
|
|
93
|
-
lineno=func_lineno(fn),
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
if not fn.__doc__ or not fn.__doc__.strip():
|
|
97
|
-
return out
|
|
98
|
-
|
|
99
|
-
return merge_doc(out, doc)
|
|
100
|
-
|
|
101
|
-
|
|
102
74
|
def func_lineno(fn):
|
|
103
75
|
try:
|
|
104
76
|
return inspect.getsourcelines(fn)[1]
|
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -12,16 +12,15 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import time
|
|
16
15
|
import warnings
|
|
17
16
|
|
|
17
|
+
from mlrun_pipelines.common.ops import build_op
|
|
18
|
+
|
|
18
19
|
import mlrun.common.schemas
|
|
19
20
|
import mlrun.db
|
|
20
21
|
import mlrun.errors
|
|
21
22
|
|
|
22
|
-
from ..kfpops import build_op
|
|
23
23
|
from ..model import RunObject
|
|
24
|
-
from ..utils import get_in, logger
|
|
25
24
|
from .pod import KubeResource
|
|
26
25
|
|
|
27
26
|
|
|
@@ -65,29 +64,13 @@ class KubejobRuntime(KubeResource):
|
|
|
65
64
|
:param pull_at_runtime: load the archive into the container at job runtime vs on build/deploy
|
|
66
65
|
:param target_dir: target dir on runtime pod or repo clone / archive extraction
|
|
67
66
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if target_dir:
|
|
76
|
-
self.spec.build.source_code_target_dir = target_dir
|
|
77
|
-
|
|
78
|
-
self.spec.build.load_source_on_run = pull_at_runtime
|
|
79
|
-
if (
|
|
80
|
-
self.spec.build.base_image
|
|
81
|
-
and not self.spec.build.commands
|
|
82
|
-
and pull_at_runtime
|
|
83
|
-
and not self.spec.image
|
|
84
|
-
):
|
|
85
|
-
# if we load source from repo and don't need a full build use the base_image as the image
|
|
86
|
-
self.spec.image = self.spec.build.base_image
|
|
87
|
-
elif not pull_at_runtime:
|
|
88
|
-
# clear the image so build will not be skipped
|
|
89
|
-
self.spec.build.base_image = self.spec.build.base_image or self.spec.image
|
|
90
|
-
self.spec.image = ""
|
|
67
|
+
self._configure_mlrun_build_with_source(
|
|
68
|
+
source=source,
|
|
69
|
+
workdir=workdir,
|
|
70
|
+
handler=handler,
|
|
71
|
+
pull_at_runtime=pull_at_runtime,
|
|
72
|
+
target_dir=target_dir,
|
|
73
|
+
)
|
|
91
74
|
|
|
92
75
|
def build_config(
|
|
93
76
|
self,
|
|
@@ -169,116 +152,39 @@ class KubejobRuntime(KubeResource):
|
|
|
169
152
|
show_on_failure: bool = False,
|
|
170
153
|
force_build: bool = False,
|
|
171
154
|
) -> bool:
|
|
172
|
-
"""
|
|
155
|
+
"""Deploy function, build container with dependencies
|
|
173
156
|
|
|
174
|
-
:param watch:
|
|
175
|
-
:param with_mlrun:
|
|
176
|
-
:param skip_deployed:
|
|
177
|
-
:param is_kfp:
|
|
178
|
-
:param mlrun_version_specifier:
|
|
157
|
+
:param watch: Wait for the deploy to complete (and print build logs)
|
|
158
|
+
:param with_mlrun: Add the current mlrun package to the container build
|
|
159
|
+
:param skip_deployed: Skip the build if we already have an image for the function
|
|
160
|
+
:param is_kfp: Deploy as part of a kfp pipeline
|
|
161
|
+
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
179
162
|
:param builder_env: Kaniko builder pod env vars dict (for config/credentials)
|
|
180
163
|
e.g. builder_env={"GIT_TOKEN": token}
|
|
181
|
-
:param show_on_failure:
|
|
182
|
-
:param force_build:
|
|
164
|
+
:param show_on_failure: Show logs only in case of build failure
|
|
165
|
+
:param force_build: Set True for force building the image, even when no changes were made
|
|
183
166
|
|
|
184
167
|
:return: True if the function is ready (deployed)
|
|
185
168
|
"""
|
|
186
169
|
|
|
187
170
|
build = self.spec.build
|
|
171
|
+
with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
|
|
188
172
|
|
|
189
|
-
if with_mlrun is None:
|
|
190
|
-
if build.with_mlrun is not None:
|
|
191
|
-
with_mlrun = build.with_mlrun
|
|
192
|
-
else:
|
|
193
|
-
with_mlrun = build.base_image and not (
|
|
194
|
-
build.base_image.startswith("mlrun/")
|
|
195
|
-
or "/mlrun/" in build.base_image
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
if (
|
|
199
|
-
not build.source
|
|
200
|
-
and not build.commands
|
|
201
|
-
and not build.requirements
|
|
202
|
-
and not build.extra
|
|
203
|
-
and with_mlrun
|
|
204
|
-
):
|
|
205
|
-
logger.info(
|
|
206
|
-
"Running build to add mlrun package, set "
|
|
207
|
-
"with_mlrun=False to skip if its already in the image"
|
|
208
|
-
)
|
|
209
173
|
self.status.state = ""
|
|
210
174
|
if build.base_image:
|
|
211
175
|
# clear the image so build will not be skipped
|
|
212
176
|
self.spec.image = ""
|
|
213
177
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
with_mlrun,
|
|
225
|
-
mlrun_version_specifier,
|
|
226
|
-
skip_deployed,
|
|
227
|
-
builder_env=builder_env,
|
|
228
|
-
force_build=force_build,
|
|
229
|
-
)
|
|
230
|
-
self.status = data["data"].get("status", None)
|
|
231
|
-
self.spec.image = get_in(data, "data.spec.image")
|
|
232
|
-
self.spec.build.base_image = self.spec.build.base_image or get_in(
|
|
233
|
-
data, "data.spec.build.base_image"
|
|
234
|
-
)
|
|
235
|
-
# Get the source target dir in case it was enriched due to loading source
|
|
236
|
-
self.spec.build.source_code_target_dir = get_in(
|
|
237
|
-
data, "data.spec.build.source_code_target_dir"
|
|
238
|
-
) or get_in(data, "data.spec.clone_target_dir")
|
|
239
|
-
ready = data.get("ready", False)
|
|
240
|
-
if not ready:
|
|
241
|
-
logger.info(
|
|
242
|
-
f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
|
|
243
|
-
)
|
|
244
|
-
if watch and not ready:
|
|
245
|
-
state = self._build_watch(watch, show_on_failure=show_on_failure)
|
|
246
|
-
ready = state == "ready"
|
|
247
|
-
self.status.state = state
|
|
248
|
-
|
|
249
|
-
if watch and not ready:
|
|
250
|
-
raise mlrun.errors.MLRunRuntimeError("Deploy failed")
|
|
251
|
-
return ready
|
|
252
|
-
|
|
253
|
-
def _build_watch(self, watch=True, logs=True, show_on_failure=False):
|
|
254
|
-
db = self._get_db()
|
|
255
|
-
offset = 0
|
|
256
|
-
try:
|
|
257
|
-
text, _ = db.get_builder_status(self, 0, logs=logs)
|
|
258
|
-
except mlrun.db.RunDBError:
|
|
259
|
-
raise ValueError("function or build process not found")
|
|
260
|
-
|
|
261
|
-
def print_log(text):
|
|
262
|
-
if text and (not show_on_failure or self.status.state == "error"):
|
|
263
|
-
print(text, end="")
|
|
264
|
-
|
|
265
|
-
print_log(text)
|
|
266
|
-
offset += len(text)
|
|
267
|
-
if watch:
|
|
268
|
-
while self.status.state in ["pending", "running"]:
|
|
269
|
-
time.sleep(2)
|
|
270
|
-
if show_on_failure:
|
|
271
|
-
text = ""
|
|
272
|
-
db.get_builder_status(self, 0, logs=False)
|
|
273
|
-
if self.status.state == "error":
|
|
274
|
-
# re-read the full log on failure
|
|
275
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
276
|
-
else:
|
|
277
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
278
|
-
print_log(text)
|
|
279
|
-
offset += len(text)
|
|
280
|
-
|
|
281
|
-
return self.status.state
|
|
178
|
+
return self._build_image(
|
|
179
|
+
builder_env=builder_env,
|
|
180
|
+
force_build=force_build,
|
|
181
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
182
|
+
show_on_failure=show_on_failure,
|
|
183
|
+
skip_deployed=skip_deployed,
|
|
184
|
+
watch=watch,
|
|
185
|
+
is_kfp=is_kfp,
|
|
186
|
+
with_mlrun=with_mlrun,
|
|
187
|
+
)
|
|
282
188
|
|
|
283
189
|
def deploy_step(
|
|
284
190
|
self,
|
mlrun/runtimes/local.py
CHANGED
|
@@ -33,6 +33,7 @@ from sys import executable
|
|
|
33
33
|
from nuclio import Event
|
|
34
34
|
|
|
35
35
|
import mlrun
|
|
36
|
+
import mlrun.common.constants as mlrun_constants
|
|
36
37
|
from mlrun.lists import RunList
|
|
37
38
|
|
|
38
39
|
from ..errors import err_to_str
|
|
@@ -257,7 +258,8 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
257
258
|
set_paths(os.path.realpath("."))
|
|
258
259
|
|
|
259
260
|
if (
|
|
260
|
-
runobj.metadata.labels.get(
|
|
261
|
+
runobj.metadata.labels.get(mlrun_constants.MLRunInternalLabels.kind)
|
|
262
|
+
== RemoteSparkRuntime.kind
|
|
261
263
|
and environ["MLRUN_SPARK_CLIENT_IGZ_SPARK"] == "true"
|
|
262
264
|
):
|
|
263
265
|
from mlrun.runtimes.remotesparkjob import igz_spark_pre_hook
|
|
@@ -382,6 +384,7 @@ def load_module(file_name, handler, context):
|
|
|
382
384
|
if spec is None:
|
|
383
385
|
raise RunError(f"Cannot import from {file_name!r}")
|
|
384
386
|
module = imputil.module_from_spec(spec)
|
|
387
|
+
sys.modules[mod_name] = module
|
|
385
388
|
spec.loader.exec_module(module)
|
|
386
389
|
|
|
387
390
|
class_args = {}
|
|
@@ -493,7 +496,7 @@ def exec_from_params(handler, runobj: RunObject, context: MLClientCtx, cwd=None)
|
|
|
493
496
|
logger.warning("Run was aborted", err=err_to_str(exc))
|
|
494
497
|
# Run was aborted, the state run state is updated by the abort job, no need to commit again
|
|
495
498
|
context.set_state(
|
|
496
|
-
mlrun.runtimes.constants.RunStates.aborted, commit=False
|
|
499
|
+
mlrun.common.runtimes.constants.RunStates.aborted, commit=False
|
|
497
500
|
)
|
|
498
501
|
commit = False
|
|
499
502
|
except Exception as exc:
|
|
@@ -21,28 +21,8 @@ from mlrun.config import config
|
|
|
21
21
|
from .. import MPIJobCRDVersions
|
|
22
22
|
from .abstract import AbstractMPIJobRuntime
|
|
23
23
|
from .v1 import MpiRuntimeV1
|
|
24
|
-
from .v1alpha1 import MpiRuntimeV1Alpha1
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
def _resolve_mpijob_crd_version():
|
|
28
27
|
# config is expected to get enriched from the API through the client-spec
|
|
29
28
|
return config.mpijob_crd_version or MPIJobCRDVersions.default()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class MpiRuntimeContainer(containers.DeclarativeContainer):
|
|
33
|
-
resolver = providers.Callable(
|
|
34
|
-
_resolve_mpijob_crd_version,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
selector = providers.Selector(
|
|
38
|
-
resolver,
|
|
39
|
-
v1=providers.Object(MpiRuntimeV1),
|
|
40
|
-
v1alpha1=providers.Object(MpiRuntimeV1Alpha1),
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
# An empty selector to be overriden by the API
|
|
44
|
-
handler_selector = providers.Selector(
|
|
45
|
-
resolver,
|
|
46
|
-
v1=providers.Object(None),
|
|
47
|
-
v1alpha1=providers.Object(None),
|
|
48
|
-
)
|
|
@@ -223,14 +223,14 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
|
|
|
223
223
|
```
|
|
224
224
|
# Define the wanted MPI arguments
|
|
225
225
|
mpi_args = []
|
|
226
|
-
mpi_args.append(
|
|
227
|
-
mpi_args.append(
|
|
228
|
-
mpi_args.append(
|
|
229
|
-
mpi_args.append(
|
|
230
|
-
mpi_args.append(
|
|
231
|
-
mpi_args.append(
|
|
232
|
-
mpi_args.append(
|
|
233
|
-
mpi_args.append(
|
|
226
|
+
mpi_args.append("-x")
|
|
227
|
+
mpi_args.append("NCCL_DEBUG=INFO")
|
|
228
|
+
mpi_args.append("-x")
|
|
229
|
+
mpi_args.append("NCCL_SOCKET_NTHREADS=2")
|
|
230
|
+
mpi_args.append("-x")
|
|
231
|
+
mpi_args.append("NCCL_NSOCKS_PERTHREAD=8")
|
|
232
|
+
mpi_args.append("-x")
|
|
233
|
+
mpi_args.append("NCCL_MIN_NCHANNELS=4")
|
|
234
234
|
|
|
235
235
|
# Set the MPI arguments in the function
|
|
236
236
|
fn.set_mpi_args(mpi_args)
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
14
|
+
from mlrun.common.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
15
15
|
from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime, MPIResourceSpec
|
|
16
16
|
|
|
17
17
|
|