mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -2
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +21 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +113 -2
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +11 -0
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +224 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +374 -102
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +231 -22
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +864 -228
- mlrun/db/nopdb.py +268 -16
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1125 -414
- mlrun/render.py +28 -22
- mlrun/run.py +207 -180
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +40 -14
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +646 -177
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc5.dist-info/METADATA +0 -269
- mlrun-1.7.0rc5.dist-info/RECORD +0 -323
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/runtimes/__init__.py
CHANGED
|
@@ -26,23 +26,30 @@ __all__ = [
|
|
|
26
26
|
"Spark3Runtime",
|
|
27
27
|
"DatabricksRuntime",
|
|
28
28
|
"KubeResource",
|
|
29
|
+
"ApplicationRuntime",
|
|
30
|
+
"MpiRuntimeV1",
|
|
29
31
|
]
|
|
30
32
|
|
|
33
|
+
import typing
|
|
34
|
+
|
|
31
35
|
from mlrun.runtimes.utils import resolve_spark_operator_version
|
|
32
36
|
|
|
37
|
+
from ..common.runtimes.constants import MPIJobCRDVersions
|
|
33
38
|
from .base import BaseRuntime, RunError, RuntimeClassMode # noqa
|
|
34
|
-
from .constants import MPIJobCRDVersions
|
|
35
39
|
from .daskjob import DaskCluster # noqa
|
|
36
40
|
from .databricks_job.databricks_runtime import DatabricksRuntime
|
|
37
41
|
from .kubejob import KubejobRuntime, KubeResource # noqa
|
|
38
42
|
from .local import HandlerRuntime, LocalRuntime # noqa
|
|
39
|
-
from .mpijob import
|
|
43
|
+
from .mpijob import MpiRuntimeV1 # noqa
|
|
40
44
|
from .nuclio import (
|
|
41
45
|
RemoteRuntime,
|
|
42
46
|
ServingRuntime,
|
|
43
47
|
new_v2_model_server,
|
|
44
48
|
nuclio_init_hook,
|
|
45
49
|
)
|
|
50
|
+
from .nuclio.api_gateway import APIGateway
|
|
51
|
+
from .nuclio.application import ApplicationRuntime
|
|
52
|
+
from .nuclio.serving import serving_subkind
|
|
46
53
|
from .remotesparkjob import RemoteSparkRuntime
|
|
47
54
|
from .sparkjob import Spark3Runtime
|
|
48
55
|
|
|
@@ -101,6 +108,7 @@ class RuntimeKinds:
|
|
|
101
108
|
local = "local"
|
|
102
109
|
handler = "handler"
|
|
103
110
|
databricks = "databricks"
|
|
111
|
+
application = "application"
|
|
104
112
|
|
|
105
113
|
@staticmethod
|
|
106
114
|
def all():
|
|
@@ -115,6 +123,7 @@ class RuntimeKinds:
|
|
|
115
123
|
RuntimeKinds.mpijob,
|
|
116
124
|
RuntimeKinds.local,
|
|
117
125
|
RuntimeKinds.databricks,
|
|
126
|
+
RuntimeKinds.application,
|
|
118
127
|
]
|
|
119
128
|
|
|
120
129
|
@staticmethod
|
|
@@ -147,6 +156,23 @@ class RuntimeKinds:
|
|
|
147
156
|
RuntimeKinds.remote,
|
|
148
157
|
RuntimeKinds.nuclio,
|
|
149
158
|
RuntimeKinds.serving,
|
|
159
|
+
RuntimeKinds.application,
|
|
160
|
+
]
|
|
161
|
+
|
|
162
|
+
@staticmethod
|
|
163
|
+
def pure_nuclio_deployed_runtimes():
|
|
164
|
+
return [
|
|
165
|
+
RuntimeKinds.remote,
|
|
166
|
+
RuntimeKinds.nuclio,
|
|
167
|
+
RuntimeKinds.serving,
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
@staticmethod
|
|
171
|
+
def handlerless_runtimes():
|
|
172
|
+
return [
|
|
173
|
+
RuntimeKinds.serving,
|
|
174
|
+
# Application runtime handler is internal reverse proxy
|
|
175
|
+
RuntimeKinds.application,
|
|
150
176
|
]
|
|
151
177
|
|
|
152
178
|
@staticmethod
|
|
@@ -157,7 +183,7 @@ class RuntimeKinds:
|
|
|
157
183
|
]
|
|
158
184
|
|
|
159
185
|
@staticmethod
|
|
160
|
-
def is_log_collectable_runtime(kind: str):
|
|
186
|
+
def is_log_collectable_runtime(kind: typing.Optional[str]):
|
|
161
187
|
"""
|
|
162
188
|
whether log collector can collect logs for that runtime
|
|
163
189
|
:param kind: kind name
|
|
@@ -168,13 +194,18 @@ class RuntimeKinds:
|
|
|
168
194
|
if RuntimeKinds.is_local_runtime(kind):
|
|
169
195
|
return False
|
|
170
196
|
|
|
171
|
-
if
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
197
|
+
if (
|
|
198
|
+
kind
|
|
199
|
+
not in [
|
|
200
|
+
# dask implementation is different from other runtimes, because few runs can be run against the same
|
|
201
|
+
# runtime resource, so collecting logs on that runtime resource won't be correct, the way we collect
|
|
202
|
+
# logs for dask is by using `log_std` on client side after we execute the code against the cluster,
|
|
203
|
+
# as submitting the run with the dask client will return the run stdout.
|
|
204
|
+
# For more information head to `DaskCluster._run`.
|
|
205
|
+
RuntimeKinds.dask
|
|
206
|
+
]
|
|
207
|
+
+ RuntimeKinds.nuclio_runtimes()
|
|
208
|
+
):
|
|
178
209
|
return True
|
|
179
210
|
|
|
180
211
|
return False
|
|
@@ -211,10 +242,43 @@ class RuntimeKinds:
|
|
|
211
242
|
# both spark and remote spark uses different mechanism for assigning images
|
|
212
243
|
return kind not in [RuntimeKinds.spark, RuntimeKinds.remotespark]
|
|
213
244
|
|
|
245
|
+
@staticmethod
|
|
246
|
+
def supports_from_notebook(kind):
|
|
247
|
+
return kind not in [RuntimeKinds.application]
|
|
248
|
+
|
|
249
|
+
@staticmethod
|
|
250
|
+
def resolve_nuclio_runtime(kind: str, sub_kind: str):
|
|
251
|
+
kind = kind.split(":")[0]
|
|
252
|
+
if kind not in RuntimeKinds.nuclio_runtimes():
|
|
253
|
+
raise ValueError(
|
|
254
|
+
f"Kind {kind} is not a nuclio runtime, available runtimes are {RuntimeKinds.nuclio_runtimes()}"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
if sub_kind == serving_subkind:
|
|
258
|
+
return ServingRuntime()
|
|
259
|
+
|
|
260
|
+
if kind == RuntimeKinds.application:
|
|
261
|
+
return ApplicationRuntime()
|
|
262
|
+
|
|
263
|
+
runtime = RemoteRuntime()
|
|
264
|
+
runtime.spec.function_kind = sub_kind
|
|
265
|
+
return runtime
|
|
266
|
+
|
|
267
|
+
@staticmethod
|
|
268
|
+
def resolve_nuclio_sub_kind(kind):
|
|
269
|
+
is_nuclio = kind.startswith("nuclio")
|
|
270
|
+
sub_kind = kind[kind.find(":") + 1 :] if is_nuclio and ":" in kind else None
|
|
271
|
+
if kind == RuntimeKinds.serving:
|
|
272
|
+
is_nuclio = True
|
|
273
|
+
sub_kind = serving_subkind
|
|
274
|
+
elif kind == RuntimeKinds.application:
|
|
275
|
+
is_nuclio = True
|
|
276
|
+
return is_nuclio, sub_kind
|
|
277
|
+
|
|
214
278
|
|
|
215
279
|
def get_runtime_class(kind: str):
|
|
216
280
|
if kind == RuntimeKinds.mpijob:
|
|
217
|
-
return
|
|
281
|
+
return MpiRuntimeV1
|
|
218
282
|
|
|
219
283
|
if kind == RuntimeKinds.spark:
|
|
220
284
|
return Spark3Runtime
|
|
@@ -228,6 +292,7 @@ def get_runtime_class(kind: str):
|
|
|
228
292
|
RuntimeKinds.local: LocalRuntime,
|
|
229
293
|
RuntimeKinds.remotespark: RemoteSparkRuntime,
|
|
230
294
|
RuntimeKinds.databricks: DatabricksRuntime,
|
|
295
|
+
RuntimeKinds.application: ApplicationRuntime,
|
|
231
296
|
}
|
|
232
297
|
|
|
233
298
|
return kind_runtime_map[kind]
|
mlrun/runtimes/base.py
CHANGED
|
@@ -21,8 +21,11 @@ from os import environ
|
|
|
21
21
|
from typing import Callable, Optional, Union
|
|
22
22
|
|
|
23
23
|
import requests.exceptions
|
|
24
|
+
from mlrun_pipelines.common.ops import mlrun_op
|
|
24
25
|
from nuclio.build import mlrun_footer
|
|
25
26
|
|
|
27
|
+
import mlrun.common.constants
|
|
28
|
+
import mlrun.common.constants as mlrun_constants
|
|
26
29
|
import mlrun.common.schemas
|
|
27
30
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
28
31
|
import mlrun.db
|
|
@@ -36,7 +39,6 @@ from mlrun.utils.helpers import generate_object_uri, verify_field_regex
|
|
|
36
39
|
from ..config import config
|
|
37
40
|
from ..datastore import store_manager
|
|
38
41
|
from ..errors import err_to_str
|
|
39
|
-
from ..kfpops import mlrun_op
|
|
40
42
|
from ..lists import RunList
|
|
41
43
|
from ..model import BaseMetadata, HyperParamOptions, ImageBuilder, ModelObj, RunObject
|
|
42
44
|
from ..utils import (
|
|
@@ -66,6 +68,7 @@ spec_fields = [
|
|
|
66
68
|
"disable_auto_mount",
|
|
67
69
|
"allow_empty_resources",
|
|
68
70
|
"clone_target_dir",
|
|
71
|
+
"reset_on_run",
|
|
69
72
|
]
|
|
70
73
|
|
|
71
74
|
|
|
@@ -334,6 +337,7 @@ class BaseRuntime(ModelObj):
|
|
|
334
337
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
335
338
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
336
339
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
340
|
+
reset_on_run: Optional[bool] = None,
|
|
337
341
|
**launcher_kwargs,
|
|
338
342
|
) -> RunObject:
|
|
339
343
|
"""
|
|
@@ -388,6 +392,9 @@ class BaseRuntime(ModelObj):
|
|
|
388
392
|
standards and is at least 1 minute (-1 for infinite).
|
|
389
393
|
If the phase is active for longer than the threshold, the run will be aborted.
|
|
390
394
|
See mlconf.function.spec.state_thresholds for the state options and default values.
|
|
395
|
+
:param reset_on_run: When True, function python modules would reload prior to code execution.
|
|
396
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
397
|
+
conjunction with the local=True argument.
|
|
391
398
|
:return: Run context object (RunObject) with run metadata, results and status
|
|
392
399
|
"""
|
|
393
400
|
launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
|
|
@@ -416,15 +423,22 @@ class BaseRuntime(ModelObj):
|
|
|
416
423
|
notifications=notifications,
|
|
417
424
|
returns=returns,
|
|
418
425
|
state_thresholds=state_thresholds,
|
|
426
|
+
reset_on_run=reset_on_run,
|
|
419
427
|
)
|
|
420
428
|
|
|
421
|
-
def _get_db_run(
|
|
429
|
+
def _get_db_run(
|
|
430
|
+
self,
|
|
431
|
+
task: RunObject = None,
|
|
432
|
+
run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
|
|
433
|
+
):
|
|
422
434
|
if self._get_db() and task:
|
|
423
435
|
project = task.metadata.project
|
|
424
436
|
uid = task.metadata.uid
|
|
425
437
|
iter = task.metadata.iteration
|
|
426
438
|
try:
|
|
427
|
-
return self._get_db().read_run(
|
|
439
|
+
return self._get_db().read_run(
|
|
440
|
+
uid, project, iter=iter, format_=run_format
|
|
441
|
+
)
|
|
428
442
|
except mlrun.db.RunDBError:
|
|
429
443
|
return None
|
|
430
444
|
if task:
|
|
@@ -468,11 +482,11 @@ class BaseRuntime(ModelObj):
|
|
|
468
482
|
def _store_function(self, runspec, meta, db):
|
|
469
483
|
meta.labels["kind"] = self.kind
|
|
470
484
|
mlrun.runtimes.utils.enrich_run_labels(
|
|
471
|
-
meta.labels, [mlrun.runtimes.constants.RunLabels.owner]
|
|
485
|
+
meta.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
|
|
472
486
|
)
|
|
473
487
|
if runspec.spec.output_path:
|
|
474
488
|
runspec.spec.output_path = runspec.spec.output_path.replace(
|
|
475
|
-
"{{run.user}}", meta.labels[
|
|
489
|
+
"{{run.user}}", meta.labels[mlrun_constants.MLRunInternalLabels.owner]
|
|
476
490
|
)
|
|
477
491
|
|
|
478
492
|
if db and self.kind != "handler":
|
|
@@ -541,13 +555,14 @@ class BaseRuntime(ModelObj):
|
|
|
541
555
|
self,
|
|
542
556
|
resp: dict = None,
|
|
543
557
|
task: RunObject = None,
|
|
544
|
-
err=None,
|
|
558
|
+
err: Union[Exception, str] = None,
|
|
559
|
+
run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
|
|
545
560
|
) -> typing.Optional[dict]:
|
|
546
561
|
"""update the task state in the DB"""
|
|
547
562
|
was_none = False
|
|
548
563
|
if resp is None and task:
|
|
549
564
|
was_none = True
|
|
550
|
-
resp = self._get_db_run(task)
|
|
565
|
+
resp = self._get_db_run(task, run_format)
|
|
551
566
|
|
|
552
567
|
if not resp:
|
|
553
568
|
self.store_run(task)
|
|
@@ -579,9 +594,9 @@ class BaseRuntime(ModelObj):
|
|
|
579
594
|
|
|
580
595
|
elif (
|
|
581
596
|
not was_none
|
|
582
|
-
and last_state != mlrun.runtimes.constants.RunStates.completed
|
|
597
|
+
and last_state != mlrun.common.runtimes.constants.RunStates.completed
|
|
583
598
|
and last_state
|
|
584
|
-
not in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
|
|
599
|
+
not in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
585
600
|
):
|
|
586
601
|
try:
|
|
587
602
|
runtime_cls = mlrun.runtimes.get_runtime_class(kind)
|
|
@@ -634,7 +649,9 @@ class BaseRuntime(ModelObj):
|
|
|
634
649
|
image = image or self.spec.image or ""
|
|
635
650
|
|
|
636
651
|
image = enrich_image_url(image, client_version, client_python_version)
|
|
637
|
-
if not image.startswith(
|
|
652
|
+
if not image.startswith(
|
|
653
|
+
mlrun.common.constants.IMAGE_NAME_ENRICH_REGISTRY_PREFIX
|
|
654
|
+
):
|
|
638
655
|
return image
|
|
639
656
|
registry, repository = get_parsed_docker_registry()
|
|
640
657
|
if registry:
|
|
@@ -657,7 +674,7 @@ class BaseRuntime(ModelObj):
|
|
|
657
674
|
selector="",
|
|
658
675
|
hyper_param_options: HyperParamOptions = None,
|
|
659
676
|
inputs: dict = None,
|
|
660
|
-
outputs:
|
|
677
|
+
outputs: list = None,
|
|
661
678
|
workdir: str = "",
|
|
662
679
|
artifact_path: str = "",
|
|
663
680
|
image: str = "",
|
|
@@ -704,11 +721,11 @@ class BaseRuntime(ModelObj):
|
|
|
704
721
|
"key": "the_key".
|
|
705
722
|
:param auto_build: when set to True and the function require build it will be built on the first
|
|
706
723
|
function run, use only if you dont plan on changing the build config between runs
|
|
707
|
-
:return:
|
|
724
|
+
:return: mlrun_pipelines.models.PipelineNodeWrapper
|
|
708
725
|
"""
|
|
709
726
|
|
|
710
727
|
# if the function contain KFP PipelineParams (futures) pass the full spec to the
|
|
711
|
-
#
|
|
728
|
+
# PipelineNodeWrapper this way KFP will substitute the params with previous step outputs
|
|
712
729
|
if use_db and not self._has_pipeline_param():
|
|
713
730
|
# if the same function is built as part of the pipeline we do not use the versioned function
|
|
714
731
|
# rather the latest function w the same tag so we can pick up the updated image/status
|
|
@@ -786,7 +803,7 @@ class BaseRuntime(ModelObj):
|
|
|
786
803
|
requirements: Optional[list[str]] = None,
|
|
787
804
|
overwrite: bool = False,
|
|
788
805
|
prepare_image_for_deploy: bool = True,
|
|
789
|
-
requirements_file: str = "",
|
|
806
|
+
requirements_file: Optional[str] = "",
|
|
790
807
|
):
|
|
791
808
|
"""add package requirements from file or list to build spec.
|
|
792
809
|
|
|
@@ -840,6 +857,12 @@ class BaseRuntime(ModelObj):
|
|
|
840
857
|
or (build.source and not build.load_source_on_run)
|
|
841
858
|
)
|
|
842
859
|
|
|
860
|
+
def enrich_runtime_spec(
|
|
861
|
+
self,
|
|
862
|
+
project_node_selector: dict[str, str],
|
|
863
|
+
):
|
|
864
|
+
pass
|
|
865
|
+
|
|
843
866
|
def prepare_image_for_deploy(self):
|
|
844
867
|
"""
|
|
845
868
|
if a function has a 'spec.image' it is considered to be deployed,
|
|
@@ -906,3 +929,6 @@ class BaseRuntime(ModelObj):
|
|
|
906
929
|
if "default" in p:
|
|
907
930
|
line += f", default={p['default']}"
|
|
908
931
|
print(" " + line)
|
|
932
|
+
|
|
933
|
+
def skip_image_enrichment(self):
|
|
934
|
+
return False
|
mlrun/runtimes/daskjob.py
CHANGED
|
@@ -379,7 +379,7 @@ class DaskCluster(KubejobRuntime):
|
|
|
379
379
|
:param show_on_failure: show logs only in case of build failure
|
|
380
380
|
:param force_build: force building the image, even when no changes were made
|
|
381
381
|
|
|
382
|
-
:return
|
|
382
|
+
:return: True if the function is ready (deployed)
|
|
383
383
|
"""
|
|
384
384
|
return super().deploy(
|
|
385
385
|
watch,
|
|
@@ -494,6 +494,7 @@ class DaskCluster(KubejobRuntime):
|
|
|
494
494
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
495
495
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
496
496
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
497
|
+
reset_on_run: Optional[bool] = None,
|
|
497
498
|
**launcher_kwargs,
|
|
498
499
|
) -> RunObject:
|
|
499
500
|
if state_thresholds:
|
|
@@ -547,7 +548,13 @@ class DaskCluster(KubejobRuntime):
|
|
|
547
548
|
"specified handler (string) without command "
|
|
548
549
|
"(py file path), specify command or use handler pointer"
|
|
549
550
|
)
|
|
550
|
-
|
|
551
|
+
# Do not embed the module in system as it is not persistent with the dask cluster
|
|
552
|
+
handler = load_module(
|
|
553
|
+
self.spec.command,
|
|
554
|
+
handler,
|
|
555
|
+
context=context,
|
|
556
|
+
embed_in_sys=False,
|
|
557
|
+
)
|
|
551
558
|
client = self.client
|
|
552
559
|
setattr(context, "dask_client", client)
|
|
553
560
|
sout, serr = exec_from_params(handler, runobj, context)
|
|
@@ -232,6 +232,7 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
|
|
|
232
232
|
notifications: Optional[list[mlrun.model.Notification]] = None,
|
|
233
233
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
234
234
|
state_thresholds: Optional[dict[str, int]] = None,
|
|
235
|
+
reset_on_run: Optional[bool] = None,
|
|
235
236
|
**launcher_kwargs,
|
|
236
237
|
) -> RunObject:
|
|
237
238
|
if local:
|
|
@@ -99,7 +99,7 @@ def save_credentials(
|
|
|
99
99
|
credentials["DATABRICKS_CLUSTER_ID"] = cluster_id
|
|
100
100
|
|
|
101
101
|
with open(credentials_path, "w") as yaml_file:
|
|
102
|
-
yaml.
|
|
102
|
+
yaml.safe_dump(credentials, yaml_file, default_flow_style=False)
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
def run_mlrun_databricks_job(
|
mlrun/runtimes/funcdoc.py
CHANGED
|
@@ -16,8 +16,6 @@ import ast
|
|
|
16
16
|
import inspect
|
|
17
17
|
import re
|
|
18
18
|
|
|
19
|
-
from deprecated import deprecated
|
|
20
|
-
|
|
21
19
|
from mlrun.model import FunctionEntrypoint
|
|
22
20
|
|
|
23
21
|
|
|
@@ -73,32 +71,6 @@ def func_dict(
|
|
|
73
71
|
}
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
# TODO: remove in 1.7.0
|
|
77
|
-
@deprecated(
|
|
78
|
-
version="1.5.0",
|
|
79
|
-
reason="'func_info' is deprecated and will be removed in 1.7.0, use 'ast_func_info' instead",
|
|
80
|
-
category=FutureWarning,
|
|
81
|
-
)
|
|
82
|
-
def func_info(fn) -> dict:
|
|
83
|
-
sig = inspect.signature(fn)
|
|
84
|
-
doc = inspect.getdoc(fn) or ""
|
|
85
|
-
|
|
86
|
-
out = func_dict(
|
|
87
|
-
name=fn.__name__,
|
|
88
|
-
doc=doc,
|
|
89
|
-
params=[inspect_param(p) for p in sig.parameters.values()],
|
|
90
|
-
returns=param_dict(
|
|
91
|
-
type=type_name(sig.return_annotation, empty_is_none=True), default=None
|
|
92
|
-
),
|
|
93
|
-
lineno=func_lineno(fn),
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
if not fn.__doc__ or not fn.__doc__.strip():
|
|
97
|
-
return out
|
|
98
|
-
|
|
99
|
-
return merge_doc(out, doc)
|
|
100
|
-
|
|
101
|
-
|
|
102
74
|
def func_lineno(fn):
|
|
103
75
|
try:
|
|
104
76
|
return inspect.getsourcelines(fn)[1]
|
|
@@ -275,7 +247,7 @@ class ASTVisitor(ast.NodeVisitor):
|
|
|
275
247
|
self.exprs.append(node)
|
|
276
248
|
super().generic_visit(node)
|
|
277
249
|
|
|
278
|
-
def visit_FunctionDef(self, node):
|
|
250
|
+
def visit_FunctionDef(self, node): # noqa: N802
|
|
279
251
|
self.funcs.append(node)
|
|
280
252
|
self.generic_visit(node)
|
|
281
253
|
|
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -11,17 +11,16 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import time
|
|
14
|
+
import typing
|
|
16
15
|
import warnings
|
|
17
16
|
|
|
17
|
+
from mlrun_pipelines.common.ops import build_op
|
|
18
|
+
|
|
18
19
|
import mlrun.common.schemas
|
|
19
20
|
import mlrun.db
|
|
20
21
|
import mlrun.errors
|
|
21
22
|
|
|
22
|
-
from ..kfpops import build_op
|
|
23
23
|
from ..model import RunObject
|
|
24
|
-
from ..utils import get_in, logger
|
|
25
24
|
from .pod import KubeResource
|
|
26
25
|
|
|
27
26
|
|
|
@@ -65,29 +64,13 @@ class KubejobRuntime(KubeResource):
|
|
|
65
64
|
:param pull_at_runtime: load the archive into the container at job runtime vs on build/deploy
|
|
66
65
|
:param target_dir: target dir on runtime pod or repo clone / archive extraction
|
|
67
66
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if target_dir:
|
|
76
|
-
self.spec.build.source_code_target_dir = target_dir
|
|
77
|
-
|
|
78
|
-
self.spec.build.load_source_on_run = pull_at_runtime
|
|
79
|
-
if (
|
|
80
|
-
self.spec.build.base_image
|
|
81
|
-
and not self.spec.build.commands
|
|
82
|
-
and pull_at_runtime
|
|
83
|
-
and not self.spec.image
|
|
84
|
-
):
|
|
85
|
-
# if we load source from repo and don't need a full build use the base_image as the image
|
|
86
|
-
self.spec.image = self.spec.build.base_image
|
|
87
|
-
elif not pull_at_runtime:
|
|
88
|
-
# clear the image so build will not be skipped
|
|
89
|
-
self.spec.build.base_image = self.spec.build.base_image or self.spec.image
|
|
90
|
-
self.spec.image = ""
|
|
67
|
+
self._configure_mlrun_build_with_source(
|
|
68
|
+
source=source,
|
|
69
|
+
workdir=workdir,
|
|
70
|
+
handler=handler,
|
|
71
|
+
pull_at_runtime=pull_at_runtime,
|
|
72
|
+
target_dir=target_dir,
|
|
73
|
+
)
|
|
91
74
|
|
|
92
75
|
def build_config(
|
|
93
76
|
self,
|
|
@@ -160,125 +143,48 @@ class KubejobRuntime(KubeResource):
|
|
|
160
143
|
|
|
161
144
|
def deploy(
|
|
162
145
|
self,
|
|
163
|
-
watch=True,
|
|
164
|
-
with_mlrun=None,
|
|
165
|
-
skip_deployed=False,
|
|
166
|
-
is_kfp=False,
|
|
167
|
-
mlrun_version_specifier=None,
|
|
146
|
+
watch: bool = True,
|
|
147
|
+
with_mlrun: typing.Optional[bool] = None,
|
|
148
|
+
skip_deployed: bool = False,
|
|
149
|
+
is_kfp: bool = False,
|
|
150
|
+
mlrun_version_specifier: typing.Optional[bool] = None,
|
|
168
151
|
builder_env: dict = None,
|
|
169
152
|
show_on_failure: bool = False,
|
|
170
153
|
force_build: bool = False,
|
|
171
154
|
) -> bool:
|
|
172
|
-
"""
|
|
155
|
+
"""Deploy function, build container with dependencies
|
|
173
156
|
|
|
174
|
-
:param watch:
|
|
175
|
-
:param with_mlrun:
|
|
176
|
-
:param skip_deployed:
|
|
177
|
-
:param is_kfp:
|
|
178
|
-
:param mlrun_version_specifier:
|
|
157
|
+
:param watch: Wait for the deploy to complete (and print build logs)
|
|
158
|
+
:param with_mlrun: Add the current mlrun package to the container build
|
|
159
|
+
:param skip_deployed: Skip the build if we already have an image for the function
|
|
160
|
+
:param is_kfp: Deploy as part of a kfp pipeline
|
|
161
|
+
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
179
162
|
:param builder_env: Kaniko builder pod env vars dict (for config/credentials)
|
|
180
163
|
e.g. builder_env={"GIT_TOKEN": token}
|
|
181
|
-
:param show_on_failure:
|
|
182
|
-
:param force_build:
|
|
164
|
+
:param show_on_failure: Show logs only in case of build failure
|
|
165
|
+
:param force_build: Set True for force building the image, even when no changes were made
|
|
183
166
|
|
|
184
167
|
:return: True if the function is ready (deployed)
|
|
185
168
|
"""
|
|
186
169
|
|
|
187
170
|
build = self.spec.build
|
|
171
|
+
with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
|
|
188
172
|
|
|
189
|
-
if with_mlrun is None:
|
|
190
|
-
if build.with_mlrun is not None:
|
|
191
|
-
with_mlrun = build.with_mlrun
|
|
192
|
-
else:
|
|
193
|
-
with_mlrun = build.base_image and not (
|
|
194
|
-
build.base_image.startswith("mlrun/")
|
|
195
|
-
or "/mlrun/" in build.base_image
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
if (
|
|
199
|
-
not build.source
|
|
200
|
-
and not build.commands
|
|
201
|
-
and not build.requirements
|
|
202
|
-
and not build.extra
|
|
203
|
-
and with_mlrun
|
|
204
|
-
):
|
|
205
|
-
logger.info(
|
|
206
|
-
"Running build to add mlrun package, set "
|
|
207
|
-
"with_mlrun=False to skip if its already in the image"
|
|
208
|
-
)
|
|
209
173
|
self.status.state = ""
|
|
210
174
|
if build.base_image:
|
|
211
175
|
# clear the image so build will not be skipped
|
|
212
176
|
self.spec.image = ""
|
|
213
177
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
with_mlrun,
|
|
225
|
-
mlrun_version_specifier,
|
|
226
|
-
skip_deployed,
|
|
227
|
-
builder_env=builder_env,
|
|
228
|
-
force_build=force_build,
|
|
229
|
-
)
|
|
230
|
-
self.status = data["data"].get("status", None)
|
|
231
|
-
self.spec.image = get_in(data, "data.spec.image")
|
|
232
|
-
self.spec.build.base_image = self.spec.build.base_image or get_in(
|
|
233
|
-
data, "data.spec.build.base_image"
|
|
234
|
-
)
|
|
235
|
-
# Get the source target dir in case it was enriched due to loading source
|
|
236
|
-
self.spec.build.source_code_target_dir = get_in(
|
|
237
|
-
data, "data.spec.build.source_code_target_dir"
|
|
238
|
-
) or get_in(data, "data.spec.clone_target_dir")
|
|
239
|
-
ready = data.get("ready", False)
|
|
240
|
-
if not ready:
|
|
241
|
-
logger.info(
|
|
242
|
-
f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
|
|
243
|
-
)
|
|
244
|
-
if watch and not ready:
|
|
245
|
-
state = self._build_watch(watch, show_on_failure=show_on_failure)
|
|
246
|
-
ready = state == "ready"
|
|
247
|
-
self.status.state = state
|
|
248
|
-
|
|
249
|
-
if watch and not ready:
|
|
250
|
-
raise mlrun.errors.MLRunRuntimeError("Deploy failed")
|
|
251
|
-
return ready
|
|
252
|
-
|
|
253
|
-
def _build_watch(self, watch=True, logs=True, show_on_failure=False):
|
|
254
|
-
db = self._get_db()
|
|
255
|
-
offset = 0
|
|
256
|
-
try:
|
|
257
|
-
text, _ = db.get_builder_status(self, 0, logs=logs)
|
|
258
|
-
except mlrun.db.RunDBError:
|
|
259
|
-
raise ValueError("function or build process not found")
|
|
260
|
-
|
|
261
|
-
def print_log(text):
|
|
262
|
-
if text and (not show_on_failure or self.status.state == "error"):
|
|
263
|
-
print(text, end="")
|
|
264
|
-
|
|
265
|
-
print_log(text)
|
|
266
|
-
offset += len(text)
|
|
267
|
-
if watch:
|
|
268
|
-
while self.status.state in ["pending", "running"]:
|
|
269
|
-
time.sleep(2)
|
|
270
|
-
if show_on_failure:
|
|
271
|
-
text = ""
|
|
272
|
-
db.get_builder_status(self, 0, logs=False)
|
|
273
|
-
if self.status.state == "error":
|
|
274
|
-
# re-read the full log on failure
|
|
275
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
276
|
-
else:
|
|
277
|
-
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
278
|
-
print_log(text)
|
|
279
|
-
offset += len(text)
|
|
280
|
-
|
|
281
|
-
return self.status.state
|
|
178
|
+
return self._build_image(
|
|
179
|
+
builder_env=builder_env,
|
|
180
|
+
force_build=force_build,
|
|
181
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
182
|
+
show_on_failure=show_on_failure,
|
|
183
|
+
skip_deployed=skip_deployed,
|
|
184
|
+
watch=watch,
|
|
185
|
+
is_kfp=is_kfp,
|
|
186
|
+
with_mlrun=with_mlrun,
|
|
187
|
+
)
|
|
282
188
|
|
|
283
189
|
def deploy_step(
|
|
284
190
|
self,
|