mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +2 -0
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +5 -0
- mlrun/common/schemas/api_gateway.py +8 -1
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +36 -19
- mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} +9 -10
- mlrun/common/schemas/project.py +16 -10
- mlrun/common/types.py +7 -1
- mlrun/config.py +35 -10
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/__init__.py +3 -7
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/datastore_profile.py +19 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +11 -29
- mlrun/datastore/targets.py +131 -11
- mlrun/datastore/utils.py +10 -5
- mlrun/db/base.py +58 -6
- mlrun/db/httpdb.py +183 -77
- mlrun/db/nopdb.py +110 -0
- mlrun/feature_store/api.py +3 -2
- mlrun/feature_store/retrieval/spark_merger.py +27 -23
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/kfpops.py +2 -5
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +2 -2
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -295
- mlrun/model_monitoring/controller.py +25 -25
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/helpers.py +3 -3
- mlrun/model_monitoring/stream_processing.py +41 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +14 -2
- mlrun/projects/project.py +141 -122
- mlrun/run.py +8 -2
- mlrun/runtimes/__init__.py +16 -0
- mlrun/runtimes/base.py +10 -1
- mlrun/runtimes/kubejob.py +26 -121
- mlrun/runtimes/nuclio/api_gateway.py +243 -66
- mlrun/runtimes/nuclio/application/application.py +79 -1
- mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
- mlrun/runtimes/nuclio/function.py +14 -8
- mlrun/runtimes/nuclio/serving.py +30 -34
- mlrun/runtimes/pod.py +171 -0
- mlrun/runtimes/utils.py +0 -28
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +4 -3
- mlrun/serving/server.py +5 -7
- mlrun/serving/states.py +40 -23
- mlrun/serving/v2_serving.py +4 -3
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/http.py +1 -1
- mlrun/utils/retryer.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +25 -16
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +81 -75
- mlrun/model_monitoring/batch.py +0 -933
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
|
@@ -223,7 +223,42 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
223
223
|
auth_info: AuthInfo = None,
|
|
224
224
|
builder_env: dict = None,
|
|
225
225
|
force_build: bool = False,
|
|
226
|
+
with_mlrun=None,
|
|
227
|
+
skip_deployed=False,
|
|
228
|
+
is_kfp=False,
|
|
229
|
+
mlrun_version_specifier=None,
|
|
230
|
+
show_on_failure: bool = False,
|
|
226
231
|
):
|
|
232
|
+
"""
|
|
233
|
+
Deploy function, builds the application image if required (self.requires_build()) or force_build is True,
|
|
234
|
+
Once the image is built, the function is deployed.
|
|
235
|
+
:param project: Project name
|
|
236
|
+
:param tag: Function tag
|
|
237
|
+
:param verbose: Set True for verbose logging
|
|
238
|
+
:param auth_info: Service AuthInfo (deprecated and ignored)
|
|
239
|
+
:param builder_env: Env vars dict for source archive config/credentials
|
|
240
|
+
e.g. builder_env={"GIT_TOKEN": token}
|
|
241
|
+
:param force_build: Set True for force building the application image
|
|
242
|
+
:param with_mlrun: Add the current mlrun package to the container build
|
|
243
|
+
:param skip_deployed: Skip the build if we already have an image for the function
|
|
244
|
+
:param is_kfp: Deploy as part of a kfp pipeline
|
|
245
|
+
:param mlrun_version_specifier: Which mlrun package version to include (if not current)
|
|
246
|
+
:param show_on_failure: Show logs only in case of build failure
|
|
247
|
+
:return: True if the function is ready (deployed)
|
|
248
|
+
"""
|
|
249
|
+
if self.requires_build() or force_build:
|
|
250
|
+
self._fill_credentials()
|
|
251
|
+
self._build_application_image(
|
|
252
|
+
builder_env=builder_env,
|
|
253
|
+
force_build=force_build,
|
|
254
|
+
watch=True,
|
|
255
|
+
with_mlrun=with_mlrun,
|
|
256
|
+
skip_deployed=skip_deployed,
|
|
257
|
+
is_kfp=is_kfp,
|
|
258
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
259
|
+
show_on_failure=show_on_failure,
|
|
260
|
+
)
|
|
261
|
+
|
|
227
262
|
self._ensure_reverse_proxy_configurations()
|
|
228
263
|
self._configure_application_sidecar()
|
|
229
264
|
super().deploy(
|
|
@@ -232,7 +267,50 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
232
267
|
verbose,
|
|
233
268
|
auth_info,
|
|
234
269
|
builder_env,
|
|
235
|
-
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
def with_source_archive(
|
|
273
|
+
self, source, workdir=None, pull_at_runtime=True, target_dir=None
|
|
274
|
+
):
|
|
275
|
+
"""load the code from git/tar/zip archive at runtime or build
|
|
276
|
+
|
|
277
|
+
:param source: valid absolute path or URL to git, zip, or tar file, e.g.
|
|
278
|
+
git://github.com/mlrun/something.git
|
|
279
|
+
http://some/url/file.zip
|
|
280
|
+
note path source must exist on the image or exist locally when run is local
|
|
281
|
+
(it is recommended to use 'workdir' when source is a filepath instead)
|
|
282
|
+
:param workdir: working dir relative to the archive root (e.g. './subdir') or absolute to the image root
|
|
283
|
+
:param pull_at_runtime: load the archive into the container at job runtime vs on build/deploy
|
|
284
|
+
:param target_dir: target dir on runtime pod or repo clone / archive extraction
|
|
285
|
+
"""
|
|
286
|
+
self._configure_mlrun_build_with_source(
|
|
287
|
+
source=source,
|
|
288
|
+
workdir=workdir,
|
|
289
|
+
pull_at_runtime=pull_at_runtime,
|
|
290
|
+
target_dir=target_dir,
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
def _build_application_image(
|
|
294
|
+
self,
|
|
295
|
+
builder_env: dict = None,
|
|
296
|
+
force_build: bool = False,
|
|
297
|
+
watch=True,
|
|
298
|
+
with_mlrun=None,
|
|
299
|
+
skip_deployed=False,
|
|
300
|
+
is_kfp=False,
|
|
301
|
+
mlrun_version_specifier=None,
|
|
302
|
+
show_on_failure: bool = False,
|
|
303
|
+
):
|
|
304
|
+
with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
|
|
305
|
+
return self._build_image(
|
|
306
|
+
builder_env=builder_env,
|
|
307
|
+
force_build=force_build,
|
|
308
|
+
mlrun_version_specifier=mlrun_version_specifier,
|
|
309
|
+
show_on_failure=show_on_failure,
|
|
310
|
+
skip_deployed=skip_deployed,
|
|
311
|
+
watch=watch,
|
|
312
|
+
is_kfp=is_kfp,
|
|
313
|
+
with_mlrun=with_mlrun,
|
|
236
314
|
)
|
|
237
315
|
|
|
238
316
|
def _ensure_reverse_proxy_configurations(self):
|
|
@@ -39,11 +39,19 @@ func Handler(context *nuclio.Context, event nuclio.Event) (interface{}, error) {
|
|
|
39
39
|
for k, v := range event.GetHeaders() {
|
|
40
40
|
httpRequest.Header[k] = []string{v.(string)}
|
|
41
41
|
}
|
|
42
|
+
|
|
43
|
+
// populate query params
|
|
44
|
+
query := httpRequest.URL.Query()
|
|
45
|
+
for k, v := range event.GetFields() {
|
|
46
|
+
query.Set(k, v.(string))
|
|
47
|
+
}
|
|
48
|
+
httpRequest.URL.RawQuery = query.Encode()
|
|
49
|
+
|
|
42
50
|
recorder := httptest.NewRecorder()
|
|
43
51
|
reverseProxy.ServeHTTP(recorder, httpRequest)
|
|
44
52
|
|
|
45
53
|
// send request to sidecar
|
|
46
|
-
context.Logger.
|
|
54
|
+
context.Logger.DebugWith("Forwarding request to sidecar", "sidecarUrl", sidecarUrl, "query", httpRequest.URL.Query())
|
|
47
55
|
response := recorder.Result()
|
|
48
56
|
|
|
49
57
|
headers := make(map[string]interface{})
|
|
@@ -543,11 +543,16 @@ class RemoteRuntime(KubeResource):
|
|
|
543
543
|
:param project: project name
|
|
544
544
|
:param tag: function tag
|
|
545
545
|
:param verbose: set True for verbose logging
|
|
546
|
-
:param auth_info: service AuthInfo
|
|
546
|
+
:param auth_info: service AuthInfo (deprecated and ignored)
|
|
547
547
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
548
548
|
:param force_build: set True for force building the image
|
|
549
549
|
"""
|
|
550
|
-
|
|
550
|
+
if auth_info:
|
|
551
|
+
# TODO: remove in 1.9.0
|
|
552
|
+
warnings.warn(
|
|
553
|
+
"'auth_info' is deprecated for nuclio runtimes in 1.7.0 and will be removed in 1.9.0",
|
|
554
|
+
FutureWarning,
|
|
555
|
+
)
|
|
551
556
|
|
|
552
557
|
old_http_session = getattr(self, "_http_session", None)
|
|
553
558
|
if old_http_session:
|
|
@@ -570,9 +575,7 @@ class RemoteRuntime(KubeResource):
|
|
|
570
575
|
self._fill_credentials()
|
|
571
576
|
db = self._get_db()
|
|
572
577
|
logger.info("Starting remote function deploy")
|
|
573
|
-
data = db.
|
|
574
|
-
self, False, builder_env=builder_env, force_build=force_build
|
|
575
|
-
)
|
|
578
|
+
data = db.deploy_nuclio_function(func=self, builder_env=builder_env)
|
|
576
579
|
self.status = data["data"].get("status")
|
|
577
580
|
self._update_credentials_from_remote_build(data["data"])
|
|
578
581
|
|
|
@@ -613,7 +616,7 @@ class RemoteRuntime(KubeResource):
|
|
|
613
616
|
int(mlrun.mlconf.httpdb.logs.nuclio.pull_deploy_status_default_interval)
|
|
614
617
|
)
|
|
615
618
|
try:
|
|
616
|
-
text, last_log_timestamp = db.
|
|
619
|
+
text, last_log_timestamp = db.get_nuclio_deploy_status(
|
|
617
620
|
self, last_log_timestamp=last_log_timestamp, verbose=verbose
|
|
618
621
|
)
|
|
619
622
|
except mlrun.db.RunDBError:
|
|
@@ -775,6 +778,9 @@ class RemoteRuntime(KubeResource):
|
|
|
775
778
|
] = self.metadata.credentials.access_key
|
|
776
779
|
return runtime_env
|
|
777
780
|
|
|
781
|
+
def _get_serving_spec(self):
|
|
782
|
+
return None
|
|
783
|
+
|
|
778
784
|
def _get_nuclio_config_spec_env(self):
|
|
779
785
|
env_dict = {}
|
|
780
786
|
external_source_env_dict = {}
|
|
@@ -992,10 +998,10 @@ class RemoteRuntime(KubeResource):
|
|
|
992
998
|
]
|
|
993
999
|
|
|
994
1000
|
if command:
|
|
995
|
-
sidecar["command"] = command
|
|
1001
|
+
sidecar["command"] = mlrun.utils.helpers.as_list(command)
|
|
996
1002
|
|
|
997
1003
|
if args:
|
|
998
|
-
sidecar["args"] = args
|
|
1004
|
+
sidecar["args"] = mlrun.utils.helpers.as_list(args)
|
|
999
1005
|
|
|
1000
1006
|
def _set_sidecar(self, name: str) -> dict:
|
|
1001
1007
|
self.spec.config.setdefault("spec.sidecars", [])
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -14,8 +14,9 @@
|
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
16
|
import os
|
|
17
|
+
import warnings
|
|
17
18
|
from copy import deepcopy
|
|
18
|
-
from typing import Union
|
|
19
|
+
from typing import TYPE_CHECKING, Optional, Union
|
|
19
20
|
|
|
20
21
|
import nuclio
|
|
21
22
|
from nuclio import KafkaTrigger
|
|
@@ -24,7 +25,6 @@ import mlrun
|
|
|
24
25
|
import mlrun.common.schemas
|
|
25
26
|
from mlrun.datastore import parse_kafka_url
|
|
26
27
|
from mlrun.model import ObjectList
|
|
27
|
-
from mlrun.model_monitoring.tracking_policy import TrackingPolicy
|
|
28
28
|
from mlrun.runtimes.function_reference import FunctionReference
|
|
29
29
|
from mlrun.secrets import SecretsStore
|
|
30
30
|
from mlrun.serving.server import GraphServer, create_graph_server
|
|
@@ -43,6 +43,10 @@ from .function import NuclioSpec, RemoteRuntime
|
|
|
43
43
|
|
|
44
44
|
serving_subkind = "serving_v2"
|
|
45
45
|
|
|
46
|
+
if TYPE_CHECKING:
|
|
47
|
+
# remove this block in 1.9.0
|
|
48
|
+
from mlrun.model_monitoring import TrackingPolicy
|
|
49
|
+
|
|
46
50
|
|
|
47
51
|
def new_v2_model_server(
|
|
48
52
|
name,
|
|
@@ -291,7 +295,9 @@ class ServingRuntime(RemoteRuntime):
|
|
|
291
295
|
"provided class is not a router step, must provide a router class in router topology"
|
|
292
296
|
)
|
|
293
297
|
else:
|
|
294
|
-
step = RouterStep(
|
|
298
|
+
step = RouterStep(
|
|
299
|
+
class_name=class_name, class_args=class_args, engine=engine
|
|
300
|
+
)
|
|
295
301
|
self.spec.graph = step
|
|
296
302
|
elif topology == StepKinds.flow:
|
|
297
303
|
self.spec.graph = RootFlowStep(engine=engine)
|
|
@@ -303,12 +309,12 @@ class ServingRuntime(RemoteRuntime):
|
|
|
303
309
|
|
|
304
310
|
def set_tracking(
|
|
305
311
|
self,
|
|
306
|
-
stream_path: str = None,
|
|
307
|
-
batch: int = None,
|
|
308
|
-
sample: int = None,
|
|
309
|
-
stream_args: dict = None,
|
|
310
|
-
tracking_policy: Union[TrackingPolicy, dict] = None,
|
|
311
|
-
):
|
|
312
|
+
stream_path: Optional[str] = None,
|
|
313
|
+
batch: Optional[int] = None,
|
|
314
|
+
sample: Optional[int] = None,
|
|
315
|
+
stream_args: Optional[dict] = None,
|
|
316
|
+
tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
|
|
317
|
+
) -> None:
|
|
312
318
|
"""apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
|
|
313
319
|
and analyze performance.
|
|
314
320
|
|
|
@@ -317,31 +323,17 @@ class ServingRuntime(RemoteRuntime):
|
|
|
317
323
|
:param batch: Micro batch size (send micro batches of N records at a time).
|
|
318
324
|
:param sample: Sample size (send only one of N records).
|
|
319
325
|
:param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
|
|
320
|
-
:param tracking_policy: Tracking policy object or a dictionary that will be converted into a tracking policy
|
|
321
|
-
object. By using TrackingPolicy, the user can apply his model monitoring requirements,
|
|
322
|
-
such as setting the scheduling policy of the model monitoring batch job or changing
|
|
323
|
-
the image of the model monitoring stream.
|
|
324
326
|
|
|
325
327
|
example::
|
|
326
328
|
|
|
327
329
|
# initialize a new serving function
|
|
328
330
|
serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
|
|
329
|
-
# apply model monitoring
|
|
330
|
-
|
|
331
|
-
serving_fn.set_tracking(tracking_policy=tracking_policy)
|
|
331
|
+
# apply model monitoring
|
|
332
|
+
serving_fn.set_tracking()
|
|
332
333
|
|
|
333
334
|
"""
|
|
334
|
-
|
|
335
335
|
# Applying model monitoring configurations
|
|
336
336
|
self.spec.track_models = True
|
|
337
|
-
self.spec.tracking_policy = None
|
|
338
|
-
if tracking_policy:
|
|
339
|
-
if isinstance(tracking_policy, dict):
|
|
340
|
-
# Convert tracking policy dictionary into `model_monitoring.TrackingPolicy` object
|
|
341
|
-
self.spec.tracking_policy = TrackingPolicy.from_dict(tracking_policy)
|
|
342
|
-
else:
|
|
343
|
-
# Tracking_policy is already a `model_monitoring.TrackingPolicy` object
|
|
344
|
-
self.spec.tracking_policy = tracking_policy
|
|
345
337
|
|
|
346
338
|
if stream_path:
|
|
347
339
|
self.spec.parameters["log_stream"] = stream_path
|
|
@@ -351,6 +343,14 @@ class ServingRuntime(RemoteRuntime):
|
|
|
351
343
|
self.spec.parameters["log_stream_sample"] = sample
|
|
352
344
|
if stream_args:
|
|
353
345
|
self.spec.parameters["stream_args"] = stream_args
|
|
346
|
+
if tracking_policy is not None:
|
|
347
|
+
warnings.warn(
|
|
348
|
+
"The `tracking_policy` argument is deprecated from version 1.7.0 "
|
|
349
|
+
"and has no effect. It will be removed in 1.9.0.\n"
|
|
350
|
+
"To set the desired model monitoring time window and schedule, use "
|
|
351
|
+
"the `base_period` argument in `project.enable_model_monitoring()`.",
|
|
352
|
+
FutureWarning,
|
|
353
|
+
)
|
|
354
354
|
|
|
355
355
|
def add_model(
|
|
356
356
|
self,
|
|
@@ -491,9 +491,9 @@ class ServingRuntime(RemoteRuntime):
|
|
|
491
491
|
|
|
492
492
|
if (
|
|
493
493
|
stream.path.startswith("kafka://")
|
|
494
|
-
or "
|
|
494
|
+
or "kafka_brokers" in stream.options
|
|
495
495
|
):
|
|
496
|
-
brokers = stream.options.get("
|
|
496
|
+
brokers = stream.options.get("kafka_brokers")
|
|
497
497
|
if brokers:
|
|
498
498
|
brokers = brokers.split(",")
|
|
499
499
|
topic, brokers = parse_kafka_url(stream.path, brokers)
|
|
@@ -644,8 +644,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
644
644
|
force_build=force_build,
|
|
645
645
|
)
|
|
646
646
|
|
|
647
|
-
def
|
|
648
|
-
env = super()._get_runtime_env()
|
|
647
|
+
def _get_serving_spec(self):
|
|
649
648
|
function_name_uri_map = {f.name: f.uri(self) for f in self.spec.function_refs}
|
|
650
649
|
|
|
651
650
|
serving_spec = {
|
|
@@ -658,9 +657,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
658
657
|
"graph_initializer": self.spec.graph_initializer,
|
|
659
658
|
"error_stream": self.spec.error_stream,
|
|
660
659
|
"track_models": self.spec.track_models,
|
|
661
|
-
"tracking_policy":
|
|
662
|
-
if self.spec.tracking_policy
|
|
663
|
-
else None,
|
|
660
|
+
"tracking_policy": None,
|
|
664
661
|
"default_content_type": self.spec.default_content_type,
|
|
665
662
|
}
|
|
666
663
|
|
|
@@ -668,8 +665,7 @@ class ServingRuntime(RemoteRuntime):
|
|
|
668
665
|
self._secrets = SecretsStore.from_list(self.spec.secret_sources)
|
|
669
666
|
serving_spec["secret_sources"] = self._secrets.to_serial()
|
|
670
667
|
|
|
671
|
-
|
|
672
|
-
return env
|
|
668
|
+
return json.dumps(serving_spec)
|
|
673
669
|
|
|
674
670
|
def to_mock_server(
|
|
675
671
|
self,
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -15,6 +15,7 @@ import copy
|
|
|
15
15
|
import inspect
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
|
+
import time
|
|
18
19
|
import typing
|
|
19
20
|
from enum import Enum
|
|
20
21
|
|
|
@@ -1057,6 +1058,32 @@ class KubeResource(BaseRuntime):
|
|
|
1057
1058
|
return True
|
|
1058
1059
|
return False
|
|
1059
1060
|
|
|
1061
|
+
def enrich_runtime_spec(
|
|
1062
|
+
self,
|
|
1063
|
+
project_node_selector: dict[str, str],
|
|
1064
|
+
):
|
|
1065
|
+
"""
|
|
1066
|
+
Enriches the runtime spec with the project-level node selector.
|
|
1067
|
+
|
|
1068
|
+
This method merges the project-level node selector with the existing function node_selector.
|
|
1069
|
+
The merge logic used here combines the two dictionaries, giving precedence to
|
|
1070
|
+
the keys in the runtime node_selector. If there are conflicting keys between the
|
|
1071
|
+
two dictionaries, the values from self.spec.node_selector will overwrite the
|
|
1072
|
+
values from project_node_selector.
|
|
1073
|
+
|
|
1074
|
+
Example:
|
|
1075
|
+
Suppose self.spec.node_selector = {"type": "gpu", "zone": "us-east-1"}
|
|
1076
|
+
and project_node_selector = {"type": "cpu", "environment": "production"}.
|
|
1077
|
+
After the merge, the resulting node_selector will be:
|
|
1078
|
+
{"type": "gpu", "zone": "us-east-1", "environment": "production"}
|
|
1079
|
+
|
|
1080
|
+
Note:
|
|
1081
|
+
- The merge uses the ** operator, also known as the "unpacking" operator in Python,
|
|
1082
|
+
combining key-value pairs from each dictionary. Later dictionaries take precedence
|
|
1083
|
+
when there are conflicting keys.
|
|
1084
|
+
"""
|
|
1085
|
+
self.spec.node_selector = {**project_node_selector, **self.spec.node_selector}
|
|
1086
|
+
|
|
1060
1087
|
def _set_env(self, name, value=None, value_from=None):
|
|
1061
1088
|
new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
|
|
1062
1089
|
i = 0
|
|
@@ -1312,6 +1339,150 @@ class KubeResource(BaseRuntime):
|
|
|
1312
1339
|
|
|
1313
1340
|
self.spec.validate_service_account(allowed_service_accounts)
|
|
1314
1341
|
|
|
1342
|
+
def _configure_mlrun_build_with_source(
|
|
1343
|
+
self, source, workdir=None, handler=None, pull_at_runtime=True, target_dir=None
|
|
1344
|
+
):
|
|
1345
|
+
mlrun.utils.helpers.validate_builder_source(source, pull_at_runtime, workdir)
|
|
1346
|
+
|
|
1347
|
+
self.spec.build.source = source
|
|
1348
|
+
if handler:
|
|
1349
|
+
self.spec.default_handler = handler
|
|
1350
|
+
if workdir:
|
|
1351
|
+
self.spec.workdir = workdir
|
|
1352
|
+
if target_dir:
|
|
1353
|
+
self.spec.build.source_code_target_dir = target_dir
|
|
1354
|
+
|
|
1355
|
+
self.spec.build.load_source_on_run = pull_at_runtime
|
|
1356
|
+
if (
|
|
1357
|
+
self.spec.build.base_image
|
|
1358
|
+
and not self.spec.build.commands
|
|
1359
|
+
and pull_at_runtime
|
|
1360
|
+
and not self.spec.image
|
|
1361
|
+
):
|
|
1362
|
+
# if we load source from repo and don't need a full build use the base_image as the image
|
|
1363
|
+
self.spec.image = self.spec.build.base_image
|
|
1364
|
+
elif not pull_at_runtime:
|
|
1365
|
+
# clear the image so build will not be skipped
|
|
1366
|
+
self.spec.build.base_image = self.spec.build.base_image or self.spec.image
|
|
1367
|
+
self.spec.image = ""
|
|
1368
|
+
|
|
1369
|
+
def _resolve_build_with_mlrun(self, with_mlrun: typing.Optional[bool] = None):
|
|
1370
|
+
build = self.spec.build
|
|
1371
|
+
if with_mlrun is None:
|
|
1372
|
+
if build.with_mlrun is not None:
|
|
1373
|
+
with_mlrun = build.with_mlrun
|
|
1374
|
+
else:
|
|
1375
|
+
with_mlrun = build.base_image and not (
|
|
1376
|
+
build.base_image.startswith("mlrun/")
|
|
1377
|
+
or "/mlrun/" in build.base_image
|
|
1378
|
+
)
|
|
1379
|
+
if (
|
|
1380
|
+
not build.source
|
|
1381
|
+
and not build.commands
|
|
1382
|
+
and not build.requirements
|
|
1383
|
+
and not build.extra
|
|
1384
|
+
and with_mlrun
|
|
1385
|
+
):
|
|
1386
|
+
logger.info(
|
|
1387
|
+
"Running build to add mlrun package, set "
|
|
1388
|
+
"with_mlrun=False to skip if its already in the image"
|
|
1389
|
+
)
|
|
1390
|
+
return with_mlrun
|
|
1391
|
+
|
|
1392
|
+
def _build_image(
|
|
1393
|
+
self,
|
|
1394
|
+
builder_env,
|
|
1395
|
+
force_build,
|
|
1396
|
+
mlrun_version_specifier,
|
|
1397
|
+
show_on_failure,
|
|
1398
|
+
skip_deployed,
|
|
1399
|
+
watch,
|
|
1400
|
+
is_kfp,
|
|
1401
|
+
with_mlrun,
|
|
1402
|
+
):
|
|
1403
|
+
# When we're in pipelines context we must watch otherwise the pipelines pod will exit before the operation
|
|
1404
|
+
# is actually done. (when a pipelines pod exits, the pipeline step marked as done)
|
|
1405
|
+
if is_kfp:
|
|
1406
|
+
watch = True
|
|
1407
|
+
|
|
1408
|
+
db = self._get_db()
|
|
1409
|
+
data = db.remote_builder(
|
|
1410
|
+
self,
|
|
1411
|
+
with_mlrun,
|
|
1412
|
+
mlrun_version_specifier,
|
|
1413
|
+
skip_deployed,
|
|
1414
|
+
builder_env=builder_env,
|
|
1415
|
+
force_build=force_build,
|
|
1416
|
+
)
|
|
1417
|
+
self.status = data["data"].get("status", None)
|
|
1418
|
+
self.spec.image = mlrun.utils.get_in(
|
|
1419
|
+
data, "data.spec.image"
|
|
1420
|
+
) or mlrun.utils.get_in(data, "data.spec.build.image")
|
|
1421
|
+
self.spec.build.base_image = self.spec.build.base_image or mlrun.utils.get_in(
|
|
1422
|
+
data, "data.spec.build.base_image"
|
|
1423
|
+
)
|
|
1424
|
+
# Get the source target dir in case it was enriched due to loading source
|
|
1425
|
+
self.spec.build.source_code_target_dir = mlrun.utils.get_in(
|
|
1426
|
+
data, "data.spec.build.source_code_target_dir"
|
|
1427
|
+
) or mlrun.utils.get_in(data, "data.spec.clone_target_dir")
|
|
1428
|
+
ready = data.get("ready", False)
|
|
1429
|
+
if not ready:
|
|
1430
|
+
logger.info(
|
|
1431
|
+
f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
|
|
1432
|
+
)
|
|
1433
|
+
if watch and not ready:
|
|
1434
|
+
state = self._build_watch(
|
|
1435
|
+
watch=watch,
|
|
1436
|
+
show_on_failure=show_on_failure,
|
|
1437
|
+
)
|
|
1438
|
+
ready = state == "ready"
|
|
1439
|
+
self.status.state = state
|
|
1440
|
+
|
|
1441
|
+
if watch and not ready:
|
|
1442
|
+
raise mlrun.errors.MLRunRuntimeError("Deploy failed")
|
|
1443
|
+
return ready
|
|
1444
|
+
|
|
1445
|
+
def _build_watch(
|
|
1446
|
+
self,
|
|
1447
|
+
watch: bool = True,
|
|
1448
|
+
logs: bool = True,
|
|
1449
|
+
show_on_failure: bool = False,
|
|
1450
|
+
):
|
|
1451
|
+
db = self._get_db()
|
|
1452
|
+
offset = 0
|
|
1453
|
+
try:
|
|
1454
|
+
text, _ = db.get_builder_status(self, 0, logs=logs)
|
|
1455
|
+
except mlrun.db.RunDBError:
|
|
1456
|
+
raise ValueError("function or build process not found")
|
|
1457
|
+
|
|
1458
|
+
def print_log(text):
|
|
1459
|
+
if text and (
|
|
1460
|
+
not show_on_failure
|
|
1461
|
+
or self.status.state == mlrun.common.schemas.FunctionState.error
|
|
1462
|
+
):
|
|
1463
|
+
print(text, end="")
|
|
1464
|
+
|
|
1465
|
+
print_log(text)
|
|
1466
|
+
offset += len(text)
|
|
1467
|
+
if watch:
|
|
1468
|
+
while self.status.state in [
|
|
1469
|
+
mlrun.common.schemas.FunctionState.pending,
|
|
1470
|
+
mlrun.common.schemas.FunctionState.running,
|
|
1471
|
+
]:
|
|
1472
|
+
time.sleep(2)
|
|
1473
|
+
if show_on_failure:
|
|
1474
|
+
text = ""
|
|
1475
|
+
db.get_builder_status(self, 0, logs=False)
|
|
1476
|
+
if self.status.state == mlrun.common.schemas.FunctionState.error:
|
|
1477
|
+
# re-read the full log on failure
|
|
1478
|
+
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
1479
|
+
else:
|
|
1480
|
+
text, _ = db.get_builder_status(self, offset, logs=logs)
|
|
1481
|
+
print_log(text)
|
|
1482
|
+
offset += len(text)
|
|
1483
|
+
|
|
1484
|
+
return self.status.state
|
|
1485
|
+
|
|
1315
1486
|
|
|
1316
1487
|
def _resolve_if_type_sanitized(attribute_name, attribute):
|
|
1317
1488
|
attribute_config = sanitized_attributes[attribute_name]
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -417,34 +417,6 @@ def get_func_selector(project, name=None, tag=None):
|
|
|
417
417
|
return s
|
|
418
418
|
|
|
419
419
|
|
|
420
|
-
class k8s_resource:
|
|
421
|
-
kind = ""
|
|
422
|
-
per_run = False
|
|
423
|
-
per_function = False
|
|
424
|
-
k8client = None
|
|
425
|
-
|
|
426
|
-
def deploy_function(self, function):
|
|
427
|
-
pass
|
|
428
|
-
|
|
429
|
-
def release_function(self, function):
|
|
430
|
-
pass
|
|
431
|
-
|
|
432
|
-
def submit_run(self, function, runobj):
|
|
433
|
-
pass
|
|
434
|
-
|
|
435
|
-
def get_object(self, name, namespace=None):
|
|
436
|
-
return None
|
|
437
|
-
|
|
438
|
-
def get_status(self, name, namespace=None):
|
|
439
|
-
return None
|
|
440
|
-
|
|
441
|
-
def del_object(self, name, namespace=None):
|
|
442
|
-
pass
|
|
443
|
-
|
|
444
|
-
def get_pods(self, name, namespace=None, master=False):
|
|
445
|
-
return {}
|
|
446
|
-
|
|
447
|
-
|
|
448
420
|
def enrich_function_from_dict(function, function_dict):
|
|
449
421
|
override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
|
|
450
422
|
for attribute in [
|
mlrun/serving/remote.py
CHANGED
|
@@ -172,8 +172,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
172
172
|
if not self._session:
|
|
173
173
|
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
174
174
|
self.retries,
|
|
175
|
-
self.backoff_factor
|
|
176
|
-
or mlrun.config.config.http_retry_defaults.backoff_factor,
|
|
175
|
+
self.backoff_factor or mlrun.mlconf.http_retry_defaults.backoff_factor,
|
|
177
176
|
retry_on_exception=False,
|
|
178
177
|
retry_on_status=self.retries > 0,
|
|
179
178
|
retry_on_post=True,
|
|
@@ -185,7 +184,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
185
184
|
resp = self._session.request(
|
|
186
185
|
method,
|
|
187
186
|
url,
|
|
188
|
-
verify=mlrun.
|
|
187
|
+
verify=mlrun.mlconf.httpdb.http.verify,
|
|
189
188
|
headers=headers,
|
|
190
189
|
data=body,
|
|
191
190
|
timeout=self.timeout,
|
mlrun/serving/routers.py
CHANGED
|
@@ -28,6 +28,7 @@ import numpy as np
|
|
|
28
28
|
import mlrun
|
|
29
29
|
import mlrun.common.model_monitoring
|
|
30
30
|
import mlrun.common.schemas.model_monitoring
|
|
31
|
+
from mlrun.errors import err_to_str
|
|
31
32
|
from mlrun.utils import logger, now_date
|
|
32
33
|
|
|
33
34
|
from ..common.helpers import parse_versioned_object_uri
|
|
@@ -1013,7 +1014,7 @@ def _init_endpoint_record(
|
|
|
1013
1014
|
graph_server.function_uri
|
|
1014
1015
|
)
|
|
1015
1016
|
except Exception as e:
|
|
1016
|
-
logger.error("Failed to parse function URI", exc=e)
|
|
1017
|
+
logger.error("Failed to parse function URI", exc=err_to_str(e))
|
|
1017
1018
|
return None
|
|
1018
1019
|
|
|
1019
1020
|
# Generating version model value based on the model name and model version
|
|
@@ -1089,12 +1090,12 @@ def _init_endpoint_record(
|
|
|
1089
1090
|
except Exception as exc:
|
|
1090
1091
|
logger.warning(
|
|
1091
1092
|
"Failed creating model endpoint record",
|
|
1092
|
-
exc=exc,
|
|
1093
|
+
exc=err_to_str(exc),
|
|
1093
1094
|
traceback=traceback.format_exc(),
|
|
1094
1095
|
)
|
|
1095
1096
|
|
|
1096
1097
|
except Exception as e:
|
|
1097
|
-
logger.error("Failed to retrieve model endpoint object", exc=e)
|
|
1098
|
+
logger.error("Failed to retrieve model endpoint object", exc=err_to_str(e))
|
|
1098
1099
|
|
|
1099
1100
|
return endpoint_uid
|
|
1100
1101
|
|
mlrun/serving/server.py
CHANGED
|
@@ -23,6 +23,7 @@ import uuid
|
|
|
23
23
|
from typing import Optional, Union
|
|
24
24
|
|
|
25
25
|
import mlrun
|
|
26
|
+
import mlrun.common.constants
|
|
26
27
|
import mlrun.common.helpers
|
|
27
28
|
import mlrun.model_monitoring
|
|
28
29
|
from mlrun.config import config
|
|
@@ -52,7 +53,7 @@ class _StreamContext:
|
|
|
52
53
|
Initialize _StreamContext object.
|
|
53
54
|
:param enabled: A boolean indication for applying the stream context
|
|
54
55
|
:param parameters: Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
|
|
55
|
-
parameters might be relevant to the output source such as `
|
|
56
|
+
parameters might be relevant to the output source such as `kafka_brokers` if
|
|
56
57
|
the output source is from type Kafka.
|
|
57
58
|
:param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
|
|
58
59
|
"""
|
|
@@ -311,11 +312,8 @@ class GraphServer(ModelObj):
|
|
|
311
312
|
def v2_serving_init(context, namespace=None):
|
|
312
313
|
"""hook for nuclio init_context()"""
|
|
313
314
|
|
|
314
|
-
data = os.environ.get("SERVING_SPEC_ENV", "")
|
|
315
|
-
if not data:
|
|
316
|
-
raise MLRunInvalidArgumentError("failed to find spec env var")
|
|
317
|
-
spec = json.loads(data)
|
|
318
315
|
context.logger.info("Initializing server from spec")
|
|
316
|
+
spec = mlrun.utils.get_serving_spec()
|
|
319
317
|
server = GraphServer.from_dict(spec)
|
|
320
318
|
if config.log_level.lower() == "debug":
|
|
321
319
|
server.verbose = True
|
|
@@ -355,7 +353,7 @@ def v2_serving_init(context, namespace=None):
|
|
|
355
353
|
|
|
356
354
|
async def termination_callback():
|
|
357
355
|
context.logger.info("Termination callback called")
|
|
358
|
-
|
|
356
|
+
server.wait_for_completion()
|
|
359
357
|
context.logger.info("Termination of async flow is completed")
|
|
360
358
|
|
|
361
359
|
context.platform.set_termination_callback(termination_callback)
|
|
@@ -367,7 +365,7 @@ def v2_serving_init(context, namespace=None):
|
|
|
367
365
|
|
|
368
366
|
async def drain_callback():
|
|
369
367
|
context.logger.info("Drain callback called")
|
|
370
|
-
|
|
368
|
+
server.wait_for_completion()
|
|
371
369
|
context.logger.info(
|
|
372
370
|
"Termination of async flow is completed. Rerunning async flow."
|
|
373
371
|
)
|