mlrun 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/manager.py +6 -1
- mlrun/common/constants.py +1 -0
- mlrun/common/model_monitoring/helpers.py +12 -6
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -1
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +20 -16
- mlrun/datastore/azure_blob.py +22 -9
- mlrun/datastore/base.py +15 -25
- mlrun/datastore/datastore.py +19 -8
- mlrun/datastore/datastore_profile.py +47 -5
- mlrun/datastore/google_cloud_storage.py +10 -6
- mlrun/datastore/hdfs.py +51 -0
- mlrun/datastore/redis.py +4 -0
- mlrun/datastore/s3.py +4 -0
- mlrun/datastore/sources.py +31 -50
- mlrun/datastore/targets.py +58 -48
- mlrun/datastore/utils.py +2 -49
- mlrun/datastore/v3io.py +4 -0
- mlrun/db/base.py +34 -0
- mlrun/db/httpdb.py +71 -42
- mlrun/execution.py +3 -3
- mlrun/feature_store/feature_vector.py +2 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/model.py +5 -0
- mlrun/model_monitoring/api.py +3 -3
- mlrun/model_monitoring/application.py +1 -1
- mlrun/model_monitoring/applications/__init__.py +13 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
- mlrun/model_monitoring/batch.py +9 -111
- mlrun/model_monitoring/controller.py +73 -55
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/features_drift_table.py +62 -53
- mlrun/model_monitoring/helpers.py +30 -21
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +8 -65
- mlrun/projects/pipelines.py +21 -11
- mlrun/projects/project.py +121 -42
- mlrun/runtimes/base.py +21 -2
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/nuclio/function.py +9 -9
- mlrun/runtimes/nuclio/serving.py +3 -3
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/serving/server.py +15 -18
- mlrun/serving/states.py +27 -12
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +27 -5
- mlrun/utils/http.py +3 -3
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +70 -64
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0
mlrun/platforms/iguazio.py
CHANGED
|
@@ -16,19 +16,15 @@ import json
|
|
|
16
16
|
import os
|
|
17
17
|
import urllib
|
|
18
18
|
from collections import namedtuple
|
|
19
|
-
from datetime import datetime
|
|
20
|
-
from http import HTTPStatus
|
|
21
19
|
from urllib.parse import urlparse
|
|
22
20
|
|
|
23
21
|
import kfp.dsl
|
|
24
22
|
import requests
|
|
25
23
|
import semver
|
|
26
|
-
import urllib3
|
|
27
24
|
import v3io
|
|
28
25
|
|
|
29
26
|
import mlrun.errors
|
|
30
27
|
from mlrun.config import config as mlconf
|
|
31
|
-
from mlrun.errors import err_to_str
|
|
32
28
|
from mlrun.utils import dict_to_json
|
|
33
29
|
|
|
34
30
|
_cached_control_session = None
|
|
@@ -488,25 +484,6 @@ class V3ioStreamClient:
|
|
|
488
484
|
return response.output.records
|
|
489
485
|
|
|
490
486
|
|
|
491
|
-
def create_control_session(url, username, password):
|
|
492
|
-
# for systems without production cert - silence no cert verification WARN
|
|
493
|
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
494
|
-
if not username or not password:
|
|
495
|
-
raise ValueError("cannot create session key, missing username or password")
|
|
496
|
-
|
|
497
|
-
session = requests.Session()
|
|
498
|
-
session.auth = (username, password)
|
|
499
|
-
try:
|
|
500
|
-
auth = session.post(f"{url}/api/sessions", verify=False)
|
|
501
|
-
except OSError as exc:
|
|
502
|
-
raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
|
|
503
|
-
|
|
504
|
-
if not auth.ok:
|
|
505
|
-
raise OSError(f"failed to create session: {url}, {auth.text}")
|
|
506
|
-
|
|
507
|
-
return auth.json()["data"]["id"]
|
|
508
|
-
|
|
509
|
-
|
|
510
487
|
def is_iguazio_endpoint(endpoint_url: str) -> bool:
|
|
511
488
|
# TODO: find a better heuristic
|
|
512
489
|
return ".default-tenant." in endpoint_url
|
|
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
|
|
|
533
510
|
return False
|
|
534
511
|
|
|
535
512
|
|
|
536
|
-
def is_iguazio_system_2_10_or_above(dashboard_url):
|
|
537
|
-
# for systems without production cert - silence no cert verification WARN
|
|
538
|
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
539
|
-
response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
|
|
540
|
-
|
|
541
|
-
if not response.ok:
|
|
542
|
-
if response.status_code == HTTPStatus.NOT_FOUND.value:
|
|
543
|
-
# in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
|
|
544
|
-
# found
|
|
545
|
-
return False
|
|
546
|
-
response.raise_for_status()
|
|
547
|
-
|
|
548
|
-
return True
|
|
549
|
-
|
|
550
|
-
|
|
551
513
|
# we assign the control session or access key to the password since this is iguazio auth scheme
|
|
552
514
|
# (requests should be sent with username:control_session/access_key as auth header)
|
|
553
515
|
def add_or_refresh_credentials(
|
|
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
|
|
|
577
539
|
# (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
|
|
578
540
|
if not is_iguazio_endpoint(api_url):
|
|
579
541
|
return "", "", token
|
|
580
|
-
iguazio_dashboard_url = "https://dashboard" + api_url[api_url.find(".") :]
|
|
581
|
-
|
|
582
|
-
# in 2.8 mlrun api is protected with control session, from 2.10 it's protected with access key
|
|
583
|
-
is_access_key_auth = is_iguazio_system_2_10_or_above(iguazio_dashboard_url)
|
|
584
|
-
if is_access_key_auth:
|
|
585
|
-
if not username or not token:
|
|
586
|
-
raise ValueError(
|
|
587
|
-
"username and access key required to authenticate against iguazio system"
|
|
588
|
-
)
|
|
589
|
-
return username, token, ""
|
|
590
542
|
|
|
591
|
-
if not username or not password:
|
|
592
|
-
raise ValueError(
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
if _cached_control_session:
|
|
597
|
-
if (
|
|
598
|
-
_cached_control_session[2] == username
|
|
599
|
-
and _cached_control_session[3] == password
|
|
600
|
-
and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
|
|
601
|
-
):
|
|
602
|
-
return _cached_control_session[2], _cached_control_session[0], ""
|
|
603
|
-
|
|
604
|
-
control_session = create_control_session(iguazio_dashboard_url, username, password)
|
|
605
|
-
_cached_control_session = (control_session, now, username, password)
|
|
606
|
-
return username, control_session, ""
|
|
543
|
+
if not username or not token:
|
|
544
|
+
raise ValueError(
|
|
545
|
+
"username and access key required to authenticate against iguazio system"
|
|
546
|
+
)
|
|
547
|
+
return username, token, ""
|
|
607
548
|
|
|
608
549
|
|
|
609
550
|
def parse_path(url, suffix="/"):
|
|
@@ -611,7 +552,9 @@ def parse_path(url, suffix="/"):
|
|
|
611
552
|
parsed_url = urlparse(url)
|
|
612
553
|
if parsed_url.netloc:
|
|
613
554
|
scheme = parsed_url.scheme.lower()
|
|
614
|
-
if scheme == "v3ios":
|
|
555
|
+
if scheme == "s3":
|
|
556
|
+
prefix = "s3"
|
|
557
|
+
elif scheme == "v3ios":
|
|
615
558
|
prefix = "https"
|
|
616
559
|
elif scheme == "v3io":
|
|
617
560
|
prefix = "http"
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -69,16 +69,16 @@ class WorkflowSpec(mlrun.model.ModelObj):
|
|
|
69
69
|
|
|
70
70
|
def __init__(
|
|
71
71
|
self,
|
|
72
|
-
engine=None,
|
|
73
|
-
code=None,
|
|
74
|
-
path=None,
|
|
75
|
-
args=None,
|
|
76
|
-
name=None,
|
|
77
|
-
handler=None,
|
|
78
|
-
args_schema: dict = None,
|
|
72
|
+
engine: typing.Optional[str] = None,
|
|
73
|
+
code: typing.Optional[str] = None,
|
|
74
|
+
path: typing.Optional[str] = None,
|
|
75
|
+
args: typing.Optional[dict] = None,
|
|
76
|
+
name: typing.Optional[str] = None,
|
|
77
|
+
handler: typing.Optional[str] = None,
|
|
78
|
+
args_schema: typing.Optional[dict] = None,
|
|
79
79
|
schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
|
|
80
|
-
cleanup_ttl: int = None,
|
|
81
|
-
image: str = None,
|
|
80
|
+
cleanup_ttl: typing.Optional[int] = None,
|
|
81
|
+
image: typing.Optional[str] = None,
|
|
82
82
|
):
|
|
83
83
|
self.engine = engine
|
|
84
84
|
self.code = code
|
|
@@ -401,6 +401,9 @@ def enrich_function_object(
|
|
|
401
401
|
else:
|
|
402
402
|
f.spec.build.source = project.spec.source
|
|
403
403
|
f.spec.build.load_source_on_run = project.spec.load_source_on_run
|
|
404
|
+
f.spec.build.source_code_target_dir = (
|
|
405
|
+
project.spec.build.source_code_target_dir
|
|
406
|
+
)
|
|
404
407
|
f.spec.workdir = project.spec.workdir or project.spec.subpath
|
|
405
408
|
f.prepare_image_for_deploy()
|
|
406
409
|
|
|
@@ -862,6 +865,11 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
862
865
|
)
|
|
863
866
|
return
|
|
864
867
|
|
|
868
|
+
logger.debug(
|
|
869
|
+
"Workflow submitted, waiting for pipeline run to start",
|
|
870
|
+
workflow_name=workflow_response.name,
|
|
871
|
+
)
|
|
872
|
+
|
|
865
873
|
# Getting workflow id from run:
|
|
866
874
|
response = retry_until_successful(
|
|
867
875
|
1,
|
|
@@ -988,6 +996,7 @@ def load_and_run(
|
|
|
988
996
|
cleanup_ttl: int = None,
|
|
989
997
|
load_only: bool = False,
|
|
990
998
|
wait_for_completion: bool = False,
|
|
999
|
+
project_context: str = None,
|
|
991
1000
|
):
|
|
992
1001
|
"""
|
|
993
1002
|
Auxiliary function that the RemoteRunner run once or run every schedule.
|
|
@@ -1018,10 +1027,11 @@ def load_and_run(
|
|
|
1018
1027
|
workflow and all its resources are deleted)
|
|
1019
1028
|
:param load_only: for just loading the project, inner use.
|
|
1020
1029
|
:param wait_for_completion: wait for workflow completion before returning
|
|
1030
|
+
:param project_context: project context path (used for loading the project)
|
|
1021
1031
|
"""
|
|
1022
1032
|
try:
|
|
1023
1033
|
project = mlrun.load_project(
|
|
1024
|
-
context=f"./{project_name}",
|
|
1034
|
+
context=project_context or f"./{project_name}",
|
|
1025
1035
|
url=url,
|
|
1026
1036
|
name=project_name,
|
|
1027
1037
|
init_git=init_git,
|
|
@@ -1053,7 +1063,7 @@ def load_and_run(
|
|
|
1053
1063
|
|
|
1054
1064
|
raise error
|
|
1055
1065
|
|
|
1056
|
-
context.logger.info(f"Loaded project {project.name}
|
|
1066
|
+
context.logger.info(f"Loaded project {project.name} successfully")
|
|
1057
1067
|
|
|
1058
1068
|
if load_only:
|
|
1059
1069
|
return
|
mlrun/projects/project.py
CHANGED
|
@@ -24,7 +24,7 @@ import typing
|
|
|
24
24
|
import uuid
|
|
25
25
|
import warnings
|
|
26
26
|
import zipfile
|
|
27
|
-
from os import environ, makedirs, path
|
|
27
|
+
from os import environ, makedirs, path
|
|
28
28
|
from typing import Callable, Optional, Union
|
|
29
29
|
|
|
30
30
|
import dotenv
|
|
@@ -36,7 +36,6 @@ import requests
|
|
|
36
36
|
import yaml
|
|
37
37
|
|
|
38
38
|
import mlrun.common.helpers
|
|
39
|
-
import mlrun.common.schemas.model_monitoring
|
|
40
39
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
41
40
|
import mlrun.db
|
|
42
41
|
import mlrun.errors
|
|
@@ -617,9 +616,14 @@ def _load_project_dir(context, name="", subpath=""):
|
|
|
617
616
|
# If there is a setup script do not force having project.yaml file
|
|
618
617
|
project = MlrunProject()
|
|
619
618
|
else:
|
|
620
|
-
|
|
621
|
-
|
|
619
|
+
message = "Project or function YAML not found in path"
|
|
620
|
+
logger.error(
|
|
621
|
+
message,
|
|
622
|
+
context=context,
|
|
623
|
+
name=name,
|
|
624
|
+
subpath=subpath,
|
|
622
625
|
)
|
|
626
|
+
raise mlrun.errors.MLRunNotFoundError(message)
|
|
623
627
|
|
|
624
628
|
project.spec.context = context
|
|
625
629
|
project.metadata.name = name or project.metadata.name
|
|
@@ -1247,20 +1251,20 @@ class MlrunProject(ModelObj):
|
|
|
1247
1251
|
self,
|
|
1248
1252
|
name,
|
|
1249
1253
|
workflow_path: str,
|
|
1250
|
-
embed=False,
|
|
1251
|
-
engine=None,
|
|
1254
|
+
embed: bool = False,
|
|
1255
|
+
engine: Optional[str] = None,
|
|
1252
1256
|
args_schema: list[EntrypointParam] = None,
|
|
1253
|
-
handler=None,
|
|
1257
|
+
handler: Optional[str] = None,
|
|
1254
1258
|
schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
|
|
1255
|
-
ttl=None,
|
|
1256
|
-
image: str = None,
|
|
1259
|
+
ttl: Optional[int] = None,
|
|
1260
|
+
image: Optional[str] = None,
|
|
1257
1261
|
**args,
|
|
1258
1262
|
):
|
|
1259
1263
|
"""Add or update a workflow, specify a name and the code path
|
|
1260
1264
|
|
|
1261
1265
|
:param name: Name of the workflow
|
|
1262
1266
|
:param workflow_path: URL (remote) / Path (absolute or relative to the project code path i.e.
|
|
1263
|
-
|
|
1267
|
+
<project.spec.get_code_path()>/<workflow_path>) for the workflow file.
|
|
1264
1268
|
:param embed: Add the workflow code into the project.yaml
|
|
1265
1269
|
:param engine: Workflow processing engine ("kfp", "local", "remote" or "remote:local")
|
|
1266
1270
|
:param args_schema: List of arg schema definitions (:py:class`~mlrun.model.EntrypointParam`)
|
|
@@ -1803,10 +1807,13 @@ class MlrunProject(ModelObj):
|
|
|
1803
1807
|
) -> mlrun.runtimes.BaseRuntime:
|
|
1804
1808
|
"""
|
|
1805
1809
|
Update or add a monitoring function to the project.
|
|
1810
|
+
Note: to deploy the function after linking it to the project,
|
|
1811
|
+
call `fn.deploy()` where `fn` is the object returned by this method.
|
|
1806
1812
|
|
|
1807
1813
|
examples::
|
|
1808
|
-
project.set_model_monitoring_function(
|
|
1809
|
-
|
|
1814
|
+
project.set_model_monitoring_function(
|
|
1815
|
+
name="myApp", application_class="MyApp", image="mlrun/mlrun"
|
|
1816
|
+
)
|
|
1810
1817
|
|
|
1811
1818
|
:param func: Function object or spec/code url, None refers to current Notebook
|
|
1812
1819
|
:param name: Name of the function (under the project), can be specified with a tag to support
|
|
@@ -1821,11 +1828,16 @@ class MlrunProject(ModelObj):
|
|
|
1821
1828
|
will be enriched with the tag value. (i.e. 'function-name:tag')
|
|
1822
1829
|
:param requirements: A list of python packages
|
|
1823
1830
|
:param requirements_file: Path to a python requirements file
|
|
1824
|
-
:param application_class: Name or an Instance of a class that
|
|
1831
|
+
:param application_class: Name or an Instance of a class that implements the monitoring application.
|
|
1825
1832
|
:param application_kwargs: Additional keyword arguments to be passed to the
|
|
1826
1833
|
monitoring application's constructor.
|
|
1827
1834
|
"""
|
|
1828
1835
|
|
|
1836
|
+
if name in mm_constants.MonitoringFunctionNames.all():
|
|
1837
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1838
|
+
f"Application name can not be on of the following name : "
|
|
1839
|
+
f"{mm_constants.MonitoringFunctionNames.all()}"
|
|
1840
|
+
)
|
|
1829
1841
|
function_object: RemoteRuntime = None
|
|
1830
1842
|
(
|
|
1831
1843
|
resolved_function_name,
|
|
@@ -1987,27 +1999,76 @@ class MlrunProject(ModelObj):
|
|
|
1987
1999
|
self,
|
|
1988
2000
|
default_controller_image: str = "mlrun/mlrun",
|
|
1989
2001
|
base_period: int = 10,
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2002
|
+
image: str = "mlrun/mlrun",
|
|
2003
|
+
deploy_histogram_data_drift_app: bool = True,
|
|
2004
|
+
) -> None:
|
|
2005
|
+
"""
|
|
2006
|
+
Deploy model monitoring application controller, writer and stream functions.
|
|
2007
|
+
While the main goal of the controller function is to handle the monitoring processing and triggering
|
|
2008
|
+
applications, the goal of the model monitoring writer function is to write all the monitoring
|
|
2009
|
+
application results to the databases.
|
|
2010
|
+
The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
|
|
2011
|
+
is detected. It processes the new events into statistics that are then written to statistics databases.
|
|
2012
|
+
|
|
2013
|
+
|
|
2014
|
+
:param default_controller_image: Deprecated.
|
|
2015
|
+
:param base_period: The time period in minutes in which the model monitoring controller
|
|
2016
|
+
function is triggered. By default, the base period is 10 minutes.
|
|
2017
|
+
:param image: The image of the model monitoring controller, writer, monitoring
|
|
2018
|
+
stream & histogram data drift functions, which are real time nuclio
|
|
2019
|
+
functions. By default, the image is mlrun/mlrun.
|
|
2020
|
+
:param deploy_histogram_data_drift_app: If true, deploy the default histogram-based data drift application.
|
|
2021
|
+
|
|
2003
2022
|
:returns: model monitoring controller job as a dictionary.
|
|
2004
2023
|
"""
|
|
2024
|
+
if default_controller_image != "mlrun/mlrun":
|
|
2025
|
+
# TODO: Remove this in 1.9.0
|
|
2026
|
+
warnings.warn(
|
|
2027
|
+
"'default_controller_image' is deprecated and will be removed in 1.9.0, "
|
|
2028
|
+
"use 'image' instead",
|
|
2029
|
+
FutureWarning,
|
|
2030
|
+
)
|
|
2031
|
+
image = default_controller_image
|
|
2005
2032
|
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
2006
|
-
|
|
2033
|
+
db.enable_model_monitoring(
|
|
2007
2034
|
project=self.name,
|
|
2008
|
-
|
|
2035
|
+
image=image,
|
|
2009
2036
|
base_period=base_period,
|
|
2010
2037
|
)
|
|
2038
|
+
if deploy_histogram_data_drift_app:
|
|
2039
|
+
fn = self.set_model_monitoring_function(
|
|
2040
|
+
func=str(
|
|
2041
|
+
pathlib.Path(__file__).parent.parent
|
|
2042
|
+
/ "model_monitoring/applications/histogram_data_drift.py"
|
|
2043
|
+
),
|
|
2044
|
+
name=mm_constants.MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME,
|
|
2045
|
+
application_class="HistogramDataDriftApplication",
|
|
2046
|
+
image=image,
|
|
2047
|
+
)
|
|
2048
|
+
fn.deploy()
|
|
2049
|
+
|
|
2050
|
+
def update_model_monitoring_controller(
|
|
2051
|
+
self,
|
|
2052
|
+
base_period: int = 10,
|
|
2053
|
+
image: str = "mlrun/mlrun",
|
|
2054
|
+
) -> None:
|
|
2055
|
+
"""
|
|
2056
|
+
Redeploy model monitoring application controller functions.
|
|
2057
|
+
|
|
2058
|
+
|
|
2059
|
+
:param base_period: The time period in minutes in which the model monitoring controller function
|
|
2060
|
+
is triggered. By default, the base period is 10 minutes.
|
|
2061
|
+
:param image: The image of the model monitoring controller, writer & monitoring
|
|
2062
|
+
stream functions, which are real time nuclio functions.
|
|
2063
|
+
By default, the image is mlrun/mlrun.
|
|
2064
|
+
:returns: model monitoring controller job as a dictionary.
|
|
2065
|
+
"""
|
|
2066
|
+
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
2067
|
+
db.update_model_monitoring_controller(
|
|
2068
|
+
project=self.name,
|
|
2069
|
+
base_period=base_period,
|
|
2070
|
+
image=image,
|
|
2071
|
+
)
|
|
2011
2072
|
|
|
2012
2073
|
def disable_model_monitoring(self):
|
|
2013
2074
|
db = mlrun.db.get_run_db(secrets=self._secrets)
|
|
@@ -2015,6 +2076,14 @@ class MlrunProject(ModelObj):
|
|
|
2015
2076
|
project=self.name,
|
|
2016
2077
|
name=mm_constants.MonitoringFunctionNames.APPLICATION_CONTROLLER,
|
|
2017
2078
|
)
|
|
2079
|
+
db.delete_function(
|
|
2080
|
+
project=self.name,
|
|
2081
|
+
name=mm_constants.MonitoringFunctionNames.WRITER,
|
|
2082
|
+
)
|
|
2083
|
+
db.delete_function(
|
|
2084
|
+
project=self.name,
|
|
2085
|
+
name=mm_constants.MonitoringFunctionNames.STREAM,
|
|
2086
|
+
)
|
|
2018
2087
|
|
|
2019
2088
|
def set_function(
|
|
2020
2089
|
self,
|
|
@@ -2448,6 +2517,16 @@ class MlrunProject(ModelObj):
|
|
|
2448
2517
|
f = self.spec._function_definitions.get(name)
|
|
2449
2518
|
if not f:
|
|
2450
2519
|
raise ValueError(f"function named {name} not found")
|
|
2520
|
+
# If this function is already available locally, don't recreate it unless always=True
|
|
2521
|
+
if (
|
|
2522
|
+
isinstance(
|
|
2523
|
+
self.spec._function_objects.get(name, None),
|
|
2524
|
+
mlrun.runtimes.base.BaseRuntime,
|
|
2525
|
+
)
|
|
2526
|
+
and not always
|
|
2527
|
+
):
|
|
2528
|
+
funcs[name] = self.spec._function_objects[name]
|
|
2529
|
+
continue
|
|
2451
2530
|
if hasattr(f, "to_dict"):
|
|
2452
2531
|
name, func = _init_function_from_obj(f, self, name)
|
|
2453
2532
|
else:
|
|
@@ -2787,7 +2866,7 @@ class MlrunProject(ModelObj):
|
|
|
2787
2866
|
def export(self, filepath=None, include_files: str = None):
|
|
2788
2867
|
"""save the project object into a yaml file or zip archive (default to project.yaml)
|
|
2789
2868
|
|
|
2790
|
-
By default the project object is exported to a yaml file, when the filepath suffix is '.zip'
|
|
2869
|
+
By default, the project object is exported to a yaml file, when the filepath suffix is '.zip'
|
|
2791
2870
|
the project context dir (code files) are also copied into the zip, the archive path can include
|
|
2792
2871
|
DataItem urls (for remote object storage, e.g. s3://<bucket>/<path>).
|
|
2793
2872
|
|
|
@@ -2812,19 +2891,19 @@ class MlrunProject(ModelObj):
|
|
|
2812
2891
|
|
|
2813
2892
|
if archive_code:
|
|
2814
2893
|
files_filter = include_files or "**"
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2894
|
+
with tempfile.NamedTemporaryFile(suffix=".zip") as f:
|
|
2895
|
+
remote_file = "://" in filepath
|
|
2896
|
+
fpath = f.name if remote_file else filepath
|
|
2897
|
+
with zipfile.ZipFile(fpath, "w") as zipf:
|
|
2898
|
+
for file_path in glob.iglob(
|
|
2899
|
+
f"{project_dir}/{files_filter}", recursive=True
|
|
2900
|
+
):
|
|
2901
|
+
write_path = pathlib.Path(file_path)
|
|
2902
|
+
zipf.write(
|
|
2903
|
+
write_path, arcname=write_path.relative_to(project_dir)
|
|
2904
|
+
)
|
|
2905
|
+
if remote_file:
|
|
2906
|
+
mlrun.get_dataitem(filepath).upload(zipf.filename)
|
|
2828
2907
|
|
|
2829
2908
|
def set_model_monitoring_credentials(
|
|
2830
2909
|
self,
|
mlrun/runtimes/base.py
CHANGED
|
@@ -15,6 +15,7 @@ import enum
|
|
|
15
15
|
import http
|
|
16
16
|
import re
|
|
17
17
|
import typing
|
|
18
|
+
import warnings
|
|
18
19
|
from base64 import b64encode
|
|
19
20
|
from os import environ
|
|
20
21
|
from typing import Callable, Optional, Union
|
|
@@ -125,7 +126,7 @@ class FunctionSpec(ModelObj):
|
|
|
125
126
|
self.allow_empty_resources = None
|
|
126
127
|
# The build.source is cloned/extracted to the specified clone_target_dir
|
|
127
128
|
# if a relative path is specified, it will be enriched with a temp dir path
|
|
128
|
-
self.clone_target_dir = clone_target_dir or None
|
|
129
|
+
self._clone_target_dir = clone_target_dir or None
|
|
129
130
|
|
|
130
131
|
@property
|
|
131
132
|
def build(self) -> ImageBuilder:
|
|
@@ -135,6 +136,24 @@ class FunctionSpec(ModelObj):
|
|
|
135
136
|
def build(self, build):
|
|
136
137
|
self._build = self._verify_dict(build, "build", ImageBuilder)
|
|
137
138
|
|
|
139
|
+
@property
|
|
140
|
+
def clone_target_dir(self):
|
|
141
|
+
warnings.warn(
|
|
142
|
+
"The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.8.0. "
|
|
143
|
+
"Use spec.build.source_code_target_dir instead.",
|
|
144
|
+
FutureWarning,
|
|
145
|
+
)
|
|
146
|
+
return self.build.source_code_target_dir
|
|
147
|
+
|
|
148
|
+
@clone_target_dir.setter
|
|
149
|
+
def clone_target_dir(self, clone_target_dir):
|
|
150
|
+
warnings.warn(
|
|
151
|
+
"The clone_target_dir attribute is deprecated in 1.6.2 and will be removed in 1.8.0. "
|
|
152
|
+
"Use spec.build.source_code_target_dir instead.",
|
|
153
|
+
FutureWarning,
|
|
154
|
+
)
|
|
155
|
+
self.build.source_code_target_dir = clone_target_dir
|
|
156
|
+
|
|
138
157
|
def enrich_function_preemption_spec(self):
|
|
139
158
|
pass
|
|
140
159
|
|
|
@@ -851,7 +870,7 @@ class BaseRuntime(ModelObj):
|
|
|
851
870
|
data = dict_to_json(struct)
|
|
852
871
|
stores = store_manager.set(secrets)
|
|
853
872
|
target = target or "function.yaml"
|
|
854
|
-
datastore, subpath = stores.get_or_create_store(target)
|
|
873
|
+
datastore, subpath, url = stores.get_or_create_store(target)
|
|
855
874
|
datastore.put(subpath, data)
|
|
856
875
|
logger.info(f"function spec saved to path: {target}")
|
|
857
876
|
return self
|
mlrun/runtimes/kubejob.py
CHANGED
|
@@ -73,7 +73,7 @@ class KubejobRuntime(KubeResource):
|
|
|
73
73
|
if workdir:
|
|
74
74
|
self.spec.workdir = workdir
|
|
75
75
|
if target_dir:
|
|
76
|
-
self.spec.clone_target_dir = target_dir
|
|
76
|
+
self.spec.build.source_code_target_dir = target_dir
|
|
77
77
|
|
|
78
78
|
self.spec.build.load_source_on_run = pull_at_runtime
|
|
79
79
|
if (
|
|
@@ -232,8 +232,10 @@ class KubejobRuntime(KubeResource):
|
|
|
232
232
|
self.spec.build.base_image = self.spec.build.base_image or get_in(
|
|
233
233
|
data, "data.spec.build.base_image"
|
|
234
234
|
)
|
|
235
|
-
#
|
|
236
|
-
self.spec.clone_target_dir = get_in(data, "data.spec.clone_target_dir")
|
|
235
|
+
# Get the source target dir in case it was enriched due to loading source
|
|
236
|
+
self.spec.build.source_code_target_dir = get_in(
|
|
237
|
+
data, "data.spec.build.source_code_target_dir"
|
|
238
|
+
) or get_in(data, "data.spec.clone_target_dir")
|
|
237
239
|
ready = data.get("ready", False)
|
|
238
240
|
if not ready:
|
|
239
241
|
logger.info(
|
mlrun/runtimes/local.py
CHANGED
|
@@ -218,7 +218,7 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
218
218
|
if workdir:
|
|
219
219
|
self.spec.workdir = workdir
|
|
220
220
|
if target_dir:
|
|
221
|
-
self.spec.clone_target_dir = target_dir
|
|
221
|
+
self.spec.build.source_code_target_dir = target_dir
|
|
222
222
|
|
|
223
223
|
def is_deployed(self):
|
|
224
224
|
return True
|
|
@@ -240,7 +240,7 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
|
|
|
240
240
|
if self.spec.build.source and not hasattr(self, "_is_run_local"):
|
|
241
241
|
target_dir = extract_source(
|
|
242
242
|
self.spec.build.source,
|
|
243
|
-
self.spec.clone_target_dir,
|
|
243
|
+
self.spec.build.source_code_target_dir,
|
|
244
244
|
secrets=execution._secrets_manager,
|
|
245
245
|
)
|
|
246
246
|
if workdir and not workdir.startswith("/"):
|
mlrun/runtimes/mpijob/abstract.py
CHANGED
|
@@ -195,13 +195,13 @@ class AbstractMPIJobRuntime(KubejobRuntime, abc.ABC):
|
|
|
195
195
|
if steps_per_sample is not None:
|
|
196
196
|
horovod_autotune_settings["autotune-steps-per-sample"] = steps_per_sample
|
|
197
197
|
if bayes_opt_max_samples is not None:
|
|
198
|
-
horovod_autotune_settings[
|
|
199
|
-
"autotune-bayes-opt-max-samples"
|
|
200
|
-
] = bayes_opt_max_samples
|
|
198
|
+
horovod_autotune_settings["autotune-bayes-opt-max-samples"] = (
|
|
199
|
+
bayes_opt_max_samples
|
|
200
|
+
)
|
|
201
201
|
if gaussian_process_noise is not None:
|
|
202
|
-
horovod_autotune_settings[
|
|
203
|
-
"autotune-gaussian-process-noise"
|
|
204
|
-
] = gaussian_process_noise
|
|
202
|
+
horovod_autotune_settings["autotune-gaussian-process-noise"] = (
|
|
203
|
+
gaussian_process_noise
|
|
204
|
+
)
|
|
205
205
|
|
|
206
206
|
self.set_envs(horovod_autotune_settings)
|
|
207
207
|
|
mlrun/runtimes/nuclio/function.py
CHANGED
|
@@ -431,15 +431,15 @@ class RemoteRuntime(KubeResource):
|
|
|
431
431
|
raise ValueError(
|
|
432
432
|
"gateway timeout must be greater than the worker timeout"
|
|
433
433
|
)
|
|
434
|
-
annotations[
|
|
435
|
-
"nginx.ingress.kubernetes.io/proxy-connect-timeout"
|
|
436
|
-
] = f"{gateway_timeout}"
|
|
437
|
-
annotations[
|
|
438
|
-
"nginx.ingress.kubernetes.io/proxy-read-timeout"
|
|
439
|
-
] = f"{gateway_timeout}"
|
|
440
|
-
annotations[
|
|
441
|
-
"nginx.ingress.kubernetes.io/proxy-send-timeout"
|
|
442
|
-
] = f"{gateway_timeout}"
|
|
434
|
+
annotations["nginx.ingress.kubernetes.io/proxy-connect-timeout"] = (
|
|
435
|
+
f"{gateway_timeout}"
|
|
436
|
+
)
|
|
437
|
+
annotations["nginx.ingress.kubernetes.io/proxy-read-timeout"] = (
|
|
438
|
+
f"{gateway_timeout}"
|
|
439
|
+
)
|
|
440
|
+
annotations["nginx.ingress.kubernetes.io/proxy-send-timeout"] = (
|
|
441
|
+
f"{gateway_timeout}"
|
|
442
|
+
)
|
|
443
443
|
|
|
444
444
|
trigger = nuclio.HttpTrigger(
|
|
445
445
|
workers=workers,
|
mlrun/runtimes/nuclio/serving.py
CHANGED
|
@@ -523,9 +523,9 @@ class ServingRuntime(RemoteRuntime):
|
|
|
523
523
|
function_object.metadata.tag = self.metadata.tag
|
|
524
524
|
|
|
525
525
|
function_object.metadata.labels = function_object.metadata.labels or {}
|
|
526
|
-
function_object.metadata.labels[
|
|
527
|
-
"mlrun/parent-function"
|
|
528
|
-
] = self.metadata.name
|
|
526
|
+
function_object.metadata.labels["mlrun/parent-function"] = (
|
|
527
|
+
self.metadata.name
|
|
528
|
+
)
|
|
529
529
|
function_object._is_child_function = True
|
|
530
530
|
if not function_object.spec.graph:
|
|
531
531
|
# copy the current graph only if the child doesnt have a graph of his own
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -501,9 +501,9 @@ class KubeResourceSpec(FunctionSpec):
|
|
|
501
501
|
)
|
|
502
502
|
is None
|
|
503
503
|
):
|
|
504
|
-
resources[resource_requirement][
|
|
505
|
-
resource_type
|
|
506
|
-
] = default_resources[resource_requirement][resource_type]
|
|
504
|
+
resources[resource_requirement][resource_type] = (
|
|
505
|
+
default_resources[resource_requirement][resource_type]
|
|
506
|
+
)
|
|
507
507
|
# This enables the user to define that no defaults would be applied on the resources
|
|
508
508
|
elif resources == {}:
|
|
509
509
|
return resources
|
mlrun/runtimes/sparkjob/spark3job.py
CHANGED
|
@@ -368,9 +368,9 @@ class Spark3JobSpec(KubeResourceSpec):
|
|
|
368
368
|
)
|
|
369
369
|
is None
|
|
370
370
|
):
|
|
371
|
-
resources[resource_requirement][
|
|
372
|
-
resource_type
|
|
373
|
-
] = default_resources[resource_requirement][resource_type]
|
|
371
|
+
resources[resource_requirement][resource_type] = (
|
|
372
|
+
default_resources[resource_requirement][resource_type]
|
|
373
|
+
)
|
|
374
374
|
else:
|
|
375
375
|
resources = default_resources
|
|
376
376
|
|
mlrun/serving/remote.py
CHANGED
|
@@ -21,6 +21,7 @@ import storey
|
|
|
21
21
|
from storey.flow import _ConcurrentJobExecution
|
|
22
22
|
|
|
23
23
|
import mlrun
|
|
24
|
+
import mlrun.config
|
|
24
25
|
from mlrun.errors import err_to_str
|
|
25
26
|
from mlrun.utils import logger
|
|
26
27
|
|
|
@@ -171,7 +172,8 @@ class RemoteStep(storey.SendToHttp):
|
|
|
171
172
|
if not self._session:
|
|
172
173
|
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
173
174
|
self.retries,
|
|
174
|
-
self.backoff_factor
|
|
175
|
+
self.backoff_factor
|
|
176
|
+
or mlrun.config.config.http_retry_defaults.backoff_factor,
|
|
175
177
|
retry_on_exception=False,
|
|
176
178
|
retry_on_status=self.retries > 0,
|
|
177
179
|
retry_on_post=True,
|
|
@@ -183,7 +185,7 @@ class RemoteStep(storey.SendToHttp):
|
|
|
183
185
|
resp = self._session.request(
|
|
184
186
|
method,
|
|
185
187
|
url,
|
|
186
|
-
verify=
|
|
188
|
+
verify=mlrun.config.config.httpdb.http.verify,
|
|
187
189
|
headers=headers,
|
|
188
190
|
data=body,
|
|
189
191
|
timeout=self.timeout,
|