mlrun 1.6.0rc20__py3-none-any.whl → 1.6.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +15 -8
- mlrun/artifacts/manager.py +6 -3
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +1 -1
- mlrun/datastore/azure_blob.py +12 -16
- mlrun/datastore/base.py +32 -10
- mlrun/datastore/datastore_profile.py +4 -4
- mlrun/datastore/dbfs_store.py +12 -11
- mlrun/datastore/filestore.py +2 -1
- mlrun/datastore/google_cloud_storage.py +11 -10
- mlrun/datastore/redis.py +2 -1
- mlrun/datastore/s3.py +12 -15
- mlrun/datastore/sources.py +16 -11
- mlrun/datastore/targets.py +2 -13
- mlrun/datastore/v3io.py +18 -20
- mlrun/db/httpdb.py +76 -7
- mlrun/errors.py +4 -0
- mlrun/execution.py +13 -4
- mlrun/feature_store/api.py +3 -4
- mlrun/launcher/base.py +4 -4
- mlrun/lists.py +0 -6
- mlrun/model.py +8 -1
- mlrun/model_monitoring/api.py +9 -31
- mlrun/model_monitoring/batch.py +14 -13
- mlrun/model_monitoring/controller.py +100 -70
- mlrun/model_monitoring/controller_handler.py +1 -3
- mlrun/model_monitoring/helpers.py +65 -20
- mlrun/model_monitoring/stream_processing.py +0 -3
- mlrun/projects/operations.py +1 -1
- mlrun/projects/project.py +10 -4
- mlrun/runtimes/base.py +6 -1
- mlrun/runtimes/constants.py +11 -0
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -9
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +64 -53
- mlrun/runtimes/serving.py +8 -1
- mlrun/serving/routers.py +7 -20
- mlrun/serving/server.py +4 -14
- mlrun/serving/utils.py +0 -3
- mlrun/utils/helpers.py +10 -2
- mlrun/utils/logger.py +5 -5
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +5 -3
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +51 -51
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/datastore/s3.py
CHANGED
@@ -33,7 +33,7 @@ class S3Store(DataStore):
 
         self.headers = None
 
-
+        access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
@@ -43,7 +43,7 @@ class S3Store(DataStore):
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
-                "sts", aws_access_key_id=
+                "sts", aws_access_key_id=access_key_id, aws_secret_access_key=secret_key
             )
             self._temp_credentials = client.assume_role(
                 RoleArn=assume_role_arn, RoleSessionName="assumeRoleSession"
@@ -74,11 +74,11 @@ class S3Store(DataStore):
             )
             return
 
-        if
+        if access_key_id or secret_key or force_non_anonymous:
             self.s3 = boto3.resource(
                 "s3",
                 region_name=region,
-                aws_access_key_id=
+                aws_access_key_id=access_key_id,
                 aws_secret_access_key=secret_key,
                 endpoint_url=endpoint_url,
             )
@@ -109,18 +109,15 @@ class S3Store(DataStore):
             res["spark.hadoop.fs.s3a.aws.profile"] = st.get("profile")
         return res
 
-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
         try:
             import s3fs  # noqa
         except ImportError as exc:
-
-            raise ImportError(
-                "AWS s3fs not installed, run pip install s3fs"
-            ) from exc
-            return None
+            raise ImportError("AWS s3fs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
             filesystem_class,
@@ -133,19 +130,19 @@ class S3Store(DataStore):
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile = self._get_secret_or_env("AWS_PROFILE")
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
-
+        access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
        secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
 
         if self._temp_credentials:
-
+            access_key_id = self._temp_credentials["AccessKeyId"]
             secret = self._temp_credentials["SecretAccessKey"]
             token = self._temp_credentials["SessionToken"]
         else:
             token = None
 
         storage_options = dict(
-            anon=not (force_non_anonymous or (
-            key=
+            anon=not (force_non_anonymous or (access_key_id and secret)),
+            key=access_key_id,
             secret=secret,
             token=token,
         )
@@ -157,7 +154,7 @@ class S3Store(DataStore):
         if profile:
             storage_options["profile"] = profile
 
-        return storage_options
+        return self._sanitize_storage_options(storage_options)
 
     def get_bucket_and_key(self, key):
         path = self._join(key)[1:]
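The net effect of this change is that the AWS access key is resolved once (from secrets or environment) and reused for the STS client, the boto3 resource, and the fsspec storage options, which are now passed through _sanitize_storage_options. Below is a minimal, self-contained sketch of that credential-resolution logic; build_s3_storage_options is a hypothetical helper for illustration, not the S3Store method itself.

# Sketch of the storage-option logic shown above (hypothetical helper):
# access is anonymous only when neither a key pair nor the
# S3_NON_ANONYMOUS flag is present.
from typing import Optional


def build_s3_storage_options(
    access_key_id: Optional[str],
    secret: Optional[str],
    token: Optional[str] = None,
    force_non_anonymous: bool = False,
    profile: Optional[str] = None,
) -> dict:
    options = {
        "anon": not (force_non_anonymous or (access_key_id and secret)),
        "key": access_key_id,
        "secret": secret,
        "token": token,
    }
    if profile:
        options["profile"] = profile
    # The real code additionally calls self._sanitize_storage_options();
    # here we simply drop empty values.
    return {k: v for k, v in options.items() if v is not None}


print(build_s3_storage_options("AKIA-EXAMPLE", "secret"))  # authenticated: anon=False
print(build_s3_storage_options(None, None))                # anonymous: anon=True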
mlrun/datastore/sources.py
CHANGED
@@ -20,6 +20,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union
 
 import pandas as pd
+import semver
 import v3io
 import v3io.dataplane
 from nuclio import KafkaTrigger
@@ -1018,6 +1019,20 @@ class KafkaSource(OnlineSource):
             max_workers=extra_attributes.pop("max_workers", 4),
         )
         function = function.add_trigger("kafka", trigger)
+
+        # ML-5499
+        bug_fix_version = "1.12.10"
+        if config.nuclio_version and semver.VersionInfo.parse(
+            config.nuclio_version
+        ) < semver.VersionInfo.parse(bug_fix_version):
+            warnings.warn(
+                f"Detected nuclio version {config.nuclio_version}, which is older "
+                f"than {bug_fix_version}. Forcing number of replicas of 1 in function '{function.metadata.name}'. "
+                f"To resolve this, please upgrade Nuclio."
+            )
+            function.spec.min_replicas = 1
+            function.spec.max_replicas = 1
+
         return function
 
 
@@ -1038,7 +1053,6 @@ class SQLSource(BaseSourceDriver):
         db_url: str = None,
         table_name: str = None,
         spark_options: dict = None,
-        time_fields: List[str] = None,
         parse_dates: List[str] = None,
         **kwargs,
     ):
@@ -1063,17 +1077,8 @@ class SQLSource(BaseSourceDriver):
         :param table_name: the name of the collection to access,
             from the current database
         :param spark_options: additional spark read options
-        :param time_fields : all the field to be parsed as timestamp.
         :param parse_dates : all the field to be parsed as timestamp.
         """
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1081,7 +1086,7 @@ class SQLSource(BaseSourceDriver):
             )
         if time_field:
             if parse_dates:
-
+                parse_dates.append(time_field)
             else:
                 parse_dates = [time_field]
         attrs = {
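The KafkaSource change pins the Nuclio function to a single replica when the detected Nuclio version predates the ML-5499 fix. A standalone sketch of the same semver gate follows; needs_single_replica is a hypothetical helper, the real code mutates function.spec directly.

# Version gate sketch using the semver package (illustrative helper).
import warnings

import semver

BUG_FIX_VERSION = "1.12.10"


def needs_single_replica(nuclio_version: str) -> bool:
    """Return True when the detected Nuclio version predates the fix."""
    if not nuclio_version:
        return False
    return semver.VersionInfo.parse(nuclio_version) < semver.VersionInfo.parse(
        BUG_FIX_VERSION
    )


if needs_single_replica("1.12.9"):
    warnings.warn(
        f"Nuclio older than {BUG_FIX_VERSION} detected, forcing a single replica"
    )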
mlrun/datastore/targets.py
CHANGED
@@ -17,7 +17,6 @@ import os
 import random
 import sys
 import time
-import warnings
 from collections import Counter
 from copy import copy
 from typing import Any, Dict, List, Optional, Union
@@ -553,7 +552,7 @@ class BaseStoreTarget(DataTargetBase):
         else:
             store, target_path = self._get_store_and_path()
             target_path = generate_path_with_chunk(self, chunk_id, target_path)
-            file_system = store.
+            file_system = store.filesystem
             if file_system.protocol == "file":
                 dir = os.path.dirname(target_path)
                 if dir:
@@ -1407,7 +1406,7 @@ class StreamTarget(BaseStoreTarget):
         from storey import V3ioDriver
 
         key_columns = list(key_columns.keys())
-        endpoint, uri = parse_path(self.
+        endpoint, uri = parse_path(self.path)
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
@@ -1680,7 +1679,6 @@ class SQLTarget(BaseStoreTarget):
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
-        time_fields: List[str] = None,
         varchar_len: int = 50,
         parse_dates: List[str] = None,
     ):
@@ -1718,20 +1716,11 @@ class SQLTarget(BaseStoreTarget):
         :param create_table: pass True if you want to create new table named by
             table_name with schema on current database.
         :param create_according_to_data: (not valid)
-        :param time_fields : all the field to be parsed as timestamp.
         :param varchar_len : the defalut len of the all the varchar column (using if needed to create the table).
         :param parse_dates : all the field to be parsed as timestamp.
         """
 
         create_according_to_data = False  # TODO: open for user
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None or table_name is None:
             attr = {}
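With the deprecated time_fields parameter removed from SQLSource and SQLTarget, timestamp columns are declared through parse_dates only. A hedged usage sketch, with the database URL, table name, and column name made up; check the full SQLTarget signature for the remaining arguments.

# Migration sketch: parse_dates replaces the removed time_fields argument.
from mlrun.datastore.targets import SQLTarget

target = SQLTarget(
    db_url="sqlite:///example.db",  # assumed value; any SQLAlchemy URL
    table_name="measurements",      # assumed table name
    create_table=True,
    parse_dates=["event_time"],     # previously passed as time_fields=[...]
)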
mlrun/datastore/v3io.py
CHANGED
@@ -19,6 +19,7 @@ from copy import deepcopy
 from datetime import datetime
 
 import fsspec
+import requests
 import v3io.dataplane
 
 import mlrun
@@ -73,26 +74,20 @@ class V3ioStore(DataStore):
         schema = "https" if self.secure else "http"
         return f"{schema}://{self.endpoint}"
 
-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
-        try:
-            import v3iofs  # noqa
-        except ImportError as exc:
-            if not silent:
-                raise ImportError(
-                    "v3iofs or storey not installed, run pip install storey"
-                ) from exc
-            return None
         self._filesystem = fsspec.filesystem("v3io", **self.get_storage_options())
         return self._filesystem
 
     def get_storage_options(self):
-
+        res = dict(
             v3io_access_key=self._get_secret_or_env("V3IO_ACCESS_KEY"),
             v3io_api=mlrun.mlconf.v3io_api,
         )
+        return self._sanitize_storage_options(res)
 
     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
@@ -150,15 +145,18 @@ class V3ioStore(DataStore):
             data = memoryview(data)
         except TypeError:
             pass
-
-
-
-
-
-
-
-
-
+
+        with requests.Session() as requests_session:
+            while buffer_offset < buffer_size:
+                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+                http_put(
+                    self.url + self._join(key),
+                    data[buffer_offset : buffer_offset + chunk_size],
+                    append_header if buffer_offset else self.headers,
+                    None,
+                    requests_session,
+                )
+                buffer_offset += chunk_size
 
     def put(self, key, data, append=False):
         return self._put(key, data)
@@ -206,7 +204,7 @@ class V3ioStore(DataStore):
         """Recursive rm file/folder
         Workaround for v3io-fs not supporting recursive directory removal"""
 
-        file_system = self.
+        file_system = self.filesystem
         if isinstance(path, str):
             path = [path]
         maxdepth = maxdepth if not maxdepth else maxdepth - 1
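The rewritten _put loop streams the buffer in chunks over a single requests.Session, sending the first chunk with the regular headers and every later chunk with an append header. A generic, self-contained sketch of that pattern; upload_in_chunks and its header arguments are illustrative and not the v3io http_put helper.

# Chunked HTTP PUT sketch (generic, not mlrun's http_put).
import requests


def upload_in_chunks(url, data, headers, append_header, max_chunk_size=1024 * 1024):
    """Send `data` to `url` in sequential PUTs; chunks after the first use append_header."""
    data = memoryview(data)
    buffer_size = len(data)
    buffer_offset = 0
    with requests.Session() as session:
        while buffer_offset < buffer_size:
            chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
            response = session.put(
                url,
                data=bytes(data[buffer_offset : buffer_offset + chunk_size]),
                headers=append_header if buffer_offset else headers,
            )
            response.raise_for_status()
            buffer_offset += chunk_size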
mlrun/db/httpdb.py
CHANGED
@@ -707,7 +707,7 @@ class HTTPRunDB(RunDBInterface):
         :param state: List only runs whose state is specified.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used.
+        :param last: Deprecated - currently not used (will be removed in 1.8.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -733,6 +733,13 @@ class HTTPRunDB(RunDBInterface):
                 "using the `with_notifications` flag."
             )
 
+        if last:
+            # TODO: Remove this in 1.8.0
+            warnings.warn(
+                "'last' is deprecated and will be removed in 1.8.0.",
+                FutureWarning,
+            )
+
         if (
             not name
             and not uid
@@ -1310,12 +1317,12 @@ class HTTPRunDB(RunDBInterface):
 
     def remote_builder(
         self,
-        func,
-        with_mlrun,
-        mlrun_version_specifier=None,
-        skip_deployed=False,
-        builder_env=None,
-        force_build=False,
+        func: BaseRuntime,
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
     ):
         """Build the pod image for a function, for execution on a remote cluster. This is executed by the MLRun
         API server, and creates a Docker image out of the function provided and any specific build
@@ -1330,6 +1337,20 @@ class HTTPRunDB(RunDBInterface):
         :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
         :param force_build: Force building the image, even when no changes were made
         """
+        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
+            "s3://"
+        )
+        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
+        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
+            logger.warning(
+                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
+                "keys. Only the permissions granted to the platform's configured secret will take affect "
+                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
+                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
+                source=func.spec.build.source,
+                load_source_on_run=func.spec.build.load_source_on_run,
+                default_docker_registry=config.httpdb.builder.docker_registry,
+            )
 
         try:
             req = {
@@ -1466,6 +1487,54 @@ class HTTPRunDB(RunDBInterface):
         response = self.api_call("GET", path, error_message)
         return mlrun.common.schemas.BackgroundTask(**response.json())
 
+    def list_project_background_tasks(
+        self,
+        project: Optional[str] = None,
+        state: Optional[str] = None,
+        created_from: Optional[datetime] = None,
+        created_to: Optional[datetime] = None,
+        last_update_time_from: Optional[datetime] = None,
+        last_update_time_to: Optional[datetime] = None,
+    ) -> list[mlrun.common.schemas.BackgroundTask]:
+        """
+        Retrieve updated information on project background tasks being executed.
+        If no filter is provided, will return background tasks from the last week.
+
+        :param project: Project name (defaults to mlrun.mlconf.default_project).
+        :param state: List only background tasks whose state is specified.
+        :param created_from: Filter by background task created time in ``[created_from, created_to]``.
+        :param created_to: Filter by background task created time in ``[created_from, created_to]``.
+        :param last_update_time_from: Filter by background task last update time in
+            ``(last_update_time_from, last_update_time_to)``.
+        :param last_update_time_to: Filter by background task last update time in
+            ``(last_update_time_from, last_update_time_to)``.
+        """
+        project = project or config.default_project
+        if (
+            not state
+            and not created_from
+            and not created_to
+            and not last_update_time_from
+            and not last_update_time_to
+        ):
+            # default to last week on no filter
+            created_from = datetime.now() - timedelta(days=7)
+
+        params = {
+            "state": state,
+            "created_from": datetime_to_iso(created_from),
+            "created_to": datetime_to_iso(created_to),
+            "last_update_time_from": datetime_to_iso(last_update_time_from),
+            "last_update_time_to": datetime_to_iso(last_update_time_to),
+        }
+
+        path = f"projects/{project}/background-tasks"
+        error_message = f"Failed listing project background task. project={project}"
+        response = self.api_call("GET", path, error_message, params=params)
+        return mlrun.common.schemas.BackgroundTaskList(
+            **response.json()
+        ).background_tasks
+
     def get_background_task(self, name: str) -> mlrun.common.schemas.BackgroundTask:
         """Retrieve updated information on a background task being executed."""
 
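A hedged usage sketch for the new list_project_background_tasks client call, assuming an MLRun API server reachable through mlrun.get_run_db(); the project name and filter values are examples only.

# Usage sketch (assumed project name and filters).
from datetime import datetime, timedelta

import mlrun

db = mlrun.get_run_db()

# With no filters the client defaults to "created in the last week";
# here we explicitly ask for running tasks from the last day.
tasks = db.list_project_background_tasks(
    project="my-project",
    state="running",
    created_from=datetime.now() - timedelta(days=1),
)
for task in tasks:
    # BackgroundTask fields per mlrun.common.schemas
    print(task.metadata.name, task.status.state)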
mlrun/errors.py
CHANGED
@@ -207,6 +207,10 @@ class MLRunTaskCancelledError(Exception):
     pass
 
 
+class MLRunValueError(ValueError):
+    pass
+
+
 class MLRunFatalFailureError(Exception):
     """
     Internal exception meant to be used inside mlrun.utils.helpers.retry_until_successful to signal the loop not to
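Because the new MLRunValueError subclasses the builtin ValueError, callers that already catch ValueError keep working unchanged. A small illustrative sketch; parse_positive is a hypothetical function.

# MLRunValueError is caught by existing ValueError handlers.
import mlrun.errors


def parse_positive(value: str) -> int:
    number = int(value)
    if number <= 0:
        raise mlrun.errors.MLRunValueError("expected a positive integer")
    return number


try:
    parse_positive("-3")
except ValueError as exc:  # also catches MLRunValueError
    print(f"rejected: {exc}")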
mlrun/execution.py
CHANGED
@@ -411,7 +411,7 @@ class MLClientCtx(object):
             self._artifacts_manager.artifacts[key] = artifact_obj
         self._state = status.get("state", self._state)
 
-        #
+        # No need to store the run for every worker
         if store_run and self.is_logging_worker():
             self.store_run()
         return self
@@ -434,6 +434,12 @@ class MLClientCtx(object):
             context.set_label("framework", "sklearn")
 
         """
+        if not self.is_logging_worker():
+            logger.warning(
+                "Setting labels is only supported in the logging worker, ignoring"
+            )
+            return
+
         if replace or not self._labels.get(key):
             self._labels[key] = str(value)
 
@@ -974,10 +980,11 @@ class MLClientCtx(object):
         """
         # If it's a OpenMPI job, get the global rank and compare to the logging rank (worker) set in MLRun's
         # configuration:
-
+        labels = self.labels
+        if "host" in labels and labels.get("kind", "job") == "mpijob":
             # The host (pod name) of each worker is created by k8s, and by default it uses the rank number as the id in
             # the following template: ...-worker-<rank>
-            rank = int(
+            rank = int(labels["host"].rsplit("-", 1)[1])
             return rank == mlrun.mlconf.packagers.logging_worker
 
         # Single worker is always the logging worker:
@@ -1004,7 +1011,6 @@ class MLClientCtx(object):
             _struct[key] = val
 
         struct = {
-            "metadata.labels": self._labels,
             "metadata.annotations": self._annotations,
             "spec.parameters": self._parameters,
             "spec.outputs": self._outputs,
@@ -1019,6 +1025,9 @@ class MLClientCtx(object):
         if self._state != "completed":
             struct["status.state"] = self._state
 
+        if self.is_logging_worker():
+            struct["metadata.labels"] = self._labels
+
         set_if_not_none(struct, "status.error", self._error)
         set_if_not_none(struct, "status.commit", self._commit)
         set_if_not_none(struct, "status.iterations", self._iteration_results)
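The logging-worker check now reads the rank of an OpenMPI worker from its pod name suffix (...-worker-<rank>), and only that worker stores labels and the run object. A standalone sketch of the rank logic; is_logging_worker and the pod names below are illustrative.

# Rank extraction sketch for mpijob pods named "...-worker-<rank>".
def is_logging_worker(labels: dict, logging_worker_rank: int = 0) -> bool:
    if "host" in labels and labels.get("kind", "job") == "mpijob":
        rank = int(labels["host"].rsplit("-", 1)[1])
        return rank == logging_worker_rank
    # A single (non-MPI) worker is always the logging worker
    return True


print(is_logging_worker({"kind": "mpijob", "host": "train-abc-worker-0"}))  # True
print(is_logging_worker({"kind": "mpijob", "host": "train-abc-worker-3"}))  # False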
mlrun/feature_store/api.py
CHANGED
@@ -933,7 +933,7 @@ def _deploy_ingestion_service_v2(
         source = HTTPSource()
         func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
         config = RunConfig(function=func)
-
+        my_set.deploy_ingestion_service(source, run_config=config)
 
     :param featureset: feature set object or uri
     :param source: data source object describing the online or offline source
@@ -1025,7 +1025,7 @@ def deploy_ingestion_service(
         source = HTTPSource()
         func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
         config = RunConfig(function=func)
-
+        my_set.deploy_ingestion_service(source, run_config=config)
 
     :param featureset: feature set object or uri
     :param source: data source object describing the online or offline source
@@ -1036,8 +1036,7 @@ def deploy_ingestion_service(
 
     :return: URL to access the deployed ingestion service
     """
-    endpoint, _ =
-        featureset=featureset,
+    endpoint, _ = featureset.deploy_ingestion_service(
         source=source,
         targets=targets,
         name=name,
mlrun/launcher/base.py
CHANGED
@@ -396,10 +396,10 @@ class BaseLauncher(abc.ABC):
                 status=run.status.state,
                 name=run.metadata.name,
             )
-            if
-
-                mlrun.runtimes.constants.RunStates.
-
+            if (
+                run.status.state
+                in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+            ):
                 if runtime._is_remote and not runtime.is_child:
                     logger.error(
                         "Run did not finish successfully",
mlrun/lists.py
CHANGED
mlrun/model.py
CHANGED
@@ -1259,8 +1259,15 @@ class RunObject(RunTemplate):
         """error string if failed"""
         if self.status:
             unknown_error = ""
-            if
+            if (
+                self.status.state
+                in mlrun.runtimes.constants.RunStates.abortion_states()
+            ):
+                unknown_error = "Run was aborted"
+
+            elif self.status.state in mlrun.runtimes.constants.RunStates.error_states():
                 unknown_error = "Unknown error"
+
             return (
                 self.status.error
                 or self.status.reason
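The error string now distinguishes aborted runs from failed ones using the RunStates helpers referenced above. Below is a standalone rendition of that logic; the state sets are assumptions for illustration, the real membership comes from mlrun.runtimes.constants.RunStates.

# Standalone sketch of the error-text logic (state sets are assumed).
ABORTION_STATES = {"aborted", "aborting"}        # assumed membership
ERROR_STATES = {"error", "aborted", "aborting"}  # assumed membership


def default_error_text(state: str, error: str = "", reason: str = "") -> str:
    unknown_error = ""
    if state in ABORTION_STATES:
        unknown_error = "Run was aborted"
    elif state in ERROR_STATES:
        unknown_error = "Unknown error"
    return error or reason or unknown_error


print(default_error_text("aborted"))  # -> "Run was aborted"
print(default_error_text("error"))    # -> "Unknown error"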
mlrun/model_monitoring/api.py
CHANGED
@@ -132,7 +132,6 @@ def record_results(
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
-    last_in_batch_set: typing.Optional[bool] = True,
     artifacts_tag: str = "",
     default_batch_image="mlrun/mlrun",
 ) -> ModelEndpoint:
@@ -165,14 +164,6 @@ def record_results(
     :param possible_drift_threshold: The threshold of which to mark possible drifts.
     :param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring batch function
         will be registered through MLRun API with the provided image.
-    :param last_in_batch_set: This flag can (and should only) be used when the model endpoint does not have
-        model-monitoring set.
-        If set to `True` (the default), this flag marks the current monitoring window
-        (on this monitoring endpoint) is completed - the data inferred so far is assumed
-        to be the total data for this monitoring window.
-        You may want to set this flag to `False` if you want to record multiple results in
-        close time proximity ("batch set"). In this case, set this flag to `False` on all
-        but the last batch in the set.
     :param artifacts_tag: Tag to use for all the artifacts resulted from the function. Will be relevant
         only if the monitoring batch job has been triggered.
 
@@ -206,25 +197,14 @@ def record_results(
     )
 
     if model_endpoint.spec.stream_path == "":
-
-
-
-
-
-
-
-
-        )
-    else:
-        if last_in_batch_set is not None:
-            logger.warning(
-                "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
-                "Ignoring `last_in_batch_set`, as it is relevant only when the model "
-                "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
-                " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
-                project=project,
-                endpoint_id=model_endpoint.metadata.uid,
-            )
+        logger.info(
+            "Updating the last request time to mark the current monitoring window as completed",
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+        )
+        bump_model_endpoint_last_request(
+            project=project, model_endpoint=model_endpoint, db=db
+        )
 
     if trigger_monitoring_job:
         # Run the monitoring batch drift job
@@ -612,9 +592,7 @@ def read_dataset_as_dataframe(
         if label_columns is None:
             label_columns = dataset.status.label_column
         # Get the features and parse to DataFrame:
-        dataset =
-            dataset.uri, drop_columns=drop_columns
-        ).to_dataframe()
+        dataset = dataset.get_offline_features(drop_columns=drop_columns).to_dataframe()
 
     elif isinstance(dataset, (list, np.ndarray)):
         if not feature_columns:
mlrun/model_monitoring/batch.py
CHANGED
@@ -117,20 +117,21 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
     def _calc_kl_div(
         actual_dist: np.array, expected_dist: np.array, kld_scaling: float
     ) -> float:
-        """Return the
+        """Return the asymmetric KL divergence"""
+        # We take 0*log(0) == 0 for this calculation
+        mask = actual_dist != 0
+        actual_dist = actual_dist[mask]
+        expected_dist = expected_dist[mask]
         return np.sum(
-
-
-            (
-
-                actual_dist
-                / np.where(expected_dist != 0, expected_dist, kld_scaling)
-            ),
-            0,
-        )
+            actual_dist
+            * np.log(
+                actual_dist / np.where(expected_dist != 0, expected_dist, kld_scaling)
+            ),
         )
 
-    def compute(
+    def compute(
+        self, capping: Optional[float] = None, kld_scaling: float = 1e-4
+    ) -> float:
         """
         :param capping: A bounded value for the KL Divergence. For infinite distance, the result is replaced with
             the capping value which indicates a huge differences between the distributions.
@@ -141,8 +142,8 @@ class KullbackLeiblerDivergence(HistogramDistanceMetric, metric_name="kld"):
         t_u = self._calc_kl_div(self.distrib_t, self.distrib_u, kld_scaling)
         u_t = self._calc_kl_div(self.distrib_u, self.distrib_t, kld_scaling)
         result = t_u + u_t
-        if capping:
-            return capping
+        if capping and result == float("inf"):
+            return capping
         return result
 
 
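The KL-divergence fix masks zero bins in the actual distribution (treating 0*log(0) as 0), replaces zero bins in the expected distribution with a small scaling constant, and applies the capping value only to an infinite result. A numeric sketch of the same computation using numpy alone, not the HistogramDistanceMetric class itself.

# Numeric sketch of the corrected symmetric KL divergence.
import numpy as np


def kl_divergence(actual, expected, kld_scaling=1e-4):
    actual = np.asarray(actual, dtype=float)
    expected = np.asarray(expected, dtype=float)
    mask = actual != 0  # 0 * log(0) is taken as 0
    actual, expected = actual[mask], expected[mask]
    return np.sum(
        actual * np.log(actual / np.where(expected != 0, expected, kld_scaling))
    )


def symmetric_kl(dist_t, dist_u, capping=None, kld_scaling=1e-4):
    result = kl_divergence(dist_t, dist_u, kld_scaling) + kl_divergence(
        dist_u, dist_t, kld_scaling
    )
    if capping and result == float("inf"):
        return capping
    return result


print(symmetric_kl([0.5, 0.5, 0.0], [0.25, 0.25, 0.5]))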