mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +59 -20
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +74 -73
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +79 -55
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/lists.py +2 -0
- mlrun/model.py +31 -2
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +50 -37
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +51 -17
- mlrun/projects/project.py +77 -61
- mlrun/render.py +13 -4
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +8 -8
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +28 -8
- mlrun/utils/helpers.py +20 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
- mlrun/datastore/helpers.py +0 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED
|
@@ -33,6 +33,7 @@ import mlrun.common.schemas
|
|
|
33
33
|
import mlrun.model_monitoring.model_endpoint
|
|
34
34
|
import mlrun.platforms
|
|
35
35
|
import mlrun.projects
|
|
36
|
+
from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
|
|
36
37
|
from mlrun.errors import MLRunInvalidArgumentError, err_to_str
|
|
37
38
|
|
|
38
39
|
from ..artifacts import Artifact
|
|
@@ -133,17 +134,28 @@ class HTTPRunDB(RunDBInterface):
|
|
|
133
134
|
endpoint += f":{parsed_url.port}"
|
|
134
135
|
base_url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
|
|
135
136
|
|
|
137
|
+
self.base_url = base_url
|
|
136
138
|
username = parsed_url.username or config.httpdb.user
|
|
137
139
|
password = parsed_url.password or config.httpdb.password
|
|
140
|
+
self.token_provider = None
|
|
138
141
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
+
if config.auth_with_client_id.enabled:
|
|
143
|
+
self.token_provider = OAuthClientIDTokenProvider(
|
|
144
|
+
token_endpoint=mlrun.get_secret_or_env("MLRUN_AUTH_TOKEN_ENDPOINT"),
|
|
145
|
+
client_id=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_ID"),
|
|
146
|
+
client_secret=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_SECRET"),
|
|
147
|
+
timeout=config.auth_with_client_id.request_timeout,
|
|
148
|
+
)
|
|
149
|
+
else:
|
|
150
|
+
username, password, token = mlrun.platforms.add_or_refresh_credentials(
|
|
151
|
+
parsed_url.hostname, username, password, config.httpdb.token
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
if token:
|
|
155
|
+
self.token_provider = StaticTokenProvider(token)
|
|
142
156
|
|
|
143
|
-
self.base_url = base_url
|
|
144
157
|
self.user = username
|
|
145
158
|
self.password = password
|
|
146
|
-
self.token = token
|
|
147
159
|
|
|
148
160
|
def __repr__(self):
|
|
149
161
|
cls = self.__class__.__name__
|
|
@@ -152,7 +164,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
152
164
|
@staticmethod
|
|
153
165
|
def get_api_path_prefix(version: str = None) -> str:
|
|
154
166
|
"""
|
|
155
|
-
:param version: API version to use, None (the default) will mean to use the default value from
|
|
167
|
+
:param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
|
|
156
168
|
for un-versioned api set an empty string.
|
|
157
169
|
"""
|
|
158
170
|
if version is not None:
|
|
@@ -213,17 +225,19 @@ class HTTPRunDB(RunDBInterface):
|
|
|
213
225
|
|
|
214
226
|
if self.user:
|
|
215
227
|
kw["auth"] = (self.user, self.password)
|
|
216
|
-
elif self.
|
|
217
|
-
|
|
218
|
-
if
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
"
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
228
|
+
elif self.token_provider:
|
|
229
|
+
token = self.token_provider.get_token()
|
|
230
|
+
if token:
|
|
231
|
+
# Iguazio auth doesn't support passing token through bearer, so use cookie instead
|
|
232
|
+
if self.token_provider.is_iguazio_session():
|
|
233
|
+
session_cookie = f'j:{{"sid": "{token}"}}'
|
|
234
|
+
cookies = {
|
|
235
|
+
"session": session_cookie,
|
|
236
|
+
}
|
|
237
|
+
kw["cookies"] = cookies
|
|
238
|
+
else:
|
|
239
|
+
if "Authorization" not in kw.setdefault("headers", {}):
|
|
240
|
+
kw["headers"].update({"Authorization": "Bearer " + token})
|
|
227
241
|
|
|
228
242
|
if mlrun.common.schemas.HeaderNames.client_version not in kw.setdefault(
|
|
229
243
|
"headers", {}
|
|
@@ -250,7 +264,11 @@ class HTTPRunDB(RunDBInterface):
|
|
|
250
264
|
|
|
251
265
|
try:
|
|
252
266
|
response = self.session.request(
|
|
253
|
-
method,
|
|
267
|
+
method,
|
|
268
|
+
url,
|
|
269
|
+
timeout=timeout,
|
|
270
|
+
verify=config.httpdb.http.verify,
|
|
271
|
+
**kw,
|
|
254
272
|
)
|
|
255
273
|
except requests.RequestException as exc:
|
|
256
274
|
error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
|
|
@@ -302,11 +320,11 @@ class HTTPRunDB(RunDBInterface):
|
|
|
302
320
|
|
|
303
321
|
def connect(self, secrets=None):
|
|
304
322
|
"""Connect to the MLRun API server. Must be called prior to executing any other method.
|
|
305
|
-
The code utilizes the URL for the API server from the configuration - ``
|
|
323
|
+
The code utilizes the URL for the API server from the configuration - ``config.dbpath``.
|
|
306
324
|
|
|
307
325
|
For example::
|
|
308
326
|
|
|
309
|
-
|
|
327
|
+
config.dbpath = config.dbpath or 'http://mlrun-api:8080'
|
|
310
328
|
db = get_run_db().connect()
|
|
311
329
|
"""
|
|
312
330
|
# hack to allow unit tests to instantiate HTTPRunDB without a real server behind
|
|
@@ -500,7 +518,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
500
518
|
if offset < 0:
|
|
501
519
|
raise MLRunInvalidArgumentError("Offset cannot be negative")
|
|
502
520
|
if size is None:
|
|
503
|
-
size = int(
|
|
521
|
+
size = int(config.httpdb.logs.pull_logs_default_size_limit)
|
|
504
522
|
elif size == -1:
|
|
505
523
|
logger.warning(
|
|
506
524
|
"Retrieving all logs. This may be inefficient and can result in a large log."
|
|
@@ -546,25 +564,23 @@ class HTTPRunDB(RunDBInterface):
|
|
|
546
564
|
|
|
547
565
|
state, text = self.get_log(uid, project, offset=offset)
|
|
548
566
|
if text:
|
|
549
|
-
print(text.decode(errors=
|
|
567
|
+
print(text.decode(errors=config.httpdb.logs.decode.errors))
|
|
550
568
|
nil_resp = 0
|
|
551
569
|
while True:
|
|
552
570
|
offset += len(text)
|
|
553
571
|
# if we get 3 nil responses in a row, increase the sleep time to 10 seconds
|
|
554
572
|
# TODO: refactor this to use a conditional backoff mechanism
|
|
555
573
|
if nil_resp < 3:
|
|
556
|
-
time.sleep(int(
|
|
574
|
+
time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
|
|
557
575
|
else:
|
|
558
576
|
time.sleep(
|
|
559
|
-
int(
|
|
560
|
-
mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
|
|
561
|
-
)
|
|
577
|
+
int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
|
|
562
578
|
)
|
|
563
579
|
state, text = self.get_log(uid, project, offset=offset)
|
|
564
580
|
if text:
|
|
565
581
|
nil_resp = 0
|
|
566
582
|
print(
|
|
567
|
-
text.decode(errors=
|
|
583
|
+
text.decode(errors=config.httpdb.logs.decode.errors),
|
|
568
584
|
end="",
|
|
569
585
|
)
|
|
570
586
|
else:
|
|
@@ -928,6 +944,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
928
944
|
kind: str = None,
|
|
929
945
|
category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
|
|
930
946
|
tree: str = None,
|
|
947
|
+
producer_uri: str = None,
|
|
931
948
|
) -> ArtifactList:
|
|
932
949
|
"""List artifacts filtered by various parameters.
|
|
933
950
|
|
|
@@ -954,9 +971,12 @@ class HTTPRunDB(RunDBInterface):
|
|
|
954
971
|
:param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
|
|
955
972
|
artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
|
|
956
973
|
from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
|
|
957
|
-
:param kind:
|
|
958
|
-
:param category:
|
|
959
|
-
:param tree:
|
|
974
|
+
:param kind: Return artifacts of the requested kind.
|
|
975
|
+
:param category: Return artifacts of the requested category.
|
|
976
|
+
:param tree: Return artifacts of the requested tree.
|
|
977
|
+
:param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
|
|
978
|
+
points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
|
|
979
|
+
is a workflow id (artifact was created as part of a workflow).
|
|
960
980
|
"""
|
|
961
981
|
|
|
962
982
|
project = project or config.default_project
|
|
@@ -975,6 +995,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
975
995
|
"category": category,
|
|
976
996
|
"tree": tree,
|
|
977
997
|
"format": mlrun.common.schemas.ArtifactsFormat.full.value,
|
|
998
|
+
"producer_uri": producer_uri,
|
|
978
999
|
}
|
|
979
1000
|
error = "list artifacts"
|
|
980
1001
|
endpoint_path = f"projects/{project}/artifacts"
|
|
@@ -1135,17 +1156,17 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1135
1156
|
structured_dict = {}
|
|
1136
1157
|
for project, job_runtime_resources_map in response.json().items():
|
|
1137
1158
|
for job_id, runtime_resources in job_runtime_resources_map.items():
|
|
1138
|
-
structured_dict.setdefault(project, {})[
|
|
1139
|
-
|
|
1140
|
-
|
|
1159
|
+
structured_dict.setdefault(project, {})[job_id] = (
|
|
1160
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1161
|
+
)
|
|
1141
1162
|
return structured_dict
|
|
1142
1163
|
elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
|
|
1143
1164
|
structured_dict = {}
|
|
1144
1165
|
for project, kind_runtime_resources_map in response.json().items():
|
|
1145
1166
|
for kind, runtime_resources in kind_runtime_resources_map.items():
|
|
1146
|
-
structured_dict.setdefault(project, {})[
|
|
1147
|
-
|
|
1148
|
-
|
|
1167
|
+
structured_dict.setdefault(project, {})[kind] = (
|
|
1168
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1169
|
+
)
|
|
1149
1170
|
return structured_dict
|
|
1150
1171
|
else:
|
|
1151
1172
|
raise NotImplementedError(
|
|
@@ -1173,7 +1194,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1173
1194
|
:param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
|
|
1174
1195
|
period didn't pass.
|
|
1175
1196
|
:param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
|
|
1176
|
-
the moment they moved to terminal state
|
|
1197
|
+
the moment they moved to terminal state
|
|
1198
|
+
(defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
|
|
1177
1199
|
|
|
1178
1200
|
:returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
|
|
1179
1201
|
that were removed.
|
|
@@ -1203,9 +1225,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1203
1225
|
structured_dict = {}
|
|
1204
1226
|
for project, kind_runtime_resources_map in response.json().items():
|
|
1205
1227
|
for kind, runtime_resources in kind_runtime_resources_map.items():
|
|
1206
|
-
structured_dict.setdefault(project, {})[
|
|
1207
|
-
|
|
1208
|
-
|
|
1228
|
+
structured_dict.setdefault(project, {})[kind] = (
|
|
1229
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1230
|
+
)
|
|
1209
1231
|
return structured_dict
|
|
1210
1232
|
|
|
1211
1233
|
def create_schedule(
|
|
@@ -1340,7 +1362,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1340
1362
|
logger.warning(
|
|
1341
1363
|
"Building a function image to ECR and loading an S3 source to the image may require conflicting access "
|
|
1342
1364
|
"keys. Only the permissions granted to the platform's configured secret will take affect "
|
|
1343
|
-
"(see mlrun.
|
|
1365
|
+
"(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
|
|
1344
1366
|
"In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
|
|
1345
1367
|
source=func.spec.build.source,
|
|
1346
1368
|
load_source_on_run=func.spec.build.load_source_on_run,
|
|
@@ -1495,7 +1517,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1495
1517
|
Retrieve updated information on project background tasks being executed.
|
|
1496
1518
|
If no filter is provided, will return background tasks from the last week.
|
|
1497
1519
|
|
|
1498
|
-
:param project: Project name (defaults to mlrun.
|
|
1520
|
+
:param project: Project name (defaults to mlrun.config.config.default_project).
|
|
1499
1521
|
:param state: List only background tasks whose state is specified.
|
|
1500
1522
|
:param created_from: Filter by background task created time in ``[created_from, created_to]``.
|
|
1501
1523
|
:param created_to: Filter by background task created time in ``[created_from, created_to]``.
|
|
@@ -1608,19 +1630,21 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1608
1630
|
artifact_path=None,
|
|
1609
1631
|
ops=None,
|
|
1610
1632
|
cleanup_ttl=None,
|
|
1633
|
+
timeout=60,
|
|
1611
1634
|
):
|
|
1612
1635
|
"""Submit a KFP pipeline for execution.
|
|
1613
1636
|
|
|
1614
|
-
:param project:
|
|
1615
|
-
:param pipeline:
|
|
1616
|
-
:param arguments:
|
|
1617
|
-
:param experiment:
|
|
1618
|
-
:param run:
|
|
1619
|
-
:param namespace:
|
|
1620
|
-
:param artifact_path:
|
|
1621
|
-
:param ops:
|
|
1622
|
-
:param cleanup_ttl:
|
|
1623
|
-
|
|
1637
|
+
:param project: The project of the pipeline
|
|
1638
|
+
:param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
|
|
1639
|
+
:param arguments: A dictionary of arguments to pass to the pipeline.
|
|
1640
|
+
:param experiment: A name to assign for the specific experiment.
|
|
1641
|
+
:param run: A name for this specific run.
|
|
1642
|
+
:param namespace: Kubernetes namespace to execute the pipeline in.
|
|
1643
|
+
:param artifact_path: A path to artifacts used by this pipeline.
|
|
1644
|
+
:param ops: Transformers to apply on all ops in the pipeline.
|
|
1645
|
+
:param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
|
|
1646
|
+
workflow and all its resources are deleted)
|
|
1647
|
+
:param timeout: Timeout for the API call.
|
|
1624
1648
|
"""
|
|
1625
1649
|
|
|
1626
1650
|
if isinstance(pipeline, str):
|
|
@@ -1662,7 +1686,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1662
1686
|
"POST",
|
|
1663
1687
|
f"projects/{project}/pipelines",
|
|
1664
1688
|
params=params,
|
|
1665
|
-
timeout=
|
|
1689
|
+
timeout=timeout,
|
|
1666
1690
|
body=data,
|
|
1667
1691
|
headers=headers,
|
|
1668
1692
|
)
|
|
@@ -3450,8 +3474,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3450
3474
|
source: Optional[str] = None,
|
|
3451
3475
|
run_name: Optional[str] = None,
|
|
3452
3476
|
namespace: Optional[str] = None,
|
|
3453
|
-
notifications:
|
|
3454
|
-
):
|
|
3477
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
3478
|
+
) -> mlrun.common.schemas.WorkflowResponse:
|
|
3455
3479
|
"""
|
|
3456
3480
|
Submitting workflow for a remote execution.
|
|
3457
3481
|
|
mlrun/execution.py
CHANGED
|
@@ -559,9 +559,9 @@ class MLClientCtx(object):
|
|
|
559
559
|
for k, v in get_in(task, ["status", "results"], {}).items():
|
|
560
560
|
self._results[k] = v
|
|
561
561
|
for artifact in get_in(task, ["status", run_keys.artifacts], []):
|
|
562
|
-
self._artifacts_manager.artifacts[
|
|
563
|
-
artifact
|
|
564
|
-
|
|
562
|
+
self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
|
|
563
|
+
artifact
|
|
564
|
+
)
|
|
565
565
|
self._artifacts_manager.link_artifact(
|
|
566
566
|
self.project,
|
|
567
567
|
self.name,
|
|
@@ -389,9 +389,9 @@ class LoggingCallback(Callback):
|
|
|
389
389
|
):
|
|
390
390
|
try:
|
|
391
391
|
self._get_hyperparameter(key_chain=learning_rate_key_chain)
|
|
392
|
-
self._dynamic_hyperparameters_keys[
|
|
393
|
-
|
|
394
|
-
|
|
392
|
+
self._dynamic_hyperparameters_keys[learning_rate_key] = (
|
|
393
|
+
learning_rate_key_chain
|
|
394
|
+
)
|
|
395
395
|
except (KeyError, IndexError, ValueError):
|
|
396
396
|
pass
|
|
397
397
|
|
|
@@ -263,13 +263,13 @@ class TFKerasModelHandler(DLModelHandler):
|
|
|
263
263
|
# Update the paths and log artifacts if context is available:
|
|
264
264
|
if self._weights_file is not None:
|
|
265
265
|
if self._context is not None:
|
|
266
|
-
artifacts[
|
|
267
|
-
self.
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
266
|
+
artifacts[self._get_weights_file_artifact_name()] = (
|
|
267
|
+
self._context.log_artifact(
|
|
268
|
+
self._weights_file,
|
|
269
|
+
local_path=self._weights_file,
|
|
270
|
+
artifact_path=output_path,
|
|
271
|
+
db_key=False,
|
|
272
|
+
)
|
|
273
273
|
)
|
|
274
274
|
|
|
275
275
|
return artifacts if self._context is not None else None
|
mlrun/k8s_utils.py
CHANGED
|
@@ -134,13 +134,13 @@ def sanitize_label_value(value: str) -> str:
|
|
|
134
134
|
return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
def verify_label_key(key):
|
|
137
|
+
def verify_label_key(key: str):
|
|
138
|
+
"""
|
|
139
|
+
Verify that the label key is valid for Kubernetes.
|
|
140
|
+
Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
|
|
141
|
+
"""
|
|
138
142
|
if not key:
|
|
139
143
|
raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
|
|
140
|
-
if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
|
|
141
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
142
|
-
"Labels cannot start with 'k8s.io' or 'kubernetes.io'"
|
|
143
|
-
)
|
|
144
144
|
|
|
145
145
|
mlrun.utils.helpers.verify_field_regex(
|
|
146
146
|
f"project.metadata.labels.'{key}'",
|
|
@@ -148,6 +148,11 @@ def verify_label_key(key):
|
|
|
148
148
|
mlrun.utils.regex.k8s_character_limit,
|
|
149
149
|
)
|
|
150
150
|
|
|
151
|
+
if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
|
|
152
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
153
|
+
"Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
|
|
154
|
+
)
|
|
155
|
+
|
|
151
156
|
parts = key.split("/")
|
|
152
157
|
if len(parts) == 1:
|
|
153
158
|
name = parts[0]
|
mlrun/kfpops.py
CHANGED
|
@@ -41,8 +41,8 @@ from .utils import (
|
|
|
41
41
|
|
|
42
42
|
# default KFP artifacts and output (ui metadata, metrics etc.)
|
|
43
43
|
# directories to /tmp to allow running with security context
|
|
44
|
-
KFPMETA_DIR =
|
|
45
|
-
KFP_ARTIFACTS_DIR =
|
|
44
|
+
KFPMETA_DIR = "/tmp"
|
|
45
|
+
KFP_ARTIFACTS_DIR = "/tmp"
|
|
46
46
|
|
|
47
47
|
project_annotation = "mlrun/project"
|
|
48
48
|
run_annotation = "mlrun/pipeline-step-type"
|
|
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
|
|
|
71
71
|
{"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
|
|
72
72
|
],
|
|
73
73
|
}
|
|
74
|
-
with open(KFPMETA_DIR
|
|
74
|
+
with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
|
|
75
75
|
json.dump(metrics, f)
|
|
76
76
|
|
|
77
77
|
struct = deepcopy(struct)
|
|
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
|
|
|
91
91
|
elif key in results:
|
|
92
92
|
val = results[key]
|
|
93
93
|
try:
|
|
94
|
-
|
|
94
|
+
# NOTE: if key has "../x", it would fail on path traversal
|
|
95
|
+
path = os.path.join(KFP_ARTIFACTS_DIR, key)
|
|
96
|
+
if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
|
|
97
|
+
logger.warning(
|
|
98
|
+
"Path traversal is not allowed ignoring", path=path, key=key
|
|
99
|
+
)
|
|
100
|
+
continue
|
|
101
|
+
path = os.path.abspath(path)
|
|
95
102
|
logger.info("Writing artifact output", path=path, val=val)
|
|
96
103
|
with open(path, "w") as fp:
|
|
97
104
|
fp.write(str(val))
|
|
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
|
|
|
109
116
|
"outputs": output_artifacts
|
|
110
117
|
+ [{"type": "markdown", "storage": "inline", "source": text}]
|
|
111
118
|
}
|
|
112
|
-
with open(KFPMETA_DIR
|
|
119
|
+
with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
|
|
113
120
|
json.dump(metadata, f)
|
|
114
121
|
|
|
115
122
|
|
|
@@ -401,9 +408,9 @@ def mlrun_op(
|
|
|
401
408
|
cmd += ["--label", f"{label}={val}"]
|
|
402
409
|
for output in outputs:
|
|
403
410
|
cmd += ["-o", str(output)]
|
|
404
|
-
file_outputs[
|
|
405
|
-
output
|
|
406
|
-
|
|
411
|
+
file_outputs[output.replace(".", "_")] = (
|
|
412
|
+
f"/tmp/{output}" # not using path.join to avoid windows "\"
|
|
413
|
+
)
|
|
407
414
|
if project:
|
|
408
415
|
cmd += ["--project", project]
|
|
409
416
|
if handler:
|
|
@@ -450,8 +457,10 @@ def mlrun_op(
|
|
|
450
457
|
command=cmd + [command],
|
|
451
458
|
file_outputs=file_outputs,
|
|
452
459
|
output_artifact_paths={
|
|
453
|
-
"mlpipeline-ui-metadata":
|
|
454
|
-
|
|
460
|
+
"mlpipeline-ui-metadata": os.path.join(
|
|
461
|
+
KFPMETA_DIR, "mlpipeline-ui-metadata.json"
|
|
462
|
+
),
|
|
463
|
+
"mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
|
|
455
464
|
},
|
|
456
465
|
)
|
|
457
466
|
cop = add_default_function_resources(cop)
|
mlrun/lists.py
CHANGED
|
@@ -36,6 +36,7 @@ list_header = [
|
|
|
36
36
|
"parameters",
|
|
37
37
|
"results",
|
|
38
38
|
"artifacts",
|
|
39
|
+
"artifact_uris",
|
|
39
40
|
"error",
|
|
40
41
|
]
|
|
41
42
|
|
|
@@ -63,6 +64,7 @@ class RunList(list):
|
|
|
63
64
|
get_in(run, "spec.parameters", ""),
|
|
64
65
|
get_in(run, "status.results", ""),
|
|
65
66
|
get_in(run, "status.artifacts", []),
|
|
67
|
+
get_in(run, "status.artifact_uris", {}),
|
|
66
68
|
get_in(run, "status.error", ""),
|
|
67
69
|
]
|
|
68
70
|
if extend_iterations and iterations:
|
mlrun/model.py
CHANGED
|
@@ -62,6 +62,7 @@ class ModelObj:
|
|
|
62
62
|
return new_type.from_dict(param)
|
|
63
63
|
return param
|
|
64
64
|
|
|
65
|
+
@mlrun.utils.filter_warnings("ignore", FutureWarning)
|
|
65
66
|
def to_dict(self, fields=None, exclude=None):
|
|
66
67
|
"""convert the object to a python dictionary
|
|
67
68
|
|
|
@@ -359,6 +360,7 @@ class ImageBuilder(ModelObj):
|
|
|
359
360
|
requirements: list = None,
|
|
360
361
|
extra_args=None,
|
|
361
362
|
builder_env=None,
|
|
363
|
+
source_code_target_dir=None,
|
|
362
364
|
):
|
|
363
365
|
self.functionSourceCode = functionSourceCode #: functionSourceCode
|
|
364
366
|
self.codeEntryType = "" #: codeEntryType
|
|
@@ -379,6 +381,7 @@ class ImageBuilder(ModelObj):
|
|
|
379
381
|
self.auto_build = auto_build #: auto_build
|
|
380
382
|
self.build_pod = None
|
|
381
383
|
self.requirements = requirements or [] #: pip requirements
|
|
384
|
+
self.source_code_target_dir = source_code_target_dir or None
|
|
382
385
|
|
|
383
386
|
@property
|
|
384
387
|
def source(self):
|
|
@@ -415,6 +418,7 @@ class ImageBuilder(ModelObj):
|
|
|
415
418
|
overwrite=False,
|
|
416
419
|
builder_env=None,
|
|
417
420
|
extra_args=None,
|
|
421
|
+
source_code_target_dir=None,
|
|
418
422
|
):
|
|
419
423
|
if image:
|
|
420
424
|
self.image = image
|
|
@@ -440,6 +444,8 @@ class ImageBuilder(ModelObj):
|
|
|
440
444
|
self.builder_env = builder_env
|
|
441
445
|
if extra_args:
|
|
442
446
|
self.extra_args = extra_args
|
|
447
|
+
if source_code_target_dir:
|
|
448
|
+
self.source_code_target_dir = source_code_target_dir
|
|
443
449
|
|
|
444
450
|
def with_commands(
|
|
445
451
|
self,
|
|
@@ -618,6 +624,11 @@ class RunMetadata(ModelObj):
|
|
|
618
624
|
def iteration(self, iteration):
|
|
619
625
|
self._iteration = iteration
|
|
620
626
|
|
|
627
|
+
def is_workflow_runner(self):
|
|
628
|
+
if not self.labels:
|
|
629
|
+
return False
|
|
630
|
+
return self.labels.get("job-type", "") == "workflow-runner"
|
|
631
|
+
|
|
621
632
|
|
|
622
633
|
class HyperParamStrategies:
|
|
623
634
|
grid = "grid"
|
|
@@ -1047,6 +1058,7 @@ class RunStatus(ModelObj):
|
|
|
1047
1058
|
ui_url=None,
|
|
1048
1059
|
reason: str = None,
|
|
1049
1060
|
notifications: Dict[str, Notification] = None,
|
|
1061
|
+
artifact_uris: dict[str, str] = None,
|
|
1050
1062
|
):
|
|
1051
1063
|
self.state = state or "created"
|
|
1052
1064
|
self.status_text = status_text
|
|
@@ -1061,6 +1073,21 @@ class RunStatus(ModelObj):
|
|
|
1061
1073
|
self.ui_url = ui_url
|
|
1062
1074
|
self.reason = reason
|
|
1063
1075
|
self.notifications = notifications or {}
|
|
1076
|
+
# Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
|
|
1077
|
+
self.artifact_uris = artifact_uris or {}
|
|
1078
|
+
|
|
1079
|
+
def is_failed(self) -> Optional[bool]:
|
|
1080
|
+
"""
|
|
1081
|
+
This method returns whether a run has failed.
|
|
1082
|
+
Returns none if state has yet to be defined. callee is responsible for handling None.
|
|
1083
|
+
(e.g wait for state to be defined)
|
|
1084
|
+
"""
|
|
1085
|
+
if not self.state:
|
|
1086
|
+
return None
|
|
1087
|
+
return self.state.casefold() in [
|
|
1088
|
+
mlrun.run.RunStatuses.failed.casefold(),
|
|
1089
|
+
mlrun.run.RunStatuses.error.casefold(),
|
|
1090
|
+
]
|
|
1064
1091
|
|
|
1065
1092
|
|
|
1066
1093
|
class RunTemplate(ModelObj):
|
|
@@ -1360,8 +1387,10 @@ class RunObject(RunTemplate):
|
|
|
1360
1387
|
iter=self.metadata.iteration,
|
|
1361
1388
|
)
|
|
1362
1389
|
if run:
|
|
1363
|
-
|
|
1364
|
-
|
|
1390
|
+
run_status = run.get("status", {})
|
|
1391
|
+
# Artifacts are not stored in the DB, so we need to preserve them here
|
|
1392
|
+
run_status["artifacts"] = self.status.artifacts
|
|
1393
|
+
self.status = RunStatus.from_dict(run_status)
|
|
1365
1394
|
return self
|
|
1366
1395
|
|
|
1367
1396
|
def show(self):
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -436,9 +436,9 @@ def _generate_model_endpoint(
|
|
|
436
436
|
] = possible_drift_threshold
|
|
437
437
|
|
|
438
438
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
439
|
-
model_endpoint.status.first_request = (
|
|
440
|
-
|
|
441
|
-
)
|
|
439
|
+
model_endpoint.status.first_request = model_endpoint.status.last_request = (
|
|
440
|
+
datetime_now().isoformat()
|
|
441
|
+
)
|
|
442
442
|
if sample_set_statistics:
|
|
443
443
|
model_endpoint.status.feature_stats = sample_set_statistics
|
|
444
444
|
|
|
@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
|
|
|
476
476
|
db_session = mlrun.get_run_db()
|
|
477
477
|
|
|
478
478
|
# Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
|
|
479
|
-
batch_function_dict: typing.Dict[
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
479
|
+
batch_function_dict: typing.Dict[str, typing.Any] = (
|
|
480
|
+
db_session.deploy_monitoring_batch_job(
|
|
481
|
+
project=project,
|
|
482
|
+
default_batch_image=default_batch_image,
|
|
483
|
+
)
|
|
484
484
|
)
|
|
485
485
|
|
|
486
486
|
# Prepare current run params
|
mlrun/model_monitoring/batch.py
CHANGED
|
@@ -426,13 +426,6 @@ class MonitoringApplicationController:
|
|
|
426
426
|
m_fs = fstore.get_feature_set(
|
|
427
427
|
endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
|
|
428
428
|
)
|
|
429
|
-
labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
|
|
430
|
-
if labels:
|
|
431
|
-
if isinstance(labels, str):
|
|
432
|
-
labels = json.loads(labels)
|
|
433
|
-
for label in labels:
|
|
434
|
-
if label not in list(m_fs.spec.features.keys()):
|
|
435
|
-
m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
|
|
436
429
|
|
|
437
430
|
for application in applications_names:
|
|
438
431
|
batch_window = batch_window_generator.get_batch_window(
|
|
@@ -19,6 +19,7 @@ import plotly.graph_objects as go
|
|
|
19
19
|
from plotly.subplots import make_subplots
|
|
20
20
|
|
|
21
21
|
import mlrun.common.schemas.model_monitoring
|
|
22
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
22
23
|
|
|
23
24
|
# A type for representing a drift result, a tuple of the status and the drift mean:
|
|
24
25
|
DriftResultType = Tuple[mlrun.common.schemas.model_monitoring.DriftStatus, float]
|
|
@@ -112,6 +113,11 @@ class FeaturesDriftTablePlot:
|
|
|
112
113
|
:return: The full path to the html file of the plot.
|
|
113
114
|
"""
|
|
114
115
|
# Plot the drift table:
|
|
116
|
+
features = [
|
|
117
|
+
feature
|
|
118
|
+
for feature in features
|
|
119
|
+
if feature not in mm_constants.FeatureSetFeatures.list()
|
|
120
|
+
]
|
|
115
121
|
figure = self._plot(
|
|
116
122
|
features=features,
|
|
117
123
|
sample_set_statistics=sample_set_statistics,
|
|
@@ -41,7 +41,7 @@ class _MLRunNoRunsFoundError(Exception):
|
|
|
41
41
|
pass
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
def get_stream_path(project: str = None, application_name: str = None):
|
|
44
|
+
def get_stream_path(project: str = None, application_name: str = None) -> str:
|
|
45
45
|
"""
|
|
46
46
|
Get stream path from the project secret. If wasn't set, take it from the system configurations
|
|
47
47
|
|
|
@@ -62,6 +62,9 @@ def get_stream_path(project: str = None, application_name: str = None):
|
|
|
62
62
|
application_name=application_name,
|
|
63
63
|
)
|
|
64
64
|
|
|
65
|
+
if isinstance(stream_uri, list): # ML-6043 - user side gets only the new stream uri
|
|
66
|
+
stream_uri = stream_uri[1]
|
|
67
|
+
|
|
65
68
|
return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
|
|
66
69
|
stream_uri=stream_uri, project=project, application_name=application_name
|
|
67
70
|
)
|