mlrun 1.6.1__py3-none-any.whl → 1.6.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +13 -7
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +28 -25
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/model.py +5 -0
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +21 -11
- mlrun/projects/project.py +65 -46
- mlrun/runtimes/base.py +20 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +8 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/METADATA +12 -14
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/RECORD +44 -43
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/LICENSE +0 -0
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/WHEEL +0 -0
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.1.dist-info → mlrun-1.6.2rc1.dist-info}/top_level.txt +0 -0
mlrun/common/model_monitoring/helpers.py CHANGED
@@ -82,13 +82,15 @@ def parse_monitoring_stream_path(
     if application_name is None:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=project
+                project=project, namespace=mlrun.mlconf.namespace
             )
         )
     else:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                project=project, application_name=application_name
+                project=project,
+                application_name=application_name,
+                namespace=mlrun.mlconf.namespace,
             )
         )
     return stream_uri
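Note: both sink templates now carry a {namespace} placeholder, filled from mlrun.mlconf.namespace (the matching defaults appear in the mlrun/config.py hunk below). A quick illustration with made-up values:

    template = (
        "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080"
    )
    print(template.format(project="my-proj", namespace="mlrun"))
    # http://nuclio-my-proj-model-monitoring-stream.mlrun.svc.cluster.local:8080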
mlrun/common/schemas/common.py ADDED
@@ -0,0 +1,40 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import typing
+
+import pydantic
+
+
+class ImageBuilder(pydantic.BaseModel):
+    functionSourceCode: typing.Optional[str] = None
+    codeEntryType: typing.Optional[str] = None
+    codeEntryAttributes: typing.Optional[str] = None
+    source: typing.Optional[str] = None
+    code_origin: typing.Optional[str] = None
+    origin_filename: typing.Optional[str] = None
+    image: typing.Optional[str] = None
+    base_image: typing.Optional[str] = None
+    commands: typing.Optional[list] = None
+    extra: typing.Optional[str] = None
+    extra_args: typing.Optional[dict] = None
+    builder_env: typing.Optional[dict] = None
+    secret: typing.Optional[str] = None
+    registry: typing.Optional[str] = None
+    load_source_on_run: typing.Optional[bool] = None
+    with_mlrun: typing.Optional[bool] = None
+    auto_build: typing.Optional[bool] = None
+    build_pod: typing.Optional[str] = None
+    requirements: typing.Optional[list] = None
+    source_code_target_dir: typing.Optional[str] = None
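Note: this new module mirrors the fields of mlrun.model.ImageBuilder as a permissive pydantic schema (every field is optional). A small usage sketch, assuming the module path shown above:

    from mlrun.common.schemas.common import ImageBuilder

    # all fields default to None, so a partial build section validates cleanly
    build = ImageBuilder(base_image="python:3.9", requirements=["pandas"])
    assert build.commands is None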
mlrun/common/schemas/project.py CHANGED
@@ -19,6 +19,7 @@ import pydantic

 import mlrun.common.types

+from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus


@@ -85,6 +86,7 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
+    build: typing.Optional[ImageBuilder] = None

     class Config:
         extra = pydantic.Extra.allow
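Note: with the typed build field, pydantic coerces a plain dict into the ImageBuilder schema when a ProjectSpec is parsed. A sketch, assuming the remaining ProjectSpec fields keep their defaults:

    from mlrun.common.schemas.project import ProjectSpec

    spec = ProjectSpec(build={"base_image": "python:3.9", "requirements": ["pandas"]})
    print(type(spec.build).__name__)  # ImageBuilder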
mlrun/config.py CHANGED
@@ -288,6 +288,12 @@ default_config = {
         "state": "online",
         "retry_api_call_on_exception": "enabled",
         "http_connection_timeout_keep_alive": 11,
+        # http client used by httpdb
+        "http": {
+            # when True, the client will verify the server's TLS
+            # set to False for backwards compatibility.
+            "verify": False,
+        },
         "db": {
             "commit_retry_timeout": 30,
             "commit_retry_interval": 3,
@@ -481,8 +487,8 @@ default_config = {
         "offline_storage_path": "model-endpoints/{kind}",
         # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
         # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.…",
-        "default_http_sink_app": "http://nuclio-{project}-{application_name}.…",
+        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
@@ -602,7 +608,7 @@ default_config = {
         "workflows": {
             "default_workflow_runner_name": "workflow-runner-{}",
             # Default timeout seconds for retrieving workflow id after execution:
-            "timeouts": {"local": 120, "kfp": 30, "remote": …},
+            "timeouts": {"local": 120, "kfp": 30, "remote": 90},
         },
         "log_collector": {
             "address": "localhost:8282",
@@ -954,10 +960,10 @@ class Config:
             with_gpu = (
                 with_gpu_requests if requirement == "requests" else with_gpu_limits
             )
-            resources[
-                requirement
-            ] = self.get_default_function_pod_requirement_resources(
-                requirement, with_gpu
+            resources[requirement] = (
+                self.get_default_function_pod_requirement_resources(
+                    requirement, with_gpu
+                )
             )
         return resources
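Note: the new httpdb.http.verify key is what the HTTPRunDB client passes to requests (see the mlrun/db/httpdb.py hunk below). A minimal sketch of opting in, assuming the usual mlconf access pattern:

    import mlrun

    # enable TLS certificate verification for API calls; the shipped default
    # stays False for backwards compatibility, per the comment above
    mlrun.mlconf.httpdb.http.verify = True

Like other settings under default_config, it should also be settable through mlrun's environment-variable convention (e.g. MLRUN_HTTPDB__HTTP__VERIFY=true), though that exact name is an assumption here.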
mlrun/datastore/azure_blob.py CHANGED
@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):

         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-                f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            )
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"
@@ -188,14 +188,14 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"
-                ] = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
+                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                )

         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-                f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            )
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
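Note: a large share of the hunks in this release (the datastore files here, httpdb, execution.py, the model-endpoint stores, and others below) change formatting only. The before/after pattern is consistent with the style black stabilized in 2024, which keeps a subscript-assignment target on one line and parenthesizes the right-hand side; behavior is identical, only line breaks change:

    # older style: the subscript target is exploded across lines
    res[
        f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
    ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"

    # 2024 style: one-line target, value wrapped in parentheses
    res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
    )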
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -147,13 +147,13 @@ class GoogleCloudStorageStore(DataStore):
         if "project_id" in credentials:
             res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
         if "private_key_id" in credentials:
-            res[
-                "spark.hadoop.fs.gs.auth.service.account.private.key.id"
-            ] = credentials["private_key_id"]
+            res["spark.hadoop.fs.gs.auth.service.account.private.key.id"] = (
+                credentials["private_key_id"]
+            )
         if "private_key" in credentials:
-            res[
-                "spark.hadoop.fs.gs.auth.service.account.private.key"
-            ] = credentials["private_key"]
+            res["spark.hadoop.fs.gs.auth.service.account.private.key"] = (
+                credentials["private_key"]
+            )
         if "client_email" in credentials:
             res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials[
                 "client_email"
mlrun/db/base.py CHANGED
@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
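Note: this adds submit_workflow to the abstract RunDBInterface, matching the concrete HTTPRunDB method whose signature is touched below. A hedged usage sketch (the workflow spec and arguments here are hypothetical):

    import mlrun

    db = mlrun.get_run_db()
    response = db.submit_workflow(
        project="my-proj",
        name="main",
        workflow_spec={"name": "main"},  # a WorkflowSpec object or plain dict
        arguments={"model_name": "churn"},  # illustrative workflow argument
    )
    print(response)  # an mlrun.common.schemas.WorkflowResponse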
mlrun/db/httpdb.py CHANGED
@@ -152,7 +152,7 @@ class HTTPRunDB(RunDBInterface):
     @staticmethod
     def get_api_path_prefix(version: str = None) -> str:
         """
-        :param version: API version to use, None (the default) will mean to use the default value from
+        :param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
             for un-versioned api set an empty string.
         """
         if version is not None:
@@ -250,7 +250,11 @@ class HTTPRunDB(RunDBInterface):

         try:
             response = self.session.request(
-                method, url, timeout=timeout, verify=False, **kw
+                method,
+                url,
+                timeout=timeout,
+                verify=config.httpdb.http.verify,
+                **kw,
             )
         except requests.RequestException as exc:
             error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
@@ -302,11 +306,11 @@ class HTTPRunDB(RunDBInterface):

     def connect(self, secrets=None):
         """Connect to the MLRun API server. Must be called prior to executing any other method.
-        The code utilizes the URL for the API server from the configuration - ``mlconf.dbpath``.
+        The code utilizes the URL for the API server from the configuration - ``config.dbpath``.

         For example::

-            mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'
+            config.dbpath = config.dbpath or 'http://mlrun-api:8080'
             db = get_run_db().connect()
         """
         # hack to allow unit tests to instantiate HTTPRunDB without a real server behind
@@ -500,7 +504,7 @@ class HTTPRunDB(RunDBInterface):
         if offset < 0:
             raise MLRunInvalidArgumentError("Offset cannot be negative")
         if size is None:
-            size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
+            size = int(config.httpdb.logs.pull_logs_default_size_limit)
         elif size == -1:
             logger.warning(
                 "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -546,25 +550,23 @@ class HTTPRunDB(RunDBInterface):

         state, text = self.get_log(uid, project, offset=offset)
         if text:
-            print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
+            print(text.decode(errors=config.httpdb.logs.decode.errors))
         nil_resp = 0
         while True:
             offset += len(text)
             # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
             # TODO: refactor this to use a conditional backoff mechanism
             if nil_resp < 3:
-                time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
+                time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
             else:
                 time.sleep(
-                    int(
-                        mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
-                    )
+                    int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
                 )
             state, text = self.get_log(uid, project, offset=offset)
             if text:
                 nil_resp = 0
                 print(
-                    text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
+                    text.decode(errors=config.httpdb.logs.decode.errors),
                     end="",
                 )
             else:
@@ -1135,17 +1137,17 @@ class HTTPRunDB(RunDBInterface):
            structured_dict = {}
            for project, job_runtime_resources_map in response.json().items():
                for job_id, runtime_resources in job_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-                        job_id
-                    ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    structured_dict.setdefault(project, {})[job_id] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
            return structured_dict
        elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
            structured_dict = {}
            for project, kind_runtime_resources_map in response.json().items():
                for kind, runtime_resources in kind_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-                        kind
-                    ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    structured_dict.setdefault(project, {})[kind] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
            return structured_dict
        else:
            raise NotImplementedError(
@@ -1173,7 +1175,8 @@ class HTTPRunDB(RunDBInterface):
         :param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
             period didn't pass.
         :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
-            the moment they moved to terminal state
+            the moment they moved to terminal state
+            (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).

         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.
@@ -1203,9 +1206,9 @@ class HTTPRunDB(RunDBInterface):
         structured_dict = {}
         for project, kind_runtime_resources_map in response.json().items():
             for kind, runtime_resources in kind_runtime_resources_map.items():
-                structured_dict.setdefault(project, {})[
-                    kind
-                ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                structured_dict.setdefault(project, {})[kind] = (
+                    mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                )
         return structured_dict

     def create_schedule(
@@ -1340,7 +1343,7 @@ class HTTPRunDB(RunDBInterface):
             logger.warning(
                 "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
                 "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
+                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
                 "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
                 source=func.spec.build.source,
                 load_source_on_run=func.spec.build.load_source_on_run,
@@ -1495,7 +1498,7 @@ class HTTPRunDB(RunDBInterface):
         Retrieve updated information on project background tasks being executed.
         If no filter is provided, will return background tasks from the last week.

-        :param project: Project name (defaults to mlrun.mlconf.default_project).
+        :param project: Project name (defaults to mlrun.config.config.default_project).
         :param state: List only background tasks whose state is specified.
         :param created_from: Filter by background task created time in ``[created_from, created_to]``.
         :param created_to: Filter by background task created time in ``[created_from, created_to]``.
@@ -3450,8 +3453,8 @@ class HTTPRunDB(RunDBInterface):
         source: Optional[str] = None,
         run_name: Optional[str] = None,
         namespace: Optional[str] = None,
-        notifications: …,
-    ):
+        notifications: list[mlrun.model.Notification] = None,
+    ) -> mlrun.common.schemas.WorkflowResponse:
         """
         Submitting workflow for a remote execution.

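Note: the new notifications annotation uses the built-in generic list[mlrun.model.Notification] (PEP 585), which is only valid at runtime on Python 3.9+, presumably matching the package's minimum supported Python. For reference:

    # PEP 585: built-in collections are subscriptable, no typing.List needed
    def f(notifications: list["mlrun.model.Notification"] = None) -> None:
        ...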
mlrun/execution.py CHANGED
@@ -559,9 +559,9 @@ class MLClientCtx(object):
         for k, v in get_in(task, ["status", "results"], {}).items():
             self._results[k] = v
         for artifact in get_in(task, ["status", run_keys.artifacts], []):
-            self._artifacts_manager.artifacts[
-                artifact["metadata"]["key"]
-            ] = artifact
+            self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
+                artifact
+            )
             self._artifacts_manager.link_artifact(
                 self.project,
                 self.name,
mlrun/frameworks/tf_keras/callbacks/logging_callback.py CHANGED
@@ -389,9 +389,9 @@ class LoggingCallback(Callback):
         ):
             try:
                 self._get_hyperparameter(key_chain=learning_rate_key_chain)
-                self._dynamic_hyperparameters_keys[
-                    learning_rate_key
-                ] = learning_rate_key_chain
+                self._dynamic_hyperparameters_keys[learning_rate_key] = (
+                    learning_rate_key_chain
+                )
             except (KeyError, IndexError, ValueError):
                 pass

mlrun/frameworks/tf_keras/model_handler.py CHANGED
@@ -263,13 +263,13 @@ class TFKerasModelHandler(DLModelHandler):
         # Update the paths and log artifacts if context is available:
         if self._weights_file is not None:
             if self._context is not None:
-                artifacts[
-                    self._get_weights_file_artifact_name()
-                ] = self._context.log_artifact(
-                    self._weights_file,
-                    local_path=self._weights_file,
-                    artifact_path=output_path,
-                    db_key=False,
+                artifacts[self._get_weights_file_artifact_name()] = (
+                    self._context.log_artifact(
+                        self._weights_file,
+                        local_path=self._weights_file,
+                        artifact_path=output_path,
+                        db_key=False,
+                    )
                 )

         return artifacts if self._context is not None else None
mlrun/k8s_utils.py CHANGED
@@ -134,13 +134,13 @@ def sanitize_label_value(value: str) -> str:
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])


-def verify_label_key(key):
+def verify_label_key(key: str):
+    """
+    Verify that the label key is valid for Kubernetes.
+    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
     if not key:
         raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
-    if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "Labels cannot start with 'k8s.io' or 'kubernetes.io'"
-        )

     mlrun.utils.helpers.verify_field_regex(
         f"project.metadata.labels.'{key}'",
@@ -148,6 +148,11 @@ def verify_label_key(key):
         mlrun.utils.regex.k8s_character_limit,
     )

+    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
+        )
+
     parts = key.split("/")
     if len(parts) == 1:
         name = parts[0]
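Note: two behavioral tweaks here: the reserved-prefix check now runs after the regex validation, and it only rejects keys whose prefix is literally "k8s.io/" or "kubernetes.io/" (with the slash), matching the Kubernetes rule that those prefixes are reserved. Illustrative calls:

    from mlrun.k8s_utils import verify_label_key

    verify_label_key("kubernetes.io/hostname")  # raises MLRunInvalidArgumentError
    verify_label_key("kubernetes.io-custom")    # passes now; previously rejected
    verify_label_key("example.com/team")        # ordinary prefixed keys remain allowed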
mlrun/kfpops.py CHANGED
@@ -41,8 +41,8 @@ from .utils import (

 # default KFP artifacts and output (ui metadata, metrics etc.)
 # directories to /tmp to allow running with security context
-KFPMETA_DIR = …
-KFP_ARTIFACTS_DIR = …
+KFPMETA_DIR = "/tmp"
+KFP_ARTIFACTS_DIR = "/tmp"

 project_annotation = "mlrun/project"
 run_annotation = "mlrun/pipeline-step-type"
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
         ],
     }
-    with open(KFPMETA_DIR + "/mlpipeline-metrics.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
         json.dump(metrics, f)

     struct = deepcopy(struct)
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
         elif key in results:
             val = results[key]
             try:
-                path = …
+                # NOTE: if key has "../x", it would fail on path traversal
+                path = os.path.join(KFP_ARTIFACTS_DIR, key)
+                if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
+                    logger.warning(
+                        "Path traversal is not allowed ignoring", path=path, key=key
+                    )
+                    continue
+                path = os.path.abspath(path)
                 logger.info("Writing artifact output", path=path, val=val)
                 with open(path, "w") as fp:
                     fp.write(str(val))
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
         "outputs": output_artifacts
         + [{"type": "markdown", "storage": "inline", "source": text}]
     }
-    with open(KFPMETA_DIR + "/mlpipeline-ui-metadata.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)


@@ -401,9 +408,9 @@ def mlrun_op(
         cmd += ["--label", f"{label}={val}"]
     for output in outputs:
         cmd += ["-o", str(output)]
-        file_outputs[
-            output.replace(".", "_")
-        ] = f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        file_outputs[output.replace(".", "_")] = (
+            f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        )
     if project:
         cmd += ["--project", project]
     if handler:
@@ -450,8 +457,10 @@ def mlrun_op(
         command=cmd + [command],
         file_outputs=file_outputs,
         output_artifact_paths={
-            "mlpipeline-ui-metadata": KFPMETA_DIR + "/mlpipeline-ui-metadata.json",
-            "mlpipeline-metrics": KFPMETA_DIR + "/mlpipeline-metrics.json",
+            "mlpipeline-ui-metadata": os.path.join(
+                KFPMETA_DIR, "mlpipeline-ui-metadata.json"
+            ),
+            "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
         },
     )
     cop = add_default_function_resources(cop)
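Note: the traversal guard relies on mlrun.utils.helpers.is_safe_path, presumably part of the +8 lines listed for mlrun/utils/helpers.py (that hunk is not shown in this section). A hypothetical sketch of such a check, not necessarily mlrun's implementation:

    import os

    def is_safe_path(base: str, filepath: str) -> bool:
        # a path is "safe" when, after normalization, it still lives under base
        resolved_base = os.path.abspath(base)
        resolved_path = os.path.abspath(filepath)
        return resolved_path == resolved_base or resolved_path.startswith(
            resolved_base + os.sep
        )

    assert is_safe_path("/tmp", "/tmp/out.txt")
    assert not is_safe_path("/tmp", "/tmp/../etc/passwd")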
mlrun/model.py CHANGED
@@ -359,6 +359,7 @@ class ImageBuilder(ModelObj):
         requirements: list = None,
         extra_args=None,
         builder_env=None,
+        source_code_target_dir=None,
     ):
         self.functionSourceCode = functionSourceCode  #: functionSourceCode
         self.codeEntryType = ""  #: codeEntryType
@@ -379,6 +380,7 @@ class ImageBuilder(ModelObj):
         self.auto_build = auto_build  #: auto_build
         self.build_pod = None
         self.requirements = requirements or []  #: pip requirements
+        self.source_code_target_dir = source_code_target_dir or None

     @property
     def source(self):
@@ -415,6 +417,7 @@ class ImageBuilder(ModelObj):
         overwrite=False,
         builder_env=None,
         extra_args=None,
+        source_code_target_dir=None,
     ):
         if image:
             self.image = image
@@ -440,6 +443,8 @@ class ImageBuilder(ModelObj):
             self.builder_env = builder_env
         if extra_args:
             self.extra_args = extra_args
+        if source_code_target_dir:
+            self.source_code_target_dir = source_code_target_dir

     def with_commands(
         self,
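Note: ImageBuilder here is the runtime model object, distinct from the pydantic schema added earlier; it now accepts source_code_target_dir in its constructor and in the update path above. A small sketch with an illustrative value:

    from mlrun.model import ImageBuilder

    # where the loaded source code should be placed inside the built image
    build = ImageBuilder(source_code_target_dir="/home/mlrun_code")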
mlrun/model_monitoring/api.py CHANGED
@@ -436,9 +436,9 @@ def _generate_model_endpoint(
     ] = possible_drift_threshold

     model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = (
-        model_endpoint.status.last_request
-    ) = datetime_now().isoformat()
+    model_endpoint.status.first_request = model_endpoint.status.last_request = (
+        datetime_now().isoformat()
+    )
     if sample_set_statistics:
         model_endpoint.status.feature_stats = sample_set_statistics

@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
     db_session = mlrun.get_run_db()

     # Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
-    batch_function_dict: typing.Dict[
-        str, typing.Any
-    ] = db_session.deploy_monitoring_batch_job(
-        project=project,
-        default_batch_image=default_batch_image,
+    batch_function_dict: typing.Dict[str, typing.Any] = (
+        db_session.deploy_monitoring_batch_job(
+            project=project,
+            default_batch_image=default_batch_image,
+        )
     )

     # Prepare current run params
mlrun/model_monitoring/stores/kv_model_endpoint_store.py CHANGED
@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
-                    }
-                }
-            )
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
+                    }
+                )
+            )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )

     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:
mlrun/model_monitoring/stores/sql_model_endpoint_store.py CHANGED
@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table


 class SQLModelEndpointStore(ModelEndpointStore):
-
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
mlrun/package/packagers/pandas_packagers.py CHANGED
@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)