mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/manager.py +6 -0
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +43 -17
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/datastore.py +7 -3
- mlrun/datastore/datastore_profile.py +15 -3
- mlrun/datastore/google_cloud_storage.py +7 -7
- mlrun/datastore/sources.py +17 -4
- mlrun/datastore/targets.py +3 -1
- mlrun/datastore/utils.py +11 -1
- mlrun/datastore/v3io.py +70 -46
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +41 -36
- mlrun/execution.py +3 -3
- mlrun/feature_store/api.py +133 -132
- mlrun/feature_store/feature_set.py +89 -0
- mlrun/feature_store/feature_vector.py +120 -0
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +56 -0
- mlrun/kfpops.py +19 -10
- mlrun/model.py +6 -0
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +52 -38
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +29 -12
- mlrun/projects/project.py +100 -61
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +14 -15
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +7 -14
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/serving/routers.py +14 -8
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +59 -3
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/regex.py +5 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/METADATA +21 -23
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/RECORD +66 -65
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/top_level.txt +0 -0
mlrun/datastore/datastore_profile.py
CHANGED

@@ -367,7 +367,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
        )


-def datastore_profile_read(url):
+def datastore_profile_read(url, project_name="", secrets: dict = None):
     parsed_url = urlparse(url)
     if parsed_url.scheme.lower() != "ds":
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -375,17 +375,29 @@ def datastore_profile_read(url):
         )

     profile_name = parsed_url.hostname
-    project_name =
+    project_name = project_name or mlrun.mlconf.default_project
     datastore = TemporaryClientDatastoreProfiles().get(profile_name)
     if datastore:
         return datastore
     public_profile = mlrun.db.get_run_db().get_datastore_profile(
         profile_name, project_name
     )
+    # The mlrun.db.get_run_db().get_datastore_profile() function is capable of returning
+    # two distinct types of objects based on its execution context.
+    # If it operates from the client or within the pod (which is the common scenario),
+    # it yields an instance of `mlrun.datastore.DatastoreProfile`. Conversely,
+    # when executed on the server with a direct call to `sqldb`, it produces an instance of
+    # mlrun.common.schemas.DatastoreProfile.
+    # In the latter scenario, an extra conversion step is required to transform the object
+    # into mlrun.datastore.DatastoreProfile.
+    if isinstance(public_profile, mlrun.common.schemas.DatastoreProfile):
+        public_profile = DatastoreProfile2Json.create_from_json(
+            public_json=public_profile.object
+        )
     project_ds_name_private = DatastoreProfile.generate_secret_key(
         profile_name, project_name
     )
-    private_body = get_secret_or_env(project_ds_name_private)
+    private_body = get_secret_or_env(project_ds_name_private, secret_provider=secrets)
     if not public_profile or not private_body:
         raise mlrun.errors.MLRunInvalidArgumentError(
             f"Unable to retrieve the datastore profile '{url}' from either the server or local environment. "
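The new `project_name` and `secrets` parameters let a caller resolve a profile for a specific project and supply the private part from an explicit secrets dict instead of the environment. A minimal usage sketch of the updated signature; the profile name and project name below are placeholders, and a configured MLRun client environment is assumed:

```python
# Sketch only: assumes an MLRun client environment where a datastore profile
# named "my-profile" is registered for the project "my-project" (placeholders).
from mlrun.datastore.datastore_profile import datastore_profile_read

# Previously the project was always mlrun.mlconf.default_project and the private
# part was read from the environment; now both can be passed in explicitly.
profile = datastore_profile_read(
    "ds://my-profile",
    project_name="my-project",
    secrets=None,  # or a dict acting as the secret provider for the private body
)
```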
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -60,7 +60,7 @@ class GoogleCloudStorageStore(DataStore):
            except json.JSONDecodeError:
                # If it's not json, handle it as a filename
                token = credentials
-
+            return self._sanitize_storage_options(dict(token=token))
        else:
            logger.info(
                "No GCS credentials available - auth will rely on auto-discovery of credentials"
@@ -147,13 +147,13 @@ class GoogleCloudStorageStore(DataStore):
        if "project_id" in credentials:
            res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
        if "private_key_id" in credentials:
-            res[
-                "
-
+            res["spark.hadoop.fs.gs.auth.service.account.private.key.id"] = (
+                credentials["private_key_id"]
+            )
        if "private_key" in credentials:
-            res[
-                "
-
+            res["spark.hadoop.fs.gs.auth.service.account.private.key"] = (
+                credentials["private_key"]
+            )
        if "client_email" in credentials:
            res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials[
                "client_email"
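The reworked assignments only change formatting; the result is still a dict that maps service-account fields onto `spark.hadoop.fs.gs.*` options. A standalone sketch of that mapping, independent of the mlrun class (credential values are placeholders):

```python
# Illustrative sketch of the credentials-to-Spark-options mapping shown above;
# the credential values are placeholders, not real keys.
credentials = {
    "project_id": "my-gcp-project",
    "private_key_id": "abc123",
    "private_key": "-----BEGIN PRIVATE KEY-----\n...",
    "client_email": "svc@my-gcp-project.iam.gserviceaccount.com",
}

res = {}
if "project_id" in credentials:
    res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
if "private_key_id" in credentials:
    res["spark.hadoop.fs.gs.auth.service.account.private.key.id"] = (
        credentials["private_key_id"]
    )
if "private_key" in credentials:
    res["spark.hadoop.fs.gs.auth.service.account.private.key"] = (
        credentials["private_key"]
    )
if "client_email" in credentials:
    res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials["client_email"]
print(res)
```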
mlrun/datastore/sources.py
CHANGED
@@ -118,7 +118,10 @@ class BaseSourceDriver(DataSource):
            if named_view:
                df.createOrReplaceTempView(self.name)
            return self._filter_spark_df(df, time_field, columns)
-        raise NotImplementedError(
+        raise NotImplementedError(
+            f"Conversion of a source of type '{type(self).__name__}' "
+            "to a Spark dataframe is not possible, as this operation is not supported"
+        )

    def _filter_spark_df(self, df, time_field=None, columns=None):
        if not (columns or time_field):
@@ -897,7 +900,7 @@ class StreamSource(OnlineSource):
        engine = function.spec.graph.engine
        if mlrun.mlconf.is_explicit_ack() and engine == "async":
            kwargs["explicit_ack_mode"] = "explicitOnly"
-            kwargs["
+            kwargs["worker_allocation_mode"] = "static"

        function.add_v3io_stream_trigger(
            self.path,
@@ -986,9 +989,13 @@ class KafkaSource(OnlineSource):
        if mlrun.mlconf.is_explicit_ack() and engine == "async":
            explicit_ack_mode = "explicitOnly"
            extra_attributes["workerAllocationMode"] = extra_attributes.get(
-                "
+                "worker_allocation_mode", "static"
            )

+        trigger_kwargs = {}
+        if "max_workers" in extra_attributes:
+            trigger_kwargs = {"max_workers": extra_attributes.pop("max_workers")}
+
        trigger = KafkaTrigger(
            brokers=extra_attributes.pop("brokers"),
            topics=extra_attributes.pop("topics"),
@@ -997,7 +1004,7 @@ class KafkaSource(OnlineSource):
            initial_offset=extra_attributes.pop("initial_offset"),
            explicit_ack_mode=explicit_ack_mode,
            extra_attributes=extra_attributes,
-
+            **trigger_kwargs,
        )
        function = function.add_trigger("kafka", trigger)

@@ -1016,6 +1023,12 @@ class KafkaSource(OnlineSource):

        return function

+    def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
+        raise NotImplementedError(
+            "Conversion of a source of type 'KafkaSource' "
+            "to a Spark dataframe is not possible, as this operation is not supported by Spark"
+        )
+

class SQLSource(BaseSourceDriver):
    kind = "sqldb"
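The new `trigger_kwargs` handling pops an optional `max_workers` attribute out of the Kafka source attributes and forwards it to the trigger as a keyword argument, while everything left over is passed through as `extra_attributes`. A standalone sketch of that pop-and-forward pattern; the trigger class below is a stand-in, not nuclio's `KafkaTrigger`:

```python
# Standalone sketch of the pop-and-forward pattern used above; FakeKafkaTrigger
# stands in for nuclio's KafkaTrigger and is not part of mlrun.
class FakeKafkaTrigger:
    def __init__(self, brokers, topics, max_workers=1, extra_attributes=None):
        self.brokers, self.topics = brokers, topics
        self.max_workers = max_workers
        self.extra_attributes = extra_attributes or {}

extra_attributes = {
    "brokers": ["broker:9092"],
    "topics": ["my-topic"],
    "max_workers": 4,
    "workerAllocationMode": "static",
}

# Only pass max_workers if the user supplied it, so the trigger default applies otherwise.
trigger_kwargs = {}
if "max_workers" in extra_attributes:
    trigger_kwargs = {"max_workers": extra_attributes.pop("max_workers")}

trigger = FakeKafkaTrigger(
    brokers=extra_attributes.pop("brokers"),
    topics=extra_attributes.pop("topics"),
    extra_attributes=extra_attributes,
    **trigger_kwargs,
)
print(trigger.max_workers)  # 4
```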
mlrun/datastore/targets.py
CHANGED
@@ -456,7 +456,7 @@ class BaseStoreTarget(DataTargetBase):
            self.get_target_path(),
            credentials_prefix_secrets,
        )
-        if self.get_target_path().startswith("ds://"):
+        if self.get_target_path() and self.get_target_path().startswith("ds://"):
            return store, store.url + resolved_store_path
        else:
            return store, self.get_target_path()
@@ -1984,6 +1984,8 @@ def _get_target_path(driver, resource, run_id_mode=False):


def generate_path_with_chunk(target, chunk_id, path):
+    if path is None:
+        return ""
    prefix, suffix = os.path.splitext(path)
    if chunk_id and not target.partitioned and not target.time_partitioning_granularity:
        return f"{prefix}/{chunk_id:0>4}{suffix}"
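The added guard makes `generate_path_with_chunk` return an empty string when no path is set, instead of failing inside `os.path.splitext(None)`. A standalone sketch of the guarded behaviour; the target object and the final fallthrough `return path` are assumptions for illustration:

```python
import os
from types import SimpleNamespace

# Standalone sketch of the None guard added above; `target` is a stand-in object
# and the trailing `return path` is assumed for completeness.
def generate_path_with_chunk(target, chunk_id, path):
    if path is None:
        return ""
    prefix, suffix = os.path.splitext(path)
    if chunk_id and not target.partitioned and not target.time_partitioning_granularity:
        return f"{prefix}/{chunk_id:0>4}{suffix}"
    return path

target = SimpleNamespace(partitioned=False, time_partitioning_granularity=None)
print(generate_path_with_chunk(target, 7, "data/out.parquet"))  # data/out/0007.parquet
print(repr(generate_path_with_chunk(target, 7, None)))          # ''
```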
mlrun/datastore/utils.py
CHANGED
@@ -18,6 +18,7 @@ import typing
 from urllib.parse import parse_qs, urlparse, urlunparse

 import pandas as pd
+import semver

 import mlrun.datastore

@@ -137,7 +138,16 @@ def filter_df_generator(
 def _execute_time_filter(
     df: pd.DataFrame, time_column: str, start_time: pd.Timestamp, end_time: pd.Timestamp
 ):
-
+    if semver.parse(pd.__version__)["major"] >= 2:
+        # pandas 2 is too strict by default (ML-5629)
+        kwargs = {
+            "format": "mixed",
+            "yearfirst": True,
+        }
+    else:
+        # pandas 1 may fail on format "mixed" (ML-5661)
+        kwargs = {}
+    df[time_column] = pd.to_datetime(df[time_column], **kwargs)
     if start_time:
         df = df[df[time_column] > start_time]
     if end_time:
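The change gates the `pd.to_datetime` call on the installed pandas major version: pandas 2 gets `format="mixed", yearfirst=True` so that heterogeneous timestamp strings still parse, while pandas 1 is called without those kwargs since it may reject `format="mixed"`. A small self-contained sketch of the same gate:

```python
import pandas as pd
import semver

# Sketch of the pandas-version gate used above.
df = pd.DataFrame({"ts": ["2024-01-02 03:04:05", "2024-01-03T04:05:06"]})

if semver.parse(pd.__version__)["major"] >= 2:
    # pandas 2 is stricter by default, so allow mixed timestamp formats
    kwargs = {"format": "mixed", "yearfirst": True}
else:
    # pandas 1 may fail on format="mixed"
    kwargs = {}

df["ts"] = pd.to_datetime(df["ts"], **kwargs)
print(df.dtypes)
```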
mlrun/datastore/v3io.py
CHANGED
@@ -15,12 +15,11 @@
 import mmap
 import os
 import time
-from copy import deepcopy
 from datetime import datetime

 import fsspec
-import
-
+import v3io
+from v3io.dataplane.response import HttpResponseError

 import mlrun
 from mlrun.datastore.helpers import ONE_GB, ONE_MB
@@ -30,11 +29,6 @@ from .base import (
     DataStore,
     FileStats,
     basic_auth_header,
-    get_range,
-    http_get,
-    http_head,
-    http_put,
-    http_upload,
 )

 V3IO_LOCAL_ROOT = "v3io"
@@ -47,17 +41,18 @@ class V3ioStore(DataStore):

         self.headers = None
         self.secure = self.kind == "v3ios"
+
+        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
+        username = self._get_secret_or_env("V3IO_USERNAME")
+        password = self._get_secret_or_env("V3IO_PASSWORD")
         if self.endpoint.startswith("https://"):
             self.endpoint = self.endpoint[len("https://") :]
             self.secure = True
         elif self.endpoint.startswith("http://"):
             self.endpoint = self.endpoint[len("http://") :]
             self.secure = False
-
-
-        username = self._get_secret_or_env("V3IO_USERNAME")
-        password = self._get_secret_or_env("V3IO_PASSWORD")
-
+        self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
+        self.object = self.client.object
         self.auth = None
         self.token = token
         if token:
@@ -65,6 +60,16 @@ class V3ioStore(DataStore):
         elif username and password:
             self.headers = basic_auth_header(username, password)

+    @staticmethod
+    def _do_object_request(function: callable, *args, **kwargs):
+        try:
+            return function(*args, **kwargs)
+        except HttpResponseError as http_response_error:
+            raise mlrun.errors.err_for_status_code(
+                status_code=http_response_error.status_code,
+                message=mlrun.errors.err_to_str(http_response_error),
+            )
+
     @staticmethod
     def uri_to_ipython(endpoint, subpath):
         return V3IO_LOCAL_ROOT + subpath
@@ -91,13 +96,19 @@ class V3ioStore(DataStore):

     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         file_size = os.path.getsize(src_path)  # in bytes
         if file_size <= ONE_MB:
-
+            with open(src_path, "rb") as source_file:
+                data = source_file.read()
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=False,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
-
         # chunk must be a multiple of the ALLOCATIONGRANULARITY
         # https://docs.python.org/3/library/mmap.html
         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
@@ -114,11 +125,13 @@ class V3ioStore(DataStore):
                 access=mmap.ACCESS_READ,
                 offset=file_offset,
             ) as mmap_obj:
-
-
-
-
-
+                append = file_offset != 0
+                self._do_object_request(
+                    self.object.put,
+                    container=container,
+                    path=path,
+                    body=mmap_obj,
+                    append=append,
                 )
                 file_offset += chunk_size

@@ -126,43 +139,55 @@ class V3ioStore(DataStore):
         return self._upload(key, src_path)

     def get(self, key, size=None, offset=0):
-
-
-
-
-
+        container, path = split_path(self._join(key))
+        return self._do_object_request(
+            function=self.object.get,
+            container=container,
+            path=path,
+            offset=offset,
+            num_bytes=size,
+        ).body

-    def _put(self, key, data, max_chunk_size: int = ONE_GB):
+    def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         if buffer_size <= ONE_MB:
-
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=append,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
         buffer_offset = 0
         try:
             data = memoryview(data)
         except TypeError:
             pass

-
-
-
-
-
-
-
-
-
-
-
+        while buffer_offset < buffer_size:
+            chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+            append = True if buffer_offset or append else False
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data[buffer_offset : buffer_offset + chunk_size],
+                append=append,
+            )
+            buffer_offset += chunk_size

     def put(self, key, data, append=False):
-        return self._put(key, data)
+        return self._put(key, data, append)

     def stat(self, key):
-
+        container, path = split_path(self._join(key))
+        response = self._do_object_request(
+            function=self.object.head, container=container, path=path
+        )
+        head = dict(response.headers)
         size = int(head.get("Content-Length", "0"))
         datestr = head.get("Last-Modified", "0")
         modified = time.mktime(
@@ -171,7 +196,6 @@ class V3ioStore(DataStore):
         return FileStats(size, modified)

     def listdir(self, key):
-        v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
         container, subpath = split_path(self._join(key))
         if not subpath.endswith("/"):
             subpath += "/"
@@ -180,7 +204,7 @@ class V3ioStore(DataStore):
         subpath_length = len(subpath) - 1

         try:
-            response =
+            response = self.client.container.list(
                 container=container,
                 path=subpath,
                 get_all_attributes=False,
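The new `_do_object_request` helper funnels every v3io dataplane call through one place so that `HttpResponseError` is translated into the matching MLRun error by status code. A standalone sketch of that translation pattern; the error class and the failing call below are stand-ins for the v3io SDK, not mlrun code:

```python
# Standalone sketch of the error-translation wrapper added above; the error class
# and the failing call are stand-ins for the v3io SDK.
import mlrun.errors


class FakeHttpResponseError(Exception):
    def __init__(self, message, status_code):
        super().__init__(message)
        self.status_code = status_code


def _do_object_request(function, *args, **kwargs):
    try:
        return function(*args, **kwargs)
    except FakeHttpResponseError as http_response_error:
        # map the HTTP status code onto the corresponding MLRun error class
        raise mlrun.errors.err_for_status_code(
            status_code=http_response_error.status_code,
            message=mlrun.errors.err_to_str(http_response_error),
        )


def failing_get(container, path):
    raise FakeHttpResponseError(f"{container}/{path} not found", status_code=404)


try:
    _do_object_request(failing_get, container="users", path="missing.csv")
except mlrun.errors.MLRunNotFoundError as err:
    print("translated:", err)
```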
mlrun/db/base.py
CHANGED
@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
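`submit_workflow` is now part of the abstract `RunDBInterface`, so any DB backend can expose remote workflow submission with the same signature `HTTPRunDB` uses. A hedged sketch of how a client might call it through the active DB handle; the project name, workflow name, spec dict contents, and artifact path are all placeholders and a reachable MLRun API server is assumed:

```python
# Sketch only: assumes a reachable MLRun API server and an existing project
# "my-project" with a workflow named "main" (placeholders).
import mlrun

db = mlrun.get_run_db()
workflow_spec = {"name": "main"}  # minimal placeholder; a WorkflowSpec object is also accepted
response = db.submit_workflow(
    project="my-project",
    name="main",
    workflow_spec=workflow_spec,
    arguments={"param1": 10},
    artifact_path="v3io:///projects/my-project/artifacts",
)
print(response)  # mlrun.common.schemas.WorkflowResponse
```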
mlrun/db/httpdb.py
CHANGED
@@ -152,7 +152,7 @@ class HTTPRunDB(RunDBInterface):
     @staticmethod
     def get_api_path_prefix(version: str = None) -> str:
         """
-        :param version: API version to use, None (the default) will mean to use the default value from
+        :param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
          for un-versioned api set an empty string.
         """
         if version is not None:
@@ -250,7 +250,11 @@ class HTTPRunDB(RunDBInterface):

         try:
             response = self.session.request(
-                method,
+                method,
+                url,
+                timeout=timeout,
+                verify=config.httpdb.http.verify,
+                **kw,
             )
         except requests.RequestException as exc:
             error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
@@ -302,11 +306,11 @@ class HTTPRunDB(RunDBInterface):

     def connect(self, secrets=None):
         """Connect to the MLRun API server. Must be called prior to executing any other method.
-        The code utilizes the URL for the API server from the configuration - ``
+        The code utilizes the URL for the API server from the configuration - ``config.dbpath``.

         For example::

-
+            config.dbpath = config.dbpath or 'http://mlrun-api:8080'
             db = get_run_db().connect()
         """
         # hack to allow unit tests to instantiate HTTPRunDB without a real server behind
@@ -500,7 +504,7 @@ class HTTPRunDB(RunDBInterface):
         if offset < 0:
             raise MLRunInvalidArgumentError("Offset cannot be negative")
         if size is None:
-            size = int(
+            size = int(config.httpdb.logs.pull_logs_default_size_limit)
         elif size == -1:
             logger.warning(
                 "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -546,25 +550,23 @@ class HTTPRunDB(RunDBInterface):

         state, text = self.get_log(uid, project, offset=offset)
         if text:
-            print(text.decode(errors=
+            print(text.decode(errors=config.httpdb.logs.decode.errors))
         nil_resp = 0
         while True:
             offset += len(text)
             # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
             # TODO: refactor this to use a conditional backoff mechanism
             if nil_resp < 3:
-                time.sleep(int(
+                time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
             else:
                 time.sleep(
-                    int(
-                        mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
-                    )
+                    int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
                 )
             state, text = self.get_log(uid, project, offset=offset)
             if text:
                 nil_resp = 0
                 print(
-                    text.decode(errors=
+                    text.decode(errors=config.httpdb.logs.decode.errors),
                     end="",
                 )
             else:
@@ -1135,17 +1137,17 @@ class HTTPRunDB(RunDBInterface):
             structured_dict = {}
             for project, job_runtime_resources_map in response.json().items():
                 for job_id, runtime_resources in job_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-
-
+                    structured_dict.setdefault(project, {})[job_id] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
             return structured_dict
         elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
             structured_dict = {}
             for project, kind_runtime_resources_map in response.json().items():
                 for kind, runtime_resources in kind_runtime_resources_map.items():
-                    structured_dict.setdefault(project, {})[
-
-
+                    structured_dict.setdefault(project, {})[kind] = (
+                        mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                    )
             return structured_dict
         else:
             raise NotImplementedError(
@@ -1173,7 +1175,8 @@ class HTTPRunDB(RunDBInterface):
         :param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
             period didn't pass.
         :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
-            the moment they moved to terminal state
+            the moment they moved to terminal state
+            (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).

         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.
@@ -1203,9 +1206,9 @@ class HTTPRunDB(RunDBInterface):
         structured_dict = {}
         for project, kind_runtime_resources_map in response.json().items():
             for kind, runtime_resources in kind_runtime_resources_map.items():
-                structured_dict.setdefault(project, {})[
-
-
+                structured_dict.setdefault(project, {})[kind] = (
+                    mlrun.common.schemas.RuntimeResources(**runtime_resources)
+                )
         return structured_dict

     def create_schedule(
@@ -1340,7 +1343,7 @@ class HTTPRunDB(RunDBInterface):
             logger.warning(
                 "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
                 "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.
+                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
                 "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
                 source=func.spec.build.source,
                 load_source_on_run=func.spec.build.load_source_on_run,
@@ -1495,7 +1498,7 @@ class HTTPRunDB(RunDBInterface):
         Retrieve updated information on project background tasks being executed.
         If no filter is provided, will return background tasks from the last week.

-        :param project: Project name (defaults to mlrun.
+        :param project: Project name (defaults to mlrun.config.config.default_project).
         :param state: List only background tasks whose state is specified.
         :param created_from: Filter by background task created time in ``[created_from, created_to]``.
         :param created_to: Filter by background task created time in ``[created_from, created_to]``.
@@ -1608,19 +1611,21 @@ class HTTPRunDB(RunDBInterface):
         artifact_path=None,
         ops=None,
         cleanup_ttl=None,
+        timeout=60,
     ):
         """Submit a KFP pipeline for execution.

-        :param project:
-        :param pipeline:
-        :param arguments:
-        :param experiment:
-        :param run:
-        :param namespace:
-        :param artifact_path:
-        :param ops:
-        :param cleanup_ttl:
-
+        :param project: The project of the pipeline
+        :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
+        :param arguments: A dictionary of arguments to pass to the pipeline.
+        :param experiment: A name to assign for the specific experiment.
+        :param run: A name for this specific run.
+        :param namespace: Kubernetes namespace to execute the pipeline in.
+        :param artifact_path: A path to artifacts used by this pipeline.
+        :param ops: Transformers to apply on all ops in the pipeline.
+        :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
+                            workflow and all its resources are deleted)
+        :param timeout: Timeout for the API call.
         """

         if isinstance(pipeline, str):
@@ -1662,7 +1667,7 @@ class HTTPRunDB(RunDBInterface):
             "POST",
             f"projects/{project}/pipelines",
             params=params,
-            timeout=
+            timeout=timeout,
             body=data,
             headers=headers,
         )
@@ -3450,8 +3455,8 @@ class HTTPRunDB(RunDBInterface):
         source: Optional[str] = None,
         run_name: Optional[str] = None,
         namespace: Optional[str] = None,
-        notifications:
-    ):
+        notifications: list[mlrun.model.Notification] = None,
+    ) -> mlrun.common.schemas.WorkflowResponse:
         """
         Submitting workflow for a remote execution.

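`submit_pipeline` now documents its parameters and accepts a `timeout` for the underlying API call (defaulting to 60 seconds). A hedged usage sketch; the project name and pipeline file path are placeholders, and a reachable MLRun API server is assumed:

```python
# Sketch only: assumes a reachable MLRun API server, a project named "my-project",
# and a compiled pipeline package at ./pipeline.yaml (placeholders).
import mlrun

db = mlrun.get_run_db()
pipeline_id = db.submit_pipeline(
    project="my-project",
    pipeline="./pipeline.yaml",      # pipeline function or .yaml/.zip file path
    arguments={"model_name": "demo"},
    experiment="demo-experiment",
    run="demo-run",
    timeout=120,                     # new: API call timeout in seconds (default 60)
)
print(pipeline_id)  # identifier of the submitted pipeline run (assumed return value)
```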
mlrun/execution.py
CHANGED
@@ -559,9 +559,9 @@ class MLClientCtx(object):
         for k, v in get_in(task, ["status", "results"], {}).items():
             self._results[k] = v
         for artifact in get_in(task, ["status", run_keys.artifacts], []):
-            self._artifacts_manager.artifacts[
-                artifact
-
+            self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
+                artifact
+            )
             self._artifacts_manager.link_artifact(
                 self.project,
                 self.name,