mlrun 1.6.0rc7__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- mlrun/__main__.py +27 -27
- mlrun/common/schemas/auth.py +2 -0
- mlrun/config.py +2 -2
- mlrun/datastore/dbfs_store.py +0 -3
- mlrun/datastore/sources.py +12 -2
- mlrun/datastore/targets.py +3 -0
- mlrun/db/httpdb.py +15 -0
- mlrun/feature_store/feature_set.py +5 -2
- mlrun/feature_store/retrieval/spark_merger.py +7 -1
- mlrun/kfpops.py +1 -1
- mlrun/launcher/client.py +1 -6
- mlrun/launcher/remote.py +5 -3
- mlrun/model.py +1 -1
- mlrun/model_monitoring/batch_application.py +48 -85
- mlrun/package/packager.py +115 -89
- mlrun/package/packagers/default_packager.py +66 -65
- mlrun/package/packagers/numpy_packagers.py +109 -62
- mlrun/package/packagers/pandas_packagers.py +12 -23
- mlrun/package/packagers/python_standard_library_packagers.py +35 -57
- mlrun/package/packagers_manager.py +16 -13
- mlrun/package/utils/_pickler.py +8 -18
- mlrun/package/utils/_supported_format.py +1 -1
- mlrun/projects/pipelines.py +11 -6
- mlrun/projects/project.py +11 -4
- mlrun/runtimes/__init__.py +6 -0
- mlrun/runtimes/base.py +8 -0
- mlrun/runtimes/daskjob.py +73 -5
- mlrun/runtimes/local.py +9 -9
- mlrun/runtimes/remotesparkjob.py +1 -0
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +2 -2
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +38 -38
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/batch_application.py
CHANGED
@@ -11,25 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+
 import concurrent.futures
 import datetime
 import json
 import os
 import re
-from typing import Callable, Tuple
+from typing import Callable, Optional, Tuple
 
-import numpy as np
 import pandas as pd
 
 import mlrun
-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.utils.v3io_clients
 from mlrun.datastore import get_stream_pusher
 from mlrun.datastore.targets import ParquetTarget
 from mlrun.model_monitoring.batch import calculate_inputs_statistics
@@ -72,46 +67,39 @@ class BatchApplicationProcessor:
 
         # Get the batch interval range
         self.batch_dict = context.parameters[
-            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
+            mm_constants.EventFieldType.BATCH_INTERVALS_DICT
        ]
 
-        # TODO: This will be removed
+        # TODO: This will be removed once the job params can be parsed with different types
         # Convert batch dict string into a dictionary
         if isinstance(self.batch_dict, str):
            self._parse_batch_dict_str()
         # If provided, only model endpoints in that that list will be analyzed
         self.model_endpoints = context.parameters.get(
-            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS, None
-        )
-        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = (
-            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
+            mm_constants.EventFieldType.MODEL_ENDPOINTS, None
         )
+        self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.parquet_directory = get_monitoring_parquet_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         self.storage_options = None
         if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations(
-                model_monitoring_access_key=self.model_monitoring_access_key
-            )
+            self._initialize_v3io_configurations()
         elif self.parquet_directory.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
-    def _initialize_v3io_configurations(
-        self,
-        v3io_access_key: str = None,
-        v3io_framesd: str = None,
-        v3io_api: str = None,
-        model_monitoring_access_key: str = None,
-    ) -> None:
-        # Get the V3IO configurations:
-        self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
-        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = model_monitoring_access_key
+    @staticmethod
+    def _get_model_monitoring_access_key() -> Optional[str]:
+        access_key = os.getenv(mm_constants.ProjectSecretKeys.ACCESS_KEY)
+        # allow access key to be empty and don't fetch v3io access key if not needed
+        if access_key is None:
+            access_key = mlrun.mlconf.get_v3io_access_key()
+        return access_key
+
+    def _initialize_v3io_configurations(self) -> None:
+        self.v3io_framesd = mlrun.mlconf.v3io_framesd
+        self.v3io_api = mlrun.mlconf.v3io_api
         self.storage_options = dict(
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
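Editor's note: the new `_get_model_monitoring_access_key` helper replaces the pair of unconditional environment lookups that previously ran in `__init__`. A minimal standalone sketch of the same fallback pattern follows; the literal environment variable names stand in for `mm_constants.ProjectSecretKeys.ACCESS_KEY` and `mlrun.mlconf.get_v3io_access_key()`, which this sketch does not import:

    import os
    from typing import Optional

    def resolve_access_key() -> Optional[str]:
        # An empty string is a valid, intentionally blank key; only None
        # (the variable is not set at all) triggers the fallback lookup.
        access_key = os.getenv("MODEL_MONITORING_ACCESS_KEY")
        if access_key is None:
            access_key = os.getenv("V3IO_ACCESS_KEY")
        return access_key

The `is None` check (rather than a truthiness check) is what lets an explicitly empty key suppress the fallback, matching the "allow access key to be empty" comment in the hunk above.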
@@ -126,9 +114,7 @@ class BatchApplicationProcessor:
             self.project
         ).list_model_monitoring_functions()
         if application:
-            applications_names = np.unique(
-                [app.metadata.name for app in application]
-            ).tolist()
+            applications_names = list({app.metadata.name for app in application})
         else:
             logger.info("There are no monitoring application found in this project")
             applications_names = []
@@ -144,26 +130,18 @@ class BatchApplicationProcessor:
         futures = []
         for endpoint in endpoints:
             if (
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
-                ]
-                and endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
-                ]
-                == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
+                endpoint[mm_constants.EventFieldType.ACTIVE]
+                and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                == mm_constants.ModelMonitoringMode.enabled.value
             ):
                 # Skip router endpoint:
                 if (
-                    int(
-                        endpoint[
-                            mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
-                        ]
-                    )
-                    == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
+                    int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                    == mm_constants.EndpointType.ROUTER
                 ):
                     # Router endpoint has no feature stats
                     logger.info(
-                        f"{endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]} is router skipping"
+                        f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
                     )
                     continue
                 future = pool.submit(
@@ -184,8 +162,9 @@ class BatchApplicationProcessor:
 
         self._delete_old_parquet()
 
-    @staticmethod
+    @classmethod
     def model_endpoint_process(
+        cls,
         endpoint: dict,
         applications_names: list[str],
         bath_dict: dict,
@@ -207,20 +186,14 @@ class BatchApplicationProcessor:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
 
         """
-        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
+        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
         try:
             # Getting batch interval start time and end time
-            start_time, end_time = BatchApplicationProcessor._get_interval_range(
-                bath_dict
-            )
+            start_time, end_time = cls._get_interval_range(bath_dict)
             m_fs = fstore.get_feature_set(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
-            ]
+            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
             if labels:
                 if isinstance(labels, str):
                     labels = json.loads(labels)
@@ -232,7 +205,7 @@ class BatchApplicationProcessor:
 
             try:
                 # get sample data
-                df = BatchApplicationProcessor._get_sample_df(
+                df = cls._get_sample_df(
                     m_fs,
                     endpoint_id,
                     end_time,
@@ -245,9 +218,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Not enough model events since the beginning of the batch interval",
                     featureset_name=m_fs.metadata.name,
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                     start_time=start_time,
                     end_time=end_time,
@@ -262,9 +233,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Parquet not found, probably due to not enough model events",
                     # parquet_target=m_fs.status.targets[0].path, TODO:
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
                 return
@@ -280,15 +249,11 @@ class BatchApplicationProcessor:
             m_fs.save()
 
             # Get the timestamp of the latest request:
-            latest_request = df[
-                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
-            ].iloc[-1]
+            latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
 
             # Get the feature stats from the model endpoint for reference data
             feature_stats = json.loads(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_STATS]
             )
 
             # Get the current stats:
@@ -298,7 +263,7 @@ class BatchApplicationProcessor:
             )
 
             # create and push data to all applications
-            BatchApplicationProcessor._push_to_applications(
+            cls._push_to_applications(
                 current_stats,
                 feature_stats,
                 parquet_directory,
@@ -312,7 +277,7 @@ class BatchApplicationProcessor:
 
         except FileNotFoundError as e:
             logger.error(
-                f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
+                f"Exception for endpoint {endpoint[mm_constants.EventFieldType.UID]}"
             )
             return endpoint_id, e
 
@@ -323,9 +288,9 @@ class BatchApplicationProcessor:
     ) -> Tuple[datetime.datetime, datetime.datetime]:
         """Getting batch interval time range"""
         minutes, hours, days = (
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
+            batch_dict[mm_constants.EventFieldType.MINUTES],
+            batch_dict[mm_constants.EventFieldType.HOURS],
+            batch_dict[mm_constants.EventFieldType.DAYS],
         )
         end_time = now_func() - datetime.timedelta(
             seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
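Editor's note: the interval arithmetic in `_get_interval_range` can be exercised standalone. The sketch below assumes, as the hunk implies but does not show, that the start time is derived by subtracting the interval from the end time; it uses plain string keys in place of the `EventFieldType` constants and a literal 30-second stand-in for `parquet_batching_timeout_secs`:

    import datetime
    from typing import Tuple

    def get_interval_range(
        batch_dict: dict, timeout_secs: int = 30
    ) -> Tuple[datetime.datetime, datetime.datetime]:
        # The window ends `timeout_secs` before "now", leaving the parquet
        # batching step time to flush events that are still in flight.
        end_time = datetime.datetime.now() - datetime.timedelta(seconds=timeout_secs)
        start_time = end_time - datetime.timedelta(
            minutes=batch_dict["minutes"],
            hours=batch_dict["hours"],
            days=batch_dict["days"],
        )
        return start_time, end_time

    # A one-hour analysis window:
    start, end = get_interval_range({"minutes": 0, "hours": 1, "days": 0})
    assert end - start == datetime.timedelta(hours=1)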
@@ -360,7 +325,7 @@ class BatchApplicationProcessor:
             ("minute", "%M"),
         ]:
             schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
-        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"
+        endpoint_str = f"{mm_constants.EventFieldType.ENDPOINT_ID}={endpoint_id}"
 
         return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"
 
@@ -377,7 +342,7 @@ class BatchApplicationProcessor:
 
         base_directory = get_monitoring_parquet_path(
             project=self.project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         target = ParquetTarget(path=base_directory)
         store, _ = target._get_store_and_path()
@@ -456,7 +421,7 @@ class BatchApplicationProcessor:
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
-                application_name=mlrun.common.schemas.model_monitoring.MonitoringFunctionNames.WRITER,
+                application_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
         }
         for app_name in applications_names:
@@ -504,9 +469,7 @@ class BatchApplicationProcessor:
         } # to avoid exception when the taf is not latest
         entity_rows = pd.DataFrame(
             {
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
-                    endpoint_id
-                ],
+                mm_constants.EventFieldType.ENDPOINT_ID: [endpoint_id],
                 "scheduled_time": [end_time],
             }
         )
@@ -516,12 +479,12 @@ class BatchApplicationProcessor:
             entity_timestamp_column="scheduled_time",
             start_time=start_time,
             end_time=end_time,
-            timestamp_for_filtering=mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP,
+            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
             target=ParquetTarget(
                 path=parquet_directory,
                 time_partitioning_granularity="minute",
                 partition_cols=[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID,
+                    mm_constants.EventFieldType.ENDPOINT_ID,
                 ],
                 storage_options=storage_options,
             ),
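Editor's note: a recurring change in batch_application.py is that `@staticmethod` methods which called sibling helpers through the hard-coded class name (`BatchApplicationProcessor._get_interval_range`, `_get_sample_df`, `_push_to_applications`) became `@classmethod`s that call them through `cls`. A toy illustration of why that matters for subclassing (not mlrun code):

    class Base:
        @classmethod
        def process(cls, value: int) -> int:
            # Dispatches to the subclass override; a hard-coded
            # `Base._transform(value)` would always use Base's version.
            return cls._transform(value)

        @staticmethod
        def _transform(value: int) -> int:
            return value + 1


    class Custom(Base):
        @staticmethod
        def _transform(value: int) -> int:
            return value * 2


    assert Base.process(3) == 4
    assert Custom.process(3) == 6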
mlrun/package/packager.py
CHANGED
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import
-import tempfile
-from abc import ABC, ABCMeta, abstractmethod
+from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, List, Tuple, Type, Union
 
@@ -24,53 +22,9 @@ from mlrun.datastore import DataItem
 from .utils import TypeHintUtils
 
 
-
-class _PackagerMeta(ABCMeta):
+class Packager(ABC):
     """
-
-    """
-
-    def __lt__(cls, other) -> bool:
-        """
-        A less than implementation to compare by priority in order to be able to sort the packagers by it.
-
-        :param other: The compared packager.
-
-        :return: True if priority is lower (means better) and False otherwise.
-        """
-        return cls.PRIORITY < other.PRIORITY
-
-    def __repr__(cls) -> str:
-        """
-        Get the string representation of a packager in the following format:
-        <packager name>(type=<handled type>, artifact_types=[<all supported artifact types>], priority=<priority>)
-
-        :return: The string representation of e packager.
-        """
-        # Get the packager info into variables:
-        packager_name = cls.__name__
-        handled_type = (
-            (
-                # Types have __name__ attribute but typing's types do not.
-                cls.PACKABLE_OBJECT_TYPE.__name__
-                if hasattr(cls.PACKABLE_OBJECT_TYPE, "__name__")
-                else str(cls.PACKABLE_OBJECT_TYPE)
-            )
-            if cls.PACKABLE_OBJECT_TYPE is not ...
-            else "Any"
-        )
-        supported_artifact_types = cls.get_supported_artifact_types()
-
-        # Return the string representation in the format noted above:
-        return (
-            f"{packager_name}(packable_type={handled_type}, artifact_types={supported_artifact_types}, "
-            f"priority={cls.PRIORITY})"
-        )
-
-
-class Packager(ABC, metaclass=_PackagerMeta):
-    """
-    The abstract base class for a packager. A packager is a static class that has two main duties:
+    The abstract base class for a packager. Packager has two main duties:
 
     1. **Packing** - get an object that was returned from a function and log it to MLRun. The user can specify packing
        configurations to the packager using log hints. The packed object can be an artifact or a result.
@@ -134,7 +88,7 @@ class Packager(ABC, metaclass=_PackagerMeta):
             with open("./some_file.txt", "w") as file:
                 file.write("Pack me")
             artifact = Artifact(key="my_artifact")
-            cls.add_future_clearing_path(path="./some_file.txt")
+            self.add_future_clearing_path(path="./some_file.txt")
             return artifact, None
     """
 
@@ -144,12 +98,16 @@ class Packager(ABC, metaclass=_PackagerMeta):
     #: The priority of this packager in the packagers collection of the manager (lower is better).
     PRIORITY: int = ...
 
-
-
+    def __init__(self):
+        # Assign the packager's priority (notice that if it is equal to `...` then it will be overriden by the packager
+        # manager when collected):
+        self._priority = Packager.PRIORITY
+
+        # List of all paths to be deleted by the manager of this packager after logging the packages:
+        self._future_clearing_path_list: List[str] = []
 
-    @classmethod
     @abstractmethod
-    def get_default_packing_artifact_type(cls, obj: Any) -> str:
+    def get_default_packing_artifact_type(self, obj: Any) -> str:
         """
         Get the default artifact type used for packing. The method is used when an object is sent for packing
         without an artifact type noted by the user.
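Editor's note: the `__init__` added above moves `PRIORITY` from class-level state (compared by the removed `_PackagerMeta` metaclass) to per-instance state, paired with the property, setter, and `__lt__` that later hunks add to `Packager` itself. A self-contained sketch of the resulting behavior; the default-assignment loop is a hypothetical stand-in for what the packagers manager does when it collects packagers:

    from typing import List


    class TinyPackager:
        PRIORITY: int = ...  # `...` means "no explicit priority yet"

        def __init__(self):
            self._priority = TinyPackager.PRIORITY
            self._future_clearing_path_list: List[str] = []

        @property
        def priority(self) -> int:
            return self._priority

        @priority.setter
        def priority(self, priority: int):
            self._priority = priority

        def __lt__(self, other: "TinyPackager") -> bool:
            return self.priority < other.priority


    packagers = [TinyPackager(), TinyPackager()]
    for default, packager in enumerate(packagers):
        if packager.priority is ...:
            # Hypothetical manager step: fill in unset priorities at collection time.
            packager.priority = default
    assert sorted(packagers)[0].priority == 0  # sorting now compares instance priorities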
@@ -160,9 +118,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
-    def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
+    def get_default_unpacking_artifact_type(self, data_item: DataItem) -> str:
         """
         Get the default artifact type used for unpacking a data item holding an object of this packager. The method
         is used when a data item is sent for unpacking without it being a package, but is a simple url or an old
@@ -174,9 +131,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
-    def get_supported_artifact_types(cls) -> List[str]:
+    def get_supported_artifact_types(self) -> List[str]:
         """
         Get all the supported artifact types on this packager.
 
@@ -184,10 +140,9 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
     def pack(
-        cls,
+        self,
         obj: Any,
         key: str = None,
         artifact_type: str = None,
@@ -206,10 +161,9 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
     def unpack(
-        cls,
+        self,
         data_item: DataItem,
         artifact_type: str = None,
         instructions: dict = None,
@@ -225,9 +179,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     def is_packable(
-        cls, obj: Any, artifact_type: str = None, configurations: dict = None
+        self, obj: Any, artifact_type: str = None, configurations: dict = None
     ) -> bool:
         """
         Check if this packager can pack an object of the provided type as the provided artifact type.
@@ -247,20 +200,19 @@ class Packager(ABC, metaclass=_PackagerMeta):
 
         # Validate the object type (ellipses means any type):
         if (
-            cls.PACKABLE_OBJECT_TYPE is not ...
-            and object_type != cls.PACKABLE_OBJECT_TYPE
+            self.PACKABLE_OBJECT_TYPE is not ...
+            and object_type != self.PACKABLE_OBJECT_TYPE
         ):
             return False
 
         # Validate the artifact type (if given):
-        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
+        if artifact_type and artifact_type not in self.get_supported_artifact_types():
             return False
 
         return True
 
-    @classmethod
     def is_unpackable(
-        cls, data_item: DataItem, type_hint: Type, artifact_type: str = None
+        self, data_item: DataItem, type_hint: Type, artifact_type: str = None
     ) -> bool:
         """
         Check if this packager can unpack an input according to the user-given type hint and the provided artifact type.
@@ -275,44 +227,118 @@ class Packager(ABC, metaclass=_PackagerMeta):
         :return: True if unpackable and False otherwise.
         """
         # Check type (ellipses means any type):
-        if cls.PACKABLE_OBJECT_TYPE is not ...:
+        if self.PACKABLE_OBJECT_TYPE is not ...:
             if not TypeHintUtils.is_matching(
                 object_type=type_hint,  # The type hint is the expected object type the MLRun function wants.
-                type_hint=cls.PACKABLE_OBJECT_TYPE,
+                type_hint=self.PACKABLE_OBJECT_TYPE,
                 reduce_type_hint=False,
             ):
                 return False
 
         # Check the artifact type:
-        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
+        if artifact_type and artifact_type not in self.get_supported_artifact_types():
             return False
 
         # Unpackable:
         return True
 
-    @classmethod
-    def add_future_clearing_path(
-        cls, path: Union[str, Path], add_temp_paths_only: bool = True
-    ):
+    def add_future_clearing_path(self, path: Union[str, Path]):
         """
         Mark a path to be cleared by this packager's manager after logging the packaged artifacts.
 
-        :param path:                The path to clear.
-        :param add_temp_paths_only: Whether to add only temporary paths (paths inside the temporary directory
-                                    of the system, which are deleted in the end of the run anyway). Default: True.
+        :param path: The path to clear post logging the artifacts.
         """
-        # Add only temporary paths if required:
-        if add_temp_paths_only and not str(path).startswith(tempfile.gettempdir()):
-            return
-
-        cls._future_clearing_path_list.append(str(path))
-
-    @classmethod
-    def get_future_clearing_path_list(cls) -> List[str]:
+        self._future_clearing_path_list.append(str(path))
+
+    @property
+    def priority(self) -> int:
+        """
+        Get the packager's priority.
+
+        :return: The packager's priority.
+        """
+        return self._priority
+
+    @priority.setter
+    def priority(self, priority: int):
+        """
+        Set the packager's priority.
+
+        :param priority: The priority to set.
+        """
+        self._priority = priority
+
+    @property
+    def future_clearing_path_list(self) -> List[str]:
         """
         Get the packager's future clearing path list.
 
         :return: The clearing path list.
         """
-        return cls._future_clearing_path_list
+        return self._future_clearing_path_list
+
+    def __lt__(self, other: "Packager") -> bool:
+        """
+        A less than implementation to compare by priority in order to be able to sort the packagers by it.
+
+        :param other: The compared packager.
+
+        :return: True if priority is lower (means better) and False otherwise.
+        """
+        return self.priority < other.priority
+
+    def __repr__(self) -> str:
+        """
+        Get the string representation of a packager in the following format:
+        <packager name>(type=<handled type>, artifact_types=[<all supported artifact types>], priority=<priority>)
+
+        :return: The string representation of e packager.
+        """
+        # Get the packager info into variables:
+        packager_name = self.__class__.__name__
+        handled_type = (
+            (
+                # Types have __name__ attribute but typing's types do not.
+                self.PACKABLE_OBJECT_TYPE.__name__
+                if hasattr(self.PACKABLE_OBJECT_TYPE, "__name__")
+                else str(self.PACKABLE_OBJECT_TYPE)
+            )
+            if self.PACKABLE_OBJECT_TYPE is not ...
+            else "Any"
+        )
+        supported_artifact_types = self.get_supported_artifact_types()
+
+        # Return the string representation in the format noted above:
+        return (
+            f"{packager_name}(packable_type={handled_type}, artifact_types={supported_artifact_types}, "
+            f"priority={self.priority})"
+        )
+
+    def get_data_item_local_path(
+        self, data_item: DataItem, add_to_future_clearing_path: bool = None
+    ) -> str:
+        """
+        Get the local path to the item handled by the data item provided. The local path can be the same as the data
+        item in case the data item points to a local path, or will be downloaded to a temporary directory and return
+        this newly created temporary local path.
+
+        :param data_item:                   The data item to get its item local path.
+        :param add_to_future_clearing_path: Whether to add the local path to the future clearing paths list. If None, it
+                                            will add the path to the list only if the data item is not of kind 'file',
+                                            meaning it represents a local file and hence we don't want to delete it post
+                                            running automatically. We wish to delete it only if the local path is
+                                            temporary (and that will be in case kind is not 'file', so it is being
+                                            downloaded to a temporary directory).
+
+        :return: The data item local path.
+        """
+        # Get the local path to the item handled by the data item (download it to temporary if not local already):
+        local_path = data_item.local()
+
+        # Check if needed to add to the future clear list:
+        if add_to_future_clearing_path or (
+            add_to_future_clearing_path is None and data_item.kind != "file"
+        ):
+            self.add_future_clearing_path(path=local_path)
+
+        return local_path
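Editor's note: the default-clearing rule at the end of the new `get_data_item_local_path` is compact, so here is its truth table extracted into a standalone sketch; the `kind` strings mirror the `data_item.kind != "file"` check above, and `should_clear` is a hypothetical helper, not an mlrun function:

    def should_clear(kind: str, add_to_future_clearing_path: bool = None) -> bool:
        # An explicit True/False from the caller wins; when left as None, clear
        # only paths that were downloaded (any kind other than a local "file").
        return bool(
            add_to_future_clearing_path
            or (add_to_future_clearing_path is None and kind != "file")
        )


    assert should_clear("s3") is True          # downloaded copy, safe to delete
    assert should_clear("file") is False       # user's local file, keep it
    assert should_clear("file", True) is True  # caller forces clearing
    assert should_clear("s3", False) is False  # caller opts out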