mlrun 1.6.0rc7__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (38)
  1. mlrun/__main__.py +27 -27
  2. mlrun/common/schemas/auth.py +2 -0
  3. mlrun/config.py +2 -2
  4. mlrun/datastore/dbfs_store.py +0 -3
  5. mlrun/datastore/sources.py +12 -2
  6. mlrun/datastore/targets.py +3 -0
  7. mlrun/db/httpdb.py +15 -0
  8. mlrun/feature_store/feature_set.py +5 -2
  9. mlrun/feature_store/retrieval/spark_merger.py +7 -1
  10. mlrun/kfpops.py +1 -1
  11. mlrun/launcher/client.py +1 -6
  12. mlrun/launcher/remote.py +5 -3
  13. mlrun/model.py +1 -1
  14. mlrun/model_monitoring/batch_application.py +48 -85
  15. mlrun/package/packager.py +115 -89
  16. mlrun/package/packagers/default_packager.py +66 -65
  17. mlrun/package/packagers/numpy_packagers.py +109 -62
  18. mlrun/package/packagers/pandas_packagers.py +12 -23
  19. mlrun/package/packagers/python_standard_library_packagers.py +35 -57
  20. mlrun/package/packagers_manager.py +16 -13
  21. mlrun/package/utils/_pickler.py +8 -18
  22. mlrun/package/utils/_supported_format.py +1 -1
  23. mlrun/projects/pipelines.py +11 -6
  24. mlrun/projects/project.py +11 -4
  25. mlrun/runtimes/__init__.py +6 -0
  26. mlrun/runtimes/base.py +8 -0
  27. mlrun/runtimes/daskjob.py +73 -5
  28. mlrun/runtimes/local.py +9 -9
  29. mlrun/runtimes/remotesparkjob.py +1 -0
  30. mlrun/runtimes/utils.py +1 -1
  31. mlrun/utils/notifications/notification_pusher.py +1 -1
  32. mlrun/utils/version/version.json +2 -2
  33. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +2 -2
  34. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +38 -38
  35. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
  36. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
  37. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
  38. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/batch_application.py CHANGED
@@ -11,25 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+
 import concurrent.futures
 import datetime
 import json
 import os
 import re
-from typing import Callable, Tuple
+from typing import Callable, Optional, Tuple
 
-import numpy as np
 import pandas as pd
 
 import mlrun
-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.utils.v3io_clients
 from mlrun.datastore import get_stream_pusher
 from mlrun.datastore.targets import ParquetTarget
 from mlrun.model_monitoring.batch import calculate_inputs_statistics
@@ -72,46 +67,39 @@ class BatchApplicationProcessor:
 
         # Get the batch interval range
         self.batch_dict = context.parameters[
-            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
+            mm_constants.EventFieldType.BATCH_INTERVALS_DICT
         ]
 
-        # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
+        # TODO: This will be removed once the job params can be parsed with different types
         # Convert batch dict string into a dictionary
         if isinstance(self.batch_dict, str):
             self._parse_batch_dict_str()
         # If provided, only model endpoints in that list will be analyzed
         self.model_endpoints = context.parameters.get(
-            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS, None
-        )
-        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = (
-            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
+            mm_constants.EventFieldType.MODEL_ENDPOINTS, None
         )
+        self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.parquet_directory = get_monitoring_parquet_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         self.storage_options = None
         if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations(
-                model_monitoring_access_key=self.model_monitoring_access_key
-            )
+            self._initialize_v3io_configurations()
         elif self.parquet_directory.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
-    def _initialize_v3io_configurations(
-        self,
-        v3io_access_key: str = None,
-        v3io_framesd: str = None,
-        v3io_api: str = None,
-        model_monitoring_access_key: str = None,
-    ):
-        # Get the V3IO configurations
-        self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
-
-        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = model_monitoring_access_key
+    @staticmethod
+    def _get_model_monitoring_access_key() -> Optional[str]:
+        access_key = os.getenv(mm_constants.ProjectSecretKeys.ACCESS_KEY)
+        # allow access key to be empty and don't fetch v3io access key if not needed
+        if access_key is None:
+            access_key = mlrun.mlconf.get_v3io_access_key()
+        return access_key
+
+    def _initialize_v3io_configurations(self) -> None:
+        self.v3io_framesd = mlrun.mlconf.v3io_framesd
+        self.v3io_api = mlrun.mlconf.v3io_api
         self.storage_options = dict(
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
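The new `_get_model_monitoring_access_key` also changes the fallback semantics: the old `or`-based code treated an empty access key as missing, while `os.getenv` returns `None` only when the variable is entirely unset, so an explicitly empty key is now respected, as the inline comment notes. A minimal sketch of that behavior (the helper name and the lambda are stand-ins, not mlrun API):

```python
import os
from typing import Callable, Optional


def resolve_access_key(env_var: str, fetch_v3io_key: Callable[[], str]) -> Optional[str]:
    # Only a completely unset variable triggers the V3IO fallback;
    # an empty string set in the environment is kept as-is.
    access_key = os.getenv(env_var)
    if access_key is None:
        access_key = fetch_v3io_key()
    return access_key


os.environ.pop("DEMO_ACCESS_KEY", None)
assert resolve_access_key("DEMO_ACCESS_KEY", lambda: "v3io-key") == "v3io-key"
os.environ["DEMO_ACCESS_KEY"] = ""
assert resolve_access_key("DEMO_ACCESS_KEY", lambda: "v3io-key") == ""  # the old `or`-based code would fall back here
```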
@@ -126,9 +114,7 @@ class BatchApplicationProcessor:
             self.project
         ).list_model_monitoring_functions()
         if application:
-            applications_names = np.unique(
-                [app.metadata.name for app in application]
-            ).tolist()
+            applications_names = list({app.metadata.name for app in application})
         else:
             logger.info("There are no monitoring application found in this project")
             applications_names = []
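Replacing `np.unique(...).tolist()` with a set comprehension drops the NumPy dependency here, but there is a subtle behavioral difference worth noting: `np.unique` returns the names sorted, while set iteration order is arbitrary. A quick illustration (the change implies ordering does not matter downstream):

```python
import numpy as np

names = ["app-b", "app-a", "app-b"]
print(np.unique(names).tolist())        # ['app-a', 'app-b'] -- deduplicated and always sorted
print(list({name for name in names}))   # deduplicated, but in arbitrary set order
```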
@@ -144,26 +130,18 @@ class BatchApplicationProcessor:
             futures = []
             for endpoint in endpoints:
                 if (
-                    endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
-                    ]
-                    and endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
-                    ]
-                    == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
+                    endpoint[mm_constants.EventFieldType.ACTIVE]
+                    and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                    == mm_constants.ModelMonitoringMode.enabled.value
                 ):
                     # Skip router endpoint:
                     if (
-                        int(
-                            endpoint[
-                                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
-                            ]
-                        )
-                        == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
+                        int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                        == mm_constants.EndpointType.ROUTER
                     ):
                         # Router endpoint has no feature stats
                         logger.info(
-                            f"{endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]} is router skipping"
+                            f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
                         )
                         continue
                     future = pool.submit(
@@ -184,8 +162,9 @@ class BatchApplicationProcessor:
 
         self._delete_old_parquet()
 
-    @staticmethod
+    @classmethod
     def model_endpoint_process(
+        cls,
         endpoint: dict,
         applications_names: list[str],
         bath_dict: dict,
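The `@staticmethod` to `@classmethod` switch lets the body call sibling helpers through `cls` instead of hard-coding `BatchApplicationProcessor`, so a subclass that overrides a helper is dispatched to automatically. A generic sketch of the difference (the names here are illustrative, not from mlrun):

```python
class Processor:
    @classmethod
    def process(cls) -> str:
        # `cls` is whatever class the call was made on, so subclass
        # overrides of `_helper` are picked up without naming `Processor`.
        return cls._helper()

    @classmethod
    def _helper(cls) -> str:
        return "base"


class CustomProcessor(Processor):
    @classmethod
    def _helper(cls) -> str:
        return "custom"


assert Processor.process() == "base"
assert CustomProcessor.process() == "custom"  # a hard-coded Processor._helper() would return "base"
```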
@@ -207,20 +186,14 @@ class BatchApplicationProcessor:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
 
         """
-        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
+        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
         try:
             # Getting batch interval start time and end time
-            start_time, end_time = BatchApplicationProcessor._get_interval_range(
-                bath_dict
-            )
+            start_time, end_time = cls._get_interval_range(bath_dict)
             m_fs = fstore.get_feature_set(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
-            ]
+            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
             if labels:
                 if isinstance(labels, str):
                     labels = json.loads(labels)
@@ -232,7 +205,7 @@ class BatchApplicationProcessor:
 
             try:
                 # get sample data
-                df = BatchApplicationProcessor._get_sample_df(
+                df = cls._get_sample_df(
                     m_fs,
                     endpoint_id,
                     end_time,
@@ -245,9 +218,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Not enough model events since the beginning of the batch interval",
                     featureset_name=m_fs.metadata.name,
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                     start_time=start_time,
                     end_time=end_time,
@@ -262,9 +233,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Parquet not found, probably due to not enough model events",
                     # parquet_target=m_fs.status.targets[0].path, TODO:
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
                 return
@@ -280,15 +249,11 @@ class BatchApplicationProcessor:
             m_fs.save()
 
             # Get the timestamp of the latest request:
-            latest_request = df[
-                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
-            ].iloc[-1]
+            latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
 
             # Get the feature stats from the model endpoint for reference data
             feature_stats = json.loads(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_STATS]
             )
 
             # Get the current stats:
@@ -298,7 +263,7 @@ class BatchApplicationProcessor:
             )
 
             # create and push data to all applications
-            BatchApplicationProcessor._push_to_applications(
+            cls._push_to_applications(
                 current_stats,
                 feature_stats,
                 parquet_directory,
@@ -312,7 +277,7 @@ class BatchApplicationProcessor:
 
         except FileNotFoundError as e:
             logger.error(
-                f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
+                f"Exception for endpoint {endpoint[mm_constants.EventFieldType.UID]}"
             )
             return endpoint_id, e
 
@@ -323,9 +288,9 @@ class BatchApplicationProcessor:
     ) -> Tuple[datetime.datetime, datetime.datetime]:
         """Getting batch interval time range"""
         minutes, hours, days = (
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
+            batch_dict[mm_constants.EventFieldType.MINUTES],
+            batch_dict[mm_constants.EventFieldType.HOURS],
+            batch_dict[mm_constants.EventFieldType.DAYS],
         )
         end_time = now_func() - datetime.timedelta(
             seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
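The hunk cuts off before the start-time computation, but the shape of the helper is clear: the end of the interval is "now" shifted back by the parquet batching timeout, and the start is presumably one `(days, hours, minutes)` interval earlier. A self-contained sketch under that assumption (plain string keys stand in for the `mm_constants.EventFieldType` values):

```python
import datetime
from typing import Tuple


def get_interval_range(
    batch_dict: dict,
    timeout_secs: int = 0,
    now_func=datetime.datetime.now,
) -> Tuple[datetime.datetime, datetime.datetime]:
    minutes, hours, days = batch_dict["minutes"], batch_dict["hours"], batch_dict["days"]
    # End of the interval: "now", shifted back so in-flight parquet writes settle.
    end_time = now_func() - datetime.timedelta(seconds=timeout_secs)
    # Start of the interval: one batch interval before the end (assumed).
    start_time = end_time - datetime.timedelta(minutes=minutes, hours=hours, days=days)
    return start_time, end_time


start, end = get_interval_range({"minutes": 0, "hours": 1, "days": 0})
print(end - start)  # 1:00:00
```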
@@ -360,7 +325,7 @@ class BatchApplicationProcessor:
             ("minute", "%M"),
         ]:
             schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
-        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"
+        endpoint_str = f"{mm_constants.EventFieldType.ENDPOINT_ID}={endpoint_id}"
 
         return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"
 
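For context, this builds a Hive-style partition path. The earlier `(unit, fmt)` pairs are cut off above, so year/month/day/hour are assumed in the sketch below; note also that `schedule_time_str` already ends with `/`, so the final f-string yields a doubled slash before the endpoint segment, which object stores generally tolerate:

```python
import datetime


def build_parquet_path(parquet_directory: str, schedule_time: datetime.datetime, endpoint_id: str) -> str:
    schedule_time_str = ""
    for unit, fmt in [  # earlier pairs assumed; only ("minute", "%M") is visible in the hunk
        ("year", "%Y"), ("month", "%m"), ("day", "%d"), ("hour", "%H"), ("minute", "%M"),
    ]:
        schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
    endpoint_str = f"endpoint_id={endpoint_id}"
    return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"


print(build_parquet_path("v3io:///monitoring", datetime.datetime(2024, 1, 2, 3, 4), "ep-1"))
# v3io:///monitoring/year=2024/month=01/day=02/hour=03/minute=04//endpoint_id=ep-1
```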
@@ -377,7 +342,7 @@ class BatchApplicationProcessor:
 
         base_directory = get_monitoring_parquet_path(
             project=self.project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         target = ParquetTarget(path=base_directory)
         store, _ = target._get_store_and_path()
@@ -456,7 +421,7 @@ class BatchApplicationProcessor:
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
-                application_name=mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.WRITER,
+                application_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
         }
         for app_name in applications_names:
@@ -504,9 +469,7 @@ class BatchApplicationProcessor:
         }  # to avoid exception when the tag is not latest
         entity_rows = pd.DataFrame(
             {
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
-                    endpoint_id
-                ],
+                mm_constants.EventFieldType.ENDPOINT_ID: [endpoint_id],
                 "scheduled_time": [end_time],
             }
         )
@@ -516,12 +479,12 @@ class BatchApplicationProcessor:
             entity_timestamp_column="scheduled_time",
             start_time=start_time,
             end_time=end_time,
-            timestamp_for_filtering=mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP,
+            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
             target=ParquetTarget(
                 path=parquet_directory,
                 time_partitioning_granularity="minute",
                 partition_cols=[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID,
+                    mm_constants.EventFieldType.ENDPOINT_ID,
                 ],
                 storage_options=storage_options,
             ),
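The `partition_cols` on the `ParquetTarget` is what produces the `endpoint_id=<uid>` directory segments used in the path-building helper above; this is the standard Hive-style partitioning convention, the same layout pandas/pyarrow would produce. A minimal illustration of the underlying convention (not mlrun API):

```python
import pandas as pd

df = pd.DataFrame({"endpoint_id": ["ep-1", "ep-1", "ep-2"], "value": [0.1, 0.2, 0.3]})
# Writing with partition_cols yields Hive-style directories:
#   out/endpoint_id=ep-1/<part>.parquet
#   out/endpoint_id=ep-2/<part>.parquet
df.to_parquet("out", partition_cols=["endpoint_id"])
```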
mlrun/package/packager.py CHANGED
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import pathlib
-import tempfile
-from abc import ABC, ABCMeta, abstractmethod
+from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, List, Tuple, Type, Union
 
@@ -24,53 +22,9 @@ from mlrun.datastore import DataItem
 from .utils import TypeHintUtils
 
 
-# TODO: When 3.7 is no longer supported, add "Packager" as reference type hint to cls (cls: Type["Packager"]) and other.
-class _PackagerMeta(ABCMeta):
+class Packager(ABC):
     """
-    Metaclass for `Packager` to override type class methods.
-    """
-
-    def __lt__(cls, other) -> bool:
-        """
-        A less than implementation to compare by priority in order to be able to sort the packagers by it.
-
-        :param other: The compared packager.
-
-        :return: True if priority is lower (means better) and False otherwise.
-        """
-        return cls.PRIORITY < other.PRIORITY
-
-    def __repr__(cls) -> str:
-        """
-        Get the string representation of a packager in the following format:
-        <packager name>(type=<handled type>, artifact_types=[<all supported artifact types>], priority=<priority>)
-
-        :return: The string representation of a packager.
-        """
-        # Get the packager info into variables:
-        packager_name = cls.__name__
-        handled_type = (
-            (
-                # Types have __name__ attribute but typing's types do not.
-                cls.PACKABLE_OBJECT_TYPE.__name__
-                if hasattr(cls.PACKABLE_OBJECT_TYPE, "__name__")
-                else str(cls.PACKABLE_OBJECT_TYPE)
-            )
-            if cls.PACKABLE_OBJECT_TYPE is not ...
-            else "Any"
-        )
-        supported_artifact_types = cls.get_supported_artifact_types()
-
-        # Return the string representation in the format noted above:
-        return (
-            f"{packager_name}(packable_type={handled_type}, artifact_types={supported_artifact_types}, "
-            f"priority={cls.PRIORITY})"
-        )
-
-
-class Packager(ABC, metaclass=_PackagerMeta):
-    """
-    The abstract base class for a packager. A packager is a static class that has two main duties:
+    The abstract base class for a packager. Packager has two main duties:
 
     1. **Packing** - get an object that was returned from a function and log it to MLRun. The user can specify packing
        configurations to the packager using log hints. The packed object can be an artifact or a result.
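The removed `_PackagerMeta` existed only because packagers used to be compared and printed as classes, which requires `__lt__`/`__repr__` on the metaclass; once packagers are instances, plain dunder methods suffice. A condensed sketch of the new scheme (simplified: the real `__init__` defers `...` priorities to the packagers manager):

```python
from abc import ABC


class MiniPackager(ABC):
    PRIORITY: int = ...

    def __init__(self):
        self._priority = type(self).PRIORITY  # simplified; see the note above

    def __lt__(self, other: "MiniPackager") -> bool:
        return self._priority < other._priority


class FastPackager(MiniPackager):
    PRIORITY = 1


class SlowPackager(MiniPackager):
    PRIORITY = 5


# Instances sort directly; sorting the *classes* would have needed the metaclass.
print([type(p).__name__ for p in sorted([SlowPackager(), FastPackager()])])
# ['FastPackager', 'SlowPackager']
```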
@@ -134,7 +88,7 @@ class Packager(ABC, metaclass=_PackagerMeta):
             with open("./some_file.txt", "w") as file:
                 file.write("Pack me")
             artifact = Artifact(key="my_artifact")
-            cls.add_future_clearing_path(path="./some_file.txt")
+            self.add_future_clearing_path(path="./some_file.txt")
             return artifact, None
     """
 
@@ -144,12 +98,16 @@ class Packager(ABC, metaclass=_PackagerMeta):
     #: The priority of this packager in the packagers collection of the manager (lower is better).
     PRIORITY: int = ...
 
-    # List of all paths to be deleted by the manager of this packager after logging the packages:
-    _CLEARING_PATH_LIST: List[str] = []
+    def __init__(self):
+        # Assign the packager's priority (notice that if it is equal to `...` then it will be overridden by the
+        # packager manager when collected):
+        self._priority = Packager.PRIORITY
+
+        # List of all paths to be deleted by the manager of this packager after logging the packages:
+        self._future_clearing_path_list: List[str] = []
 
-    @classmethod
     @abstractmethod
-    def get_default_packing_artifact_type(cls, obj: Any) -> str:
+    def get_default_packing_artifact_type(self, obj: Any) -> str:
         """
         Get the default artifact type used for packing. The method is used when an object is sent for packing
         without an artifact type noted by the user.
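One subtlety in the new `__init__`: `self._priority = Packager.PRIORITY` reads the base-class attribute explicitly, so it is `...` at construction even when a subclass overrides `PRIORITY`; the subclass value only takes effect when the manager assigns it, exactly as the comment says. A stripped-down demonstration of the difference:

```python
class Base:
    PRIORITY = ...

    def __init__(self):
        self._priority = Base.PRIORITY  # always Ellipsis, even for subclasses


class Sub(Base):
    PRIORITY = 3


print(Sub()._priority)       # Ellipsis -- the manager must fill it in
print(type(Sub()).PRIORITY)  # 3 -- what `type(self).PRIORITY` would have picked up
```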
@@ -160,9 +118,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
-    def get_default_unpacking_artifact_type(cls, data_item: DataItem) -> str:
+    def get_default_unpacking_artifact_type(self, data_item: DataItem) -> str:
         """
         Get the default artifact type used for unpacking a data item holding an object of this packager. The method
         is used when a data item is sent for unpacking without it being a package, but is a simple url or an old
@@ -174,9 +131,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
-    def get_supported_artifact_types(cls) -> List[str]:
+    def get_supported_artifact_types(self) -> List[str]:
         """
         Get all the supported artifact types on this packager.
 
@@ -184,10 +140,9 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
     def pack(
-        cls,
+        self,
         obj: Any,
         key: str = None,
         artifact_type: str = None,
@@ -206,10 +161,9 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     @abstractmethod
     def unpack(
-        cls,
+        self,
         data_item: DataItem,
         artifact_type: str = None,
         instructions: dict = None,
@@ -225,9 +179,8 @@ class Packager(ABC, metaclass=_PackagerMeta):
         """
         pass
 
-    @classmethod
     def is_packable(
-        cls, obj: Any, artifact_type: str = None, configurations: dict = None
+        self, obj: Any, artifact_type: str = None, configurations: dict = None
     ) -> bool:
         """
         Check if this packager can pack an object of the provided type as the provided artifact type.
@@ -247,20 +200,19 @@ class Packager(ABC, metaclass=_PackagerMeta):
 
         # Validate the object type (ellipses means any type):
         if (
-            cls.PACKABLE_OBJECT_TYPE is not ...
-            and object_type != cls.PACKABLE_OBJECT_TYPE
+            self.PACKABLE_OBJECT_TYPE is not ...
+            and object_type != self.PACKABLE_OBJECT_TYPE
         ):
             return False
 
         # Validate the artifact type (if given):
-        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
+        if artifact_type and artifact_type not in self.get_supported_artifact_types():
             return False
 
         return True
 
-    @classmethod
     def is_unpackable(
-        cls, data_item: DataItem, type_hint: Type, artifact_type: str = None
+        self, data_item: DataItem, type_hint: Type, artifact_type: str = None
     ) -> bool:
         """
         Check if this packager can unpack an input according to the user-given type hint and the provided artifact type.
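With every hook now an instance method, a concrete packager is an ordinary subclass and `is_packable`/`is_unpackable` are called on instances. A minimal illustrative subclass (the key names, artifact types, and method bodies below are made up for the example, not mlrun's real packagers):

```python
class IntPackager(Packager):
    PACKABLE_OBJECT_TYPE = int
    PRIORITY = 5

    def get_default_packing_artifact_type(self, obj):
        return "result"

    def get_default_unpacking_artifact_type(self, data_item):
        return "object"

    def get_supported_artifact_types(self):
        return ["object", "result"]

    def pack(self, obj, key=None, artifact_type=None, configurations=None):
        return {key: obj}  # placeholder body

    def unpack(self, data_item, artifact_type=None, instructions=None):
        return 0  # placeholder body


packager = IntPackager()
assert packager.is_packable(obj=1, artifact_type="result")
assert not packager.is_packable(obj="not an int")
```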
@@ -275,44 +227,118 @@ class Packager(ABC, metaclass=_PackagerMeta):
         :return: True if unpackable and False otherwise.
         """
         # Check type (ellipses means any type):
-        if cls.PACKABLE_OBJECT_TYPE is not ...:
+        if self.PACKABLE_OBJECT_TYPE is not ...:
             if not TypeHintUtils.is_matching(
                 object_type=type_hint,  # The type hint is the expected object type the MLRun function wants.
-                type_hint=cls.PACKABLE_OBJECT_TYPE,
+                type_hint=self.PACKABLE_OBJECT_TYPE,
                 reduce_type_hint=False,
             ):
                 return False
 
         # Check the artifact type:
-        if artifact_type and artifact_type not in cls.get_supported_artifact_types():
+        if artifact_type and artifact_type not in self.get_supported_artifact_types():
             return False
 
         # Unpackable:
         return True
 
-    @classmethod
-    def add_future_clearing_path(
-        cls, path: Union[str, Path], add_temp_paths_only: bool = True
-    ):
+    def add_future_clearing_path(self, path: Union[str, Path]):
         """
         Mark a path to be cleared by this packager's manager after logging the packaged artifacts.
 
-        :param path:                The path to clear.
-        :param add_temp_paths_only: Whether to add only temporary files. When running locally on local files
-                                    ``DataItem.local()`` returns the local given path, which should not be deleted.
-                                    This flag helps to avoid deleting files in that scenario.
+        :param path: The path to clear post logging the artifacts.
+        """
+        self._future_clearing_path_list.append(str(path))
+
+    @property
+    def priority(self) -> int:
+        """
+        Get the packager's priority.
+
+        :return: The packager's priority.
+        """
+        return self._priority
+
+    @priority.setter
+    def priority(self, priority: int):
         """
-        if add_temp_paths_only:
-            if pathlib.Path(path).is_relative_to(tempfile.gettempdir()):
-                cls._CLEARING_PATH_LIST.append(str(path))
-            return
-        cls._CLEARING_PATH_LIST.append(str(path))
-
-    @classmethod
-    def get_future_clearing_path_list(cls) -> List[str]:
+        Set the packager's priority.
+
+        :param priority: The priority to set.
+        """
+        self._priority = priority
+
+    @property
+    def future_clearing_path_list(self) -> List[str]:
         """
         Get the packager's future clearing path list.
 
         :return: The clearing path list.
         """
-        return cls._CLEARING_PATH_LIST
+        return self._future_clearing_path_list
+
+    def __lt__(self, other: "Packager") -> bool:
+        """
+        A less than implementation to compare by priority in order to be able to sort the packagers by it.
+
+        :param other: The compared packager.
+
+        :return: True if priority is lower (means better) and False otherwise.
+        """
+        return self.priority < other.priority
+
+    def __repr__(self) -> str:
+        """
+        Get the string representation of a packager in the following format:
+        <packager name>(type=<handled type>, artifact_types=[<all supported artifact types>], priority=<priority>)
+
+        :return: The string representation of a packager.
+        """
+        # Get the packager info into variables:
+        packager_name = self.__class__.__name__
+        handled_type = (
+            (
+                # Types have __name__ attribute but typing's types do not.
+                self.PACKABLE_OBJECT_TYPE.__name__
+                if hasattr(self.PACKABLE_OBJECT_TYPE, "__name__")
+                else str(self.PACKABLE_OBJECT_TYPE)
+            )
+            if self.PACKABLE_OBJECT_TYPE is not ...
+            else "Any"
+        )
+        supported_artifact_types = self.get_supported_artifact_types()
+
+        # Return the string representation in the format noted above:
+        return (
+            f"{packager_name}(packable_type={handled_type}, artifact_types={supported_artifact_types}, "
+            f"priority={self.priority})"
+        )
+
+    def get_data_item_local_path(
+        self, data_item: DataItem, add_to_future_clearing_path: bool = None
+    ) -> str:
+        """
+        Get the local path to the item handled by the data item provided. The local path can be the same as the
+        data item, in case the data item points to a local path, or the item will be downloaded to a temporary
+        directory and this newly created temporary local path will be returned.
+
+        :param data_item:                   The data item to get its item local path.
+        :param add_to_future_clearing_path: Whether to add the local path to the future clearing paths list. If
+                                            None, it will add the path to the list only if the data item is not of
+                                            kind 'file', meaning it represents a local file and hence we don't want
+                                            to delete it automatically post running. We wish to delete it only if
+                                            the local path is temporary (and that will be the case when the kind is
+                                            not 'file', as it is then downloaded to a temporary directory).
+
+        :return: The data item local path.
+        """
+        # Get the local path to the item handled by the data item (download it to temporary if not local already):
+        local_path = data_item.local()
+
+        # Check if needed to add to the future clear list:
+        if add_to_future_clearing_path or (
+            add_to_future_clearing_path is None and data_item.kind != "file"
+        ):
+            self.add_future_clearing_path(path=local_path)
+
+        return local_path
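`get_data_item_local_path` centralizes the old `add_temp_paths_only` logic: instead of checking whether the path lives under the temp directory, it keys off `data_item.kind`, only queueing non-`'file'` items (which `DataItem.local()` downloads to a temporary location) for deletion. A sketch of how that plays out, using a stub in place of a real `DataItem` and the illustrative `IntPackager` from the earlier sketch:

```python
class StubDataItem:
    def __init__(self, kind: str, path: str):
        self.kind = kind
        self._path = path

    def local(self) -> str:
        # A real DataItem downloads remote items to a temp file;
        # a local 'file' item returns the user's own path.
        return self._path


packager = IntPackager()
packager.get_data_item_local_path(StubDataItem("s3", "/tmp/downloaded.csv"))   # queued for clearing
packager.get_data_item_local_path(StubDataItem("file", "/home/me/data.csv"))   # left alone
print(packager.future_clearing_path_list)  # ['/tmp/downloaded.csv']
```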