oodeel 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. oodeel/__init__.py +28 -0
  2. oodeel/aggregator/__init__.py +26 -0
  3. oodeel/aggregator/base.py +70 -0
  4. oodeel/aggregator/fisher.py +259 -0
  5. oodeel/aggregator/mean.py +72 -0
  6. oodeel/aggregator/std.py +86 -0
  7. oodeel/datasets/__init__.py +24 -0
  8. oodeel/datasets/data_handler.py +334 -0
  9. oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
  10. oodeel/datasets/deprecated/DEPRECATED_ooddataset.py +330 -0
  11. oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
  12. oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
  13. oodeel/datasets/deprecated/__init__.py +31 -0
  14. oodeel/datasets/tf_data_handler.py +600 -0
  15. oodeel/datasets/torch_data_handler.py +672 -0
  16. oodeel/eval/__init__.py +22 -0
  17. oodeel/eval/metrics.py +218 -0
  18. oodeel/eval/plots/__init__.py +27 -0
  19. oodeel/eval/plots/features.py +345 -0
  20. oodeel/eval/plots/metrics.py +118 -0
  21. oodeel/eval/plots/plotly.py +162 -0
  22. oodeel/extractor/__init__.py +35 -0
  23. oodeel/extractor/feature_extractor.py +187 -0
  24. oodeel/extractor/hf_torch_feature_extractor.py +184 -0
  25. oodeel/extractor/keras_feature_extractor.py +409 -0
  26. oodeel/extractor/torch_feature_extractor.py +506 -0
  27. oodeel/methods/__init__.py +47 -0
  28. oodeel/methods/base.py +570 -0
  29. oodeel/methods/dknn.py +185 -0
  30. oodeel/methods/energy.py +119 -0
  31. oodeel/methods/entropy.py +113 -0
  32. oodeel/methods/gen.py +113 -0
  33. oodeel/methods/gram.py +274 -0
  34. oodeel/methods/mahalanobis.py +209 -0
  35. oodeel/methods/mls.py +113 -0
  36. oodeel/methods/odin.py +109 -0
  37. oodeel/methods/rmds.py +172 -0
  38. oodeel/methods/she.py +159 -0
  39. oodeel/methods/vim.py +273 -0
  40. oodeel/preprocess/__init__.py +31 -0
  41. oodeel/preprocess/tf_preprocess.py +95 -0
  42. oodeel/preprocess/torch_preprocess.py +97 -0
  43. oodeel/types/__init__.py +75 -0
  44. oodeel/utils/__init__.py +38 -0
  45. oodeel/utils/general_utils.py +97 -0
  46. oodeel/utils/operator.py +253 -0
  47. oodeel/utils/tf_operator.py +269 -0
  48. oodeel/utils/tf_training_tools.py +219 -0
  49. oodeel/utils/torch_operator.py +292 -0
  50. oodeel/utils/torch_training_tools.py +303 -0
  51. oodeel-0.4.0.dist-info/METADATA +409 -0
  52. oodeel-0.4.0.dist-info/RECORD +63 -0
  53. oodeel-0.4.0.dist-info/WHEEL +5 -0
  54. oodeel-0.4.0.dist-info/licenses/LICENSE +21 -0
  55. oodeel-0.4.0.dist-info/top_level.txt +2 -0
  56. tests/__init__.py +22 -0
  57. tests/tests_tensorflow/__init__.py +37 -0
  58. tests/tests_tensorflow/tf_methods_utils.py +140 -0
  59. tests/tests_tensorflow/tools_tf.py +86 -0
  60. tests/tests_torch/__init__.py +38 -0
  61. tests/tests_torch/tools_torch.py +151 -0
  62. tests/tests_torch/torch_methods_utils.py +148 -0
  63. tests/tools_operator.py +153 -0
@@ -0,0 +1,334 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
3
+ # rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
4
+ # CRIAQ and ANITI - https://www.deel.ai/
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in all
14
+ # copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ import importlib.util
24
+ from abc import ABC
25
+ from abc import abstractmethod
26
+
27
+ import numpy as np
28
+
29
+ from ..types import Callable
30
+ from ..types import DatasetType
31
+ from ..types import ItemType
32
+ from ..types import Optional
33
+ from ..types import TensorType
34
+ from ..types import Tuple
35
+ from ..types import Union
36
+
37
+
38
def get_backend():
    """Return the name of the single deep-learning backend that is installed.

    TensorFlow and PyTorch are looked up with `importlib.util.find_spec`,
    in that order.

    Returns:
        str: "tensorflow" or "torch", whichever is the only one found.

    Raises:
        ImportError: If neither backend is found, or if both are found (the
            choice is then ambiguous and has to be made by the caller).
    """
    candidates = ("tensorflow", "torch")
    found = [name for name in candidates if importlib.util.find_spec(name)]

    if len(found) == 0:
        raise ImportError("Neither TensorFlow nor PyTorch is installed.")
    if len(found) > 1:
        raise ImportError(
            "Both TensorFlow and PyTorch are installed. Please specify the backend."
        )
    return found[0]
55
+
56
+
57
def load_data_handler(backend: str = None):
    """Instantiate the data handler matching a backend.

    Args:
        backend (str, optional): "tensorflow" or "torch". If None, the backend
            is detected with `get_backend()`. Defaults to None.

    Returns:
        A `TFDataHandler` instance for "tensorflow", a `TorchDataHandler`
        instance for "torch".

    Raises:
        ImportError: Raised by `get_backend()` when `backend` is None and
            zero or two backends are installed.
        ValueError: If `backend` is neither "tensorflow" nor "torch".
    """
    if backend is None:
        backend = get_backend()

    # The handler modules are imported inside the branch that needs them, so
    # the module of the other backend is never imported here.
    if backend == "tensorflow":
        from .tf_data_handler import TFDataHandler

        return TFDataHandler()

    if backend == "torch":
        from .torch_data_handler import TorchDataHandler

        return TorchDataHandler()

    # An unrecognised name used to fall through and silently return None;
    # fail loudly instead so the mistake is caught where it is made.
    raise ValueError(
        f"Unknown backend {backend!r}. Expected 'tensorflow' or 'torch'."
    )
70
+
71
+
72
class DataHandler(ABC):
    """
    Backend-agnostic interface to manage Datasets.

    Concrete handlers implement the abstract methods below for a given library
    (torch, tensorflow or other...) so that datasets can be loaded, filtered,
    mapped and prepared without having to use library-specific syntax.
    """

    def __init__(self):
        # Both attributes are left unset by the base class.
        self.channel_order = None
        self.backend = None

    def split_by_class(
        self,
        dataset: DatasetType,
        in_labels: Optional[Union[np.ndarray, list]] = None,
        out_labels: Optional[Union[np.ndarray, list]] = None,
    ) -> Optional[Tuple[DatasetType]]:
        """Split a dataset in two according to the value of its "label" column
        (typically, a class id).

        At least one of `in_labels` / `out_labels` has to be given. When only
        one of them is given, the other subset is made of the samples whose
        label is NOT in the given set.

        Args:
            dataset (DatasetType): dataset to split. Its items must have at
                least two elements (checked with `get_item_length`).
            in_labels (Optional[Union[np.ndarray, list]], optional): labels
                considered as in-distribution. Defaults to None.
            out_labels (Optional[Union[np.ndarray, list]], optional): labels
                considered as out-of-distribution. Defaults to None.

        Returns:
            Optional[Tuple[DatasetType]]: the in-distribution dataset followed
                by the out-of-distribution dataset.
        """
        # Sanity checks: labels to filter with, and a dataset that has labels
        assert not (
            in_labels is None and out_labels is None
        ), "specify labels to filter with"
        assert self.get_item_length(dataset) >= 2, "the dataset has no labels"

        # Both sets given: each subset is selected with its own labels
        if in_labels is not None and out_labels is not None:
            return (
                self.filter_by_value(dataset, "label", in_labels),
                self.filter_by_value(dataset, "label", out_labels),
            )

        # Only in_labels given: out-of-distribution is everything else
        if out_labels is None:
            return (
                self.filter_by_value(dataset, "label", in_labels),
                self.filter_by_value(dataset, "label", in_labels, excluded=True),
            )

        # Only out_labels given: in-distribution is everything else
        return (
            self.filter_by_value(dataset, "label", out_labels, excluded=True),
            self.filter_by_value(dataset, "label", out_labels),
        )

    @classmethod
    @abstractmethod
    def prepare(
        cls,
        dataset: DatasetType,
        batch_size: int,
        preprocess_fn: Optional[Callable] = None,
        augment_fn: Optional[Callable] = None,
        columns: Optional[list] = None,
        shuffle: bool = False,
        dict_based_fns: bool = True,
        return_tuple: bool = True,
        **kwargs_prepare,
    ) -> DatasetType:
        """Get a dataset ready for scoring or training.

        Args:
            dataset (DatasetType): Dataset to prepare
            batch_size (int): Batch size
            preprocess_fn (Callable, optional): Preprocessing function applied
                to the dataset. Defaults to None.
            augment_fn (Callable, optional): Augmentation function, for when the
                returned dataset is used for training. Defaults to None.
            columns (list, optional): Columns to return. All columns are kept
                if None. Defaults to None.
            shuffle (bool, optional): Whether to shuffle the returned dataset.
                Defaults to False.
            dict_based_fns (bool): Whether the preprocess and augmentation
                functions are dict based (True) or tuple based (False).
                Defaults to True.
            return_tuple (bool, optional): Whether each item of the returned
                dataset is a tuple. Defaults to True.
            kwargs_prepare (dict): Extra parameters forwarded to the
                data handler for backend specific preparation.

        Returns:
            DatasetType: prepared dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def load_dataset_from_arrays(
        dataset_id: ItemType, columns: Optional[list] = None
    ) -> DatasetType:
        """Build a DatasetType out of np.ndarray(s) / Tensor(s).

        Args:
            dataset_id (ItemType): numpy array(s) to load.
            columns (list, optional): Names given to the columns. If None, the
                i-th column is named "input_i". Defaults to None.

        Returns:
            DatasetType: the loaded dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def load_custom_dataset(
        dataset_id: DatasetType, columns: Optional[list] = None
    ) -> DatasetType:
        """Load a user-provided dataset, making sure it is properly formatted.

        Args:
            dataset_id (DatasetType): dataset
            columns (list, optional): Names given to the elements when
                dataset_id is tuple based. If None, the i-th column is named
                "input_i". Defaults to None.

        Returns:
            DatasetType: a properly formatted dataset.
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def load_from_huggingface(
        dataset_id: str,
        load_kwargs: dict = {},
    ) -> DatasetType:
        """Load a Dataset from the Hugging Face datasets catalog.

        Args:
            dataset_id (str): Identifier of the dataset
            load_kwargs (dict): Extra loading kwargs used when initializing
                the dataset.

        Returns:
            DatasetType: dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def map_ds(dataset: DatasetType, map_fn: Callable) -> DatasetType:
        """Apply a function to a Dataset.

        Args:
            dataset (DatasetType): Dataset the function is mapped to
            map_fn (Callable): Function to map

        Returns:
            DatasetType: Mapped dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def filter_by_value(
        dataset: DatasetType,
        column_name: str,
        values: list,
        excluded: bool = False,
    ) -> DatasetType:
        """Filter a dataset on whether the value of a column is in `values`.

        Args:
            dataset (DatasetType): Dataset to filter
            column_name (str): Column used to filter the dataset
            values (list): Column values to keep (if excluded is False)
                or to exclude (if excluded is True)
            excluded (bool, optional): Keep (False) or drop (True) the samples
                whose column value is in `values`. Defaults to False.

        Returns:
            DatasetType: Filtered dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_item_length(dataset: DatasetType) -> int:
        """Count the elements of one item of a dataset.

        Args:
            dataset (DatasetType): Dataset

        Returns:
            int: Item length
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_dataset_length(dataset: DatasetType) -> int:
        """Count the items of a dataset.

        Args:
            dataset (DatasetType): Dataset

        Returns:
            int: Dataset length
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_column_elements_shape(
        dataset: DatasetType, column_name: Union[str, int]
    ) -> tuple:
        """Get the shape of the elements of the column of a dataset identified
        by column_name.

        Args:
            dataset (DatasetType): a Dataset
            column_name (Union[str, int]): Column to get the element shape
                from.

        Returns:
            tuple: shape of an element of column_name
        """
        raise NotImplementedError()

    @staticmethod
    def get_columns_shapes(dataset: DatasetType) -> dict:
        """Get the shape of the elements of every column of a dataset.

        Args:
            dataset (DatasetType): a Dataset

        Returns:
            dict: column names mapped to their corresponding shape
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_input_from_dataset_item(elem: ItemType) -> TensorType:
        """Get the tensor to be fed as input to a model from a dataset element.

        Args:
            elem (ItemType): dataset element to extract the input from

        Returns:
            TensorType: Input tensor
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_label_from_dataset_item(item: ItemType):
        """Retrieve the label tensor from an item given as a tuple/list. The
        label must be at index 1 of the item. One-hot encoded labels are
        converted to a single value.

        Args:
            item (ItemType): dataset element to extract the label from

        Returns:
            Any: Label tensor
        """
        raise NotImplementedError()
@@ -0,0 +1,236 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
3
+ # rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
4
+ # CRIAQ and ANITI - https://www.deel.ai/
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in all
14
+ # copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ from abc import ABC
24
+ from abc import abstractmethod
25
+
26
+ import numpy as np
27
+
28
+ from ...types import Callable
29
+ from ...types import DatasetType
30
+ from ...types import ItemType
31
+ from ...types import Optional
32
+ from ...types import Tuple
33
+ from ...types import Union
34
+
35
+
36
class DataHandler(ABC):
    """
    Backend-agnostic interface to manage Datasets.

    Concrete handlers implement the abstract methods below for a given library
    (torch, tensorflow or other...) so that datasets can be handled without
    having to use library-specific syntax.
    """

    @classmethod
    @abstractmethod
    def load_dataset(
        cls,
        dataset_id: Union[ItemType, DatasetType, str],
        keys: Optional[list] = None,
        load_kwargs: dict = {},
    ) -> DatasetType:
        """Load a dataset from one of several kinds of identification.

        Args:
            dataset_id (Union[ItemType, DatasetType, str]): dataset
                identification
            keys (list, optional): Feature keys. If None, the i-th feature is
                named "input_i". Defaults to None.
            load_kwargs (dict, optional): Extra loading kwargs. Defaults to {}.

        Returns:
            DatasetType: dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def assign_feature_value(
        dataset: DatasetType, feature_key: str, value: int
    ) -> DatasetType:
        """Set a feature to the same value for every sample of a Dataset.

        Args:
            dataset (DatasetType): Dataset to assign the value to
            feature_key (str): Feature to assign the value to
            value (int): Value to assign

        Returns:
            DatasetType: updated dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_feature_from_ds(dataset: DatasetType, feature_key: str) -> np.ndarray:
        """Extract the values of a feature from a Dataset.

        Args:
            dataset (DatasetType): Dataset to get the feature from
            feature_key (str): Key of the feature to get

        Returns:
            np.ndarray: Feature values for dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_ds_feature_keys(dataset: DatasetType) -> list:
        """List the feature keys of a Dataset.

        Args:
            dataset (DatasetType): Dataset to get the feature keys from

        Returns:
            list: List of feature keys
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def has_feature_key(dataset: DatasetType, key: str) -> bool:
        """Tell whether a Dataset has a feature denoted by key.

        Args:
            dataset (DatasetType): Dataset to check
            key (str): Key to check

        Returns:
            bool: True if the dataset has a feature denoted by key
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def map_ds(dataset: DatasetType, map_fn: Callable) -> DatasetType:
        """Apply a function to a Dataset.

        Args:
            dataset (DatasetType): Dataset the function is mapped to
            map_fn (Callable): Function to map

        Returns:
            DatasetType: Mapped dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def filter_by_feature_value(
        dataset: DatasetType,
        feature_key: str,
        values: list,
        excluded: bool = False,
    ) -> DatasetType:
        """Filter a dataset on whether the value of a feature is in `values`.

        Args:
            dataset (DatasetType): Dataset to filter
            feature_key (str): Name of the feature whose value is checked
            values (list): feature_key values to keep (if excluded is False)
                or to exclude (if excluded is True)
            excluded (bool, optional): Keep (False) or drop (True) the samples
                whose feature_key value is in `values`. Defaults to False.

        Returns:
            DatasetType: Filtered dataset
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def merge(
        id_dataset: DatasetType,
        ood_dataset: DatasetType,
        resize: Optional[bool] = False,
        shape: Optional[Tuple[int]] = None,
    ) -> DatasetType:
        """Merge two datasets.

        Args:
            id_dataset (DatasetType): dataset of in-distribution data
            ood_dataset (DatasetType): dataset of out-of-distribution data
            resize (Optional[bool], optional): toggles whether the input
                tensors of the datasets have to be resized to the same shape.
                Defaults to False.
            shape (Optional[Tuple[int]], optional): shape used to resize the
                input tensors. If None, the shape of the id_dataset input
                tensors is used. Defaults to None.

        Returns:
            DatasetType: merged dataset
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def prepare_for_training(
        cls,
        dataset: DatasetType,
        batch_size: int,
        shuffle: bool = False,
        preprocess_fn: Optional[Callable] = None,
        augment_fn: Optional[Callable] = None,
        output_keys: list = ["input", "label"],
    ) -> DatasetType:
        """Get a dataset ready for training.

        Args:
            dataset (DatasetType): Dataset to prepare
            batch_size (int): Batch size
            shuffle (bool): Whether to shuffle the dataloader or not.
                Defaults to False.
            preprocess_fn (Callable, optional): Preprocessing function applied
                to the dataset. Defaults to None.
            augment_fn (Callable, optional): Augmentation function, for when the
                returned dataset is used for training. Defaults to None.
            output_keys (list): Keys of the features that are returned. Keep
                all features if None. Defaults to ["input", "label"].

        Returns:
            DatasetType: prepared dataset / dataloader
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_item_length(dataset: DatasetType) -> int:
        """Count the elements of one item of a dataset.

        Args:
            dataset (DatasetType): Dataset

        Returns:
            int: Item length
        """
        raise NotImplementedError()

    @staticmethod
    @abstractmethod
    def get_dataset_length(dataset: DatasetType) -> int:
        """Count the items of a dataset.

        Args:
            dataset (DatasetType): Dataset

        Returns:
            int: Dataset length
        """
        raise NotImplementedError()