oodeel-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oodeel/__init__.py +28 -0
- oodeel/aggregator/__init__.py +26 -0
- oodeel/aggregator/base.py +70 -0
- oodeel/aggregator/fisher.py +259 -0
- oodeel/aggregator/mean.py +72 -0
- oodeel/aggregator/std.py +86 -0
- oodeel/datasets/__init__.py +24 -0
- oodeel/datasets/data_handler.py +334 -0
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/deprecated/DEPRECATED_ooddataset.py +330 -0
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +600 -0
- oodeel/datasets/torch_data_handler.py +672 -0
- oodeel/eval/__init__.py +22 -0
- oodeel/eval/metrics.py +218 -0
- oodeel/eval/plots/__init__.py +27 -0
- oodeel/eval/plots/features.py +345 -0
- oodeel/eval/plots/metrics.py +118 -0
- oodeel/eval/plots/plotly.py +162 -0
- oodeel/extractor/__init__.py +35 -0
- oodeel/extractor/feature_extractor.py +187 -0
- oodeel/extractor/hf_torch_feature_extractor.py +184 -0
- oodeel/extractor/keras_feature_extractor.py +409 -0
- oodeel/extractor/torch_feature_extractor.py +506 -0
- oodeel/methods/__init__.py +47 -0
- oodeel/methods/base.py +570 -0
- oodeel/methods/dknn.py +185 -0
- oodeel/methods/energy.py +119 -0
- oodeel/methods/entropy.py +113 -0
- oodeel/methods/gen.py +113 -0
- oodeel/methods/gram.py +274 -0
- oodeel/methods/mahalanobis.py +209 -0
- oodeel/methods/mls.py +113 -0
- oodeel/methods/odin.py +109 -0
- oodeel/methods/rmds.py +172 -0
- oodeel/methods/she.py +159 -0
- oodeel/methods/vim.py +273 -0
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/types/__init__.py +75 -0
- oodeel/utils/__init__.py +38 -0
- oodeel/utils/general_utils.py +97 -0
- oodeel/utils/operator.py +253 -0
- oodeel/utils/tf_operator.py +269 -0
- oodeel/utils/tf_training_tools.py +219 -0
- oodeel/utils/torch_operator.py +292 -0
- oodeel/utils/torch_training_tools.py +303 -0
- oodeel-0.4.0.dist-info/METADATA +409 -0
- oodeel-0.4.0.dist-info/RECORD +63 -0
- oodeel-0.4.0.dist-info/WHEEL +5 -0
- oodeel-0.4.0.dist-info/licenses/LICENSE +21 -0
- oodeel-0.4.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +22 -0
- tests/tests_tensorflow/__init__.py +37 -0
- tests/tests_tensorflow/tf_methods_utils.py +140 -0
- tests/tests_tensorflow/tools_tf.py +86 -0
- tests/tests_torch/__init__.py +38 -0
- tests/tests_torch/tools_torch.py +151 -0
- tests/tests_torch/torch_methods_utils.py +148 -0
- tests/tools_operator.py +153 -0
oodeel/datasets/data_handler.py
@@ -0,0 +1,334 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import importlib.util
+from abc import ABC
+from abc import abstractmethod
+
+import numpy as np
+
+from ..types import Callable
+from ..types import DatasetType
+from ..types import ItemType
+from ..types import Optional
+from ..types import TensorType
+from ..types import Tuple
+from ..types import Union
+
+
+def get_backend():
+    """Detects whether TensorFlow or PyTorch is available and returns
+    the preferred backend."""
+    available_backends = []
+    if importlib.util.find_spec("tensorflow"):
+        available_backends.append("tensorflow")
+    if importlib.util.find_spec("torch"):
+        available_backends.append("torch")
+
+    if len(available_backends) == 1:
+        return available_backends[0]
+    elif len(available_backends) == 0:
+        raise ImportError("Neither TensorFlow nor PyTorch is installed.")
+    else:
+        raise ImportError(
+            "Both TensorFlow and PyTorch are installed. Please specify the backend."
+        )
+
+
+def load_data_handler(backend: str = None):
+    if backend is None:
+        backend = get_backend()
+
+    if backend == "tensorflow":
+        from .tf_data_handler import TFDataHandler
+
+        return TFDataHandler()
+
+    elif backend == "torch":
+        from .torch_data_handler import TorchDataHandler
+
+        return TorchDataHandler()
+
+
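For orientation, a minimal sketch of how these two helpers combine (assuming at least one of the two frameworks is installed; when both are, `load_data_handler` must be given the backend explicitly, since `get_backend` raises in that case):

```python
from oodeel.datasets.data_handler import get_backend, load_data_handler

print(get_backend())                  # e.g. "torch" if only PyTorch is installed
handler = load_data_handler()         # auto-detects the backend
handler = load_data_handler("torch")  # or force it when both are installed
```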
+class DataHandler(ABC):
+    """
+    Class to manage Datasets. The aim is to provide a simple interface
+    for working with datasets (torch, tensorflow or other...) and manage them without
+    having to use library-specific syntax.
+    """
+
+    def __init__(self):
+        self.backend = None
+        self.channel_order = None
+
+    def split_by_class(
+        self,
+        dataset: DatasetType,
+        in_labels: Optional[Union[np.ndarray, list]] = None,
+        out_labels: Optional[Union[np.ndarray, list]] = None,
+    ) -> Optional[Tuple[DatasetType]]:
+        """Split the dataset into in-distribution and out-of-distribution
+        subsets depending on the label value (typically, the class id).
+
+        Args:
+            dataset (DatasetType): Dataset to split.
+            in_labels (Optional[Union[np.ndarray, list]], optional): Set of labels
+                to be considered as in-distribution. Defaults to None.
+            out_labels (Optional[Union[np.ndarray, list]], optional): Set of labels
+                to be considered as out-of-distribution. Defaults to None.
+
+        Returns:
+            Optional[Tuple[DatasetType]]: Tuple of in-distribution and
+                out-of-distribution datasets.
+        """
+        # Make sure labels to filter with are given and the dataset has labels
+        assert (in_labels is not None) or (
+            out_labels is not None
+        ), "specify labels to filter with"
+        assert self.get_item_length(dataset) >= 2, "the dataset has no labels"
+
+        # Filter the dataset depending on the given in_labels and out_labels
+        if (out_labels is not None) and (in_labels is not None):
+            in_data = self.filter_by_value(dataset, "label", in_labels)
+            out_data = self.filter_by_value(dataset, "label", out_labels)
+
+        elif out_labels is None:
+            in_data = self.filter_by_value(dataset, "label", in_labels)
+            out_data = self.filter_by_value(dataset, "label", in_labels, excluded=True)
+
+        elif in_labels is None:
+            in_data = self.filter_by_value(dataset, "label", out_labels, excluded=True)
+            out_data = self.filter_by_value(dataset, "label", out_labels)
+
+        # Return the filtered datasets
+        return in_data, out_data
+
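To illustrate the branch logic above: giving only `in_labels` marks every other label as OOD, and giving only `out_labels` does the reverse (a sketch; `handler` stands for a concrete subclass such as `TorchDataHandler`, and `dataset` is assumed to carry a "label" column):

```python
# ID = classes 0-4, OOD = everything else
in_data, out_data = handler.split_by_class(dataset, in_labels=[0, 1, 2, 3, 4])

# The equivalent split expressed the other way around
in_data, out_data = handler.split_by_class(dataset, out_labels=[5, 6, 7, 8, 9])
```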
+    @classmethod
+    @abstractmethod
+    def prepare(
+        cls,
+        dataset: DatasetType,
+        batch_size: int,
+        preprocess_fn: Optional[Callable] = None,
+        augment_fn: Optional[Callable] = None,
+        columns: Optional[list] = None,
+        shuffle: bool = False,
+        dict_based_fns: bool = True,
+        return_tuple: bool = True,
+        **kwargs_prepare,
+    ) -> DatasetType:
+        """Prepare a dataset for scoring or training.
+
+        Args:
+            dataset (DatasetType): Dataset to prepare.
+            batch_size (int): Batch size.
+            preprocess_fn (Callable, optional): Preprocessing function to apply to
+                the dataset. Defaults to None.
+            augment_fn (Callable, optional): Augmentation function to be used (when
+                the returned dataset is to be used for training). Defaults to None.
+            columns (list, optional): List of columns that will be returned. Keeps
+                all columns if None. Defaults to None.
+            shuffle (bool, optional): Whether to shuffle the returned dataset.
+                Defaults to False.
+            dict_based_fns (bool): Whether the preprocess and augmentation functions
+                are dict-based (if True) or tuple-based (if False). Defaults to True.
+            return_tuple (bool, optional): Whether to return each dataset item
+                as a tuple. Defaults to True.
+            kwargs_prepare (dict): Additional parameters passed to the
+                data_handler for backend-specific preparation.
+
+        Returns:
+            DatasetType: Prepared dataset.
+        """
+        raise NotImplementedError()
+
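As a sketch of the calling convention (hypothetical concrete handler; with the default `dict_based_fns=True`, the preprocessing function receives each item as a dict keyed by column name):

```python
def normalize(item: dict) -> dict:
    # Hypothetical preprocessing: scale image inputs to [0, 1]
    item["input"] = item["input"] / 255.0
    return item

batched = handler.prepare(
    dataset,
    batch_size=128,
    preprocess_fn=normalize,
    columns=["input", "label"],
    shuffle=True,
)
```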
+    @staticmethod
+    @abstractmethod
+    def load_dataset_from_arrays(
+        dataset_id: ItemType, columns: Optional[list] = None
+    ) -> DatasetType:
+        """Load a DatasetType from np.ndarrays / Tensors.
+
+        Args:
+            dataset_id (ItemType): Numpy array(s) to load.
+            columns (list, optional): Column names to assign. If None, the i-th
+                column is named "input_i". Defaults to None.
+
+        Returns:
+            DatasetType: Loaded dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def load_custom_dataset(
+        dataset_id: DatasetType, columns: Optional[list] = None
+    ) -> DatasetType:
+        """Load a custom dataset, ensuring it is properly formatted.
+
+        Args:
+            dataset_id (DatasetType): Dataset to load.
+            columns (list, optional): Column names to use for elements if
+                dataset_id is tuple-based. If None, the i-th column is named
+                "input_i". Defaults to None.
+
+        Returns:
+            DatasetType: A properly formatted dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def load_from_huggingface(
+        dataset_id: str,
+        load_kwargs: dict = {},
+    ) -> DatasetType:
+        """Load a Dataset from the Hugging Face datasets catalog.
+
+        Args:
+            dataset_id (str): Identifier of the dataset.
+            load_kwargs (dict): Loading kwargs to pass to the initialization
+                of the dataset.
+
+        Returns:
+            DatasetType: Loaded dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def map_ds(dataset: DatasetType, map_fn: Callable) -> DatasetType:
+        """Map a function over a Dataset.
+
+        Args:
+            dataset (DatasetType): Dataset to map the function over.
+            map_fn (Callable): Function to map.
+
+        Returns:
+            DatasetType: Mapped dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def filter_by_value(
+        dataset: DatasetType,
+        column_name: str,
+        values: list,
+        excluded: bool = False,
+    ) -> DatasetType:
+        """Filter the dataset by checking whether the value of a column is
+        in `values`.
+
+        Args:
+            dataset (DatasetType): Dataset to filter.
+            column_name (str): Column to filter the dataset on.
+            values (list): Column values to keep (if excluded is False)
+                or to exclude.
+            excluded (bool, optional): Whether to keep (False) or exclude (True)
+                the samples whose column value is in `values`. Defaults to False.
+
+        Returns:
+            DatasetType: Filtered dataset.
+        """
+        raise NotImplementedError()
+
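Note that `split_by_class` above is built from exactly this primitive; `excluded=True` inverts the selection (sketch with a hypothetical concrete handler):

```python
# Keep samples whose "label" is in {0, 1} ...
kept = handler.filter_by_value(dataset, "label", [0, 1])
# ... or keep everything except those.
rest = handler.filter_by_value(dataset, "label", [0, 1], excluded=True)
```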
+    @staticmethod
+    @abstractmethod
+    def get_item_length(dataset: DatasetType) -> int:
+        """Number of elements in a dataset item.
+
+        Args:
+            dataset (DatasetType): Dataset.
+
+        Returns:
+            int: Item length.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_dataset_length(dataset: DatasetType) -> int:
+        """Number of items in a dataset.
+
+        Args:
+            dataset (DatasetType): Dataset.
+
+        Returns:
+            int: Dataset length.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_column_elements_shape(
+        dataset: DatasetType, column_name: Union[str, int]
+    ) -> tuple:
+        """Get the shape of the elements of the dataset column identified by
+        column_name.
+
+        Args:
+            dataset (DatasetType): A Dataset.
+            column_name (Union[str, int]): The column name to get
+                the element shape from.
+
+        Returns:
+            tuple: The shape of an element from column_name.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    def get_columns_shapes(dataset: DatasetType) -> dict:
+        """Get the shapes of the elements of all columns of a dataset.
+
+        Args:
+            dataset (DatasetType): A Dataset.
+
+        Returns:
+            dict: Dictionary of column names and their corresponding shapes.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_input_from_dataset_item(elem: ItemType) -> TensorType:
+        """Get the tensor to be fed as input to a model from a dataset element.
+
+        Args:
+            elem (ItemType): Dataset element to extract the input from.
+
+        Returns:
+            TensorType: Input tensor.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_label_from_dataset_item(item: ItemType):
+        """Retrieve the label tensor from an item (tuple/list). The label must be
+        at index 1 in the item tuple. One-hot encoded labels are converted to
+        single values.
+
+        Args:
+            item (ItemType): Dataset element to extract the label from.
+
+        Returns:
+            Any: Label tensor.
+        """
+        raise NotImplementedError()
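This abstract contract is what `tf_data_handler.py` and `torch_data_handler.py` fill in. As a rough illustration only (not the shipped code), a hypothetical numpy-backed handler could satisfy two of the methods as below; the remaining abstract methods are omitted, so the class cannot be instantiated as-is, but its staticmethods can be called on the class:

```python
import numpy as np

from oodeel.datasets.data_handler import DataHandler


class NumpyDataHandler(DataHandler):
    """Hypothetical handler over a dict of numpy arrays (illustration only)."""

    @staticmethod
    def load_dataset_from_arrays(dataset_id, columns=None):
        arrays = dataset_id if isinstance(dataset_id, tuple) else (dataset_id,)
        # Default column names follow the documented "input_i" convention
        columns = columns or [f"input_{i}" for i in range(len(arrays))]
        return dict(zip(columns, arrays))

    @staticmethod
    def filter_by_value(dataset, column_name, values, excluded=False):
        mask = np.isin(dataset[column_name], values)
        if excluded:
            mask = ~mask
        return {key: array[mask] for key, array in dataset.items()}
```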
oodeel/datasets/deprecated/DEPRECATED_data_handler.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from abc import ABC
+from abc import abstractmethod
+
+import numpy as np
+
+from ...types import Callable
+from ...types import DatasetType
+from ...types import ItemType
+from ...types import Optional
+from ...types import Tuple
+from ...types import Union
+
+
+class DataHandler(ABC):
+    """
+    Class to manage Datasets. The aim is to provide a simple interface
+    for working with datasets (torch, tensorflow or other...) and manage them without
+    having to use library-specific syntax.
+    """
+
+    @classmethod
+    @abstractmethod
+    def load_dataset(
+        cls,
+        dataset_id: Union[ItemType, DatasetType, str],
+        keys: Optional[list] = None,
+        load_kwargs: dict = {},
+    ) -> DatasetType:
+        """Load a dataset from different kinds of sources.
+
+        Args:
+            dataset_id (Union[ItemType, DatasetType, str]): Dataset identifier.
+            keys (list, optional): Feature keys. If None, the i-th feature is
+                named "input_i". Defaults to None.
+            load_kwargs (dict, optional): Additional loading kwargs. Defaults to {}.
+
+        Returns:
+            DatasetType: Loaded dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def assign_feature_value(
+        dataset: DatasetType, feature_key: str, value: int
+    ) -> DatasetType:
+        """Assign a value to a feature for every sample in a Dataset.
+
+        Args:
+            dataset (DatasetType): Dataset to assign the value to.
+            feature_key (str): Feature to assign the value to.
+            value (int): Value to assign.
+
+        Returns:
+            DatasetType: Updated dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_feature_from_ds(dataset: DatasetType, feature_key: str) -> np.ndarray:
+        """Get a feature from a Dataset.
+
+        Args:
+            dataset (DatasetType): Dataset to get the feature from.
+            feature_key (str): Feature to get the values of.
+
+        Returns:
+            np.ndarray: Feature values for the dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_ds_feature_keys(dataset: DatasetType) -> list:
+        """Get the feature keys of a Dataset.
+
+        Args:
+            dataset (DatasetType): Dataset to get the feature keys from.
+
+        Returns:
+            list: List of feature keys.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def has_feature_key(dataset: DatasetType, key: str) -> bool:
+        """Check if a Dataset has a feature denoted by key.
+
+        Args:
+            dataset (DatasetType): Dataset to check.
+            key (str): Key to check.
+
+        Returns:
+            bool: Whether the dataset has a feature denoted by key.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def map_ds(dataset: DatasetType, map_fn: Callable) -> DatasetType:
+        """Map a function over a Dataset.
+
+        Args:
+            dataset (DatasetType): Dataset to map the function over.
+            map_fn (Callable): Function to map.
+
+        Returns:
+            DatasetType: Mapped dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def filter_by_feature_value(
+        dataset: DatasetType,
+        feature_key: str,
+        values: list,
+        excluded: bool = False,
+    ) -> DatasetType:
+        """Filter the dataset by checking whether the value of a feature is
+        in `values`.
+
+        Args:
+            dataset (DatasetType): Dataset to filter.
+            feature_key (str): Feature name to check the value of.
+            values (list): feature_key values to keep (if excluded is False)
+                or to exclude.
+            excluded (bool, optional): Whether to keep (False) or exclude (True)
+                the samples whose feature_key value is in `values`.
+                Defaults to False.
+
+        Returns:
+            DatasetType: Filtered dataset.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def merge(
+        id_dataset: DatasetType,
+        ood_dataset: DatasetType,
+        resize: Optional[bool] = False,
+        shape: Optional[Tuple[int]] = None,
+    ) -> DatasetType:
+        """Merge two datasets.
+
+        Args:
+            id_dataset (DatasetType): Dataset of in-distribution data.
+            ood_dataset (DatasetType): Dataset of out-of-distribution data.
+            resize (Optional[bool], optional): Whether the input tensors of the
+                datasets have to be resized to the same shape. Defaults to False.
+            shape (Optional[Tuple[int]], optional): Shape to use for resizing input
+                tensors. If None, the tensors are resized to the shape of the
+                id_dataset input tensors. Defaults to None.
+
+        Returns:
+            DatasetType: Merged dataset.
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    @abstractmethod
+    def prepare_for_training(
+        cls,
+        dataset: DatasetType,
+        batch_size: int,
+        shuffle: bool = False,
+        preprocess_fn: Optional[Callable] = None,
+        augment_fn: Optional[Callable] = None,
+        output_keys: list = ["input", "label"],
+    ) -> DatasetType:
+        """Prepare a dataset for training.
+
+        Args:
+            dataset (DatasetType): Dataset to prepare.
+            batch_size (int): Batch size.
+            shuffle (bool): Whether to shuffle the dataloader.
+            preprocess_fn (Callable, optional): Preprocessing function to apply to
+                the dataset. Defaults to None.
+            augment_fn (Callable, optional): Augmentation function to be used (when
+                the returned dataset is to be used for training). Defaults to None.
+            output_keys (list): List of keys corresponding to the features that
+                will be returned. Defaults to ["input", "label"].
+
+        Returns:
+            DatasetType: Prepared dataset / dataloader.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_item_length(dataset: DatasetType) -> int:
+        """Number of elements in a dataset item.
+
+        Args:
+            dataset (DatasetType): Dataset.
+
+        Returns:
+            int: Item length.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_dataset_length(dataset: DatasetType) -> int:
+        """Number of items in a dataset.
+
+        Args:
+            dataset (DatasetType): Dataset.
+
+        Returns:
+            int: Dataset length.
+        """
+        raise NotImplementedError()
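Set side by side, the two hunks document the API migration: the deprecated base class speaks of "features" and "keys" where the current one speaks of "columns", the single `load_dataset` entry point was split into `load_dataset_from_arrays` / `load_custom_dataset` / `load_from_huggingface`, and `prepare_for_training` was folded into the more general `prepare`. Roughly, using the same hypothetical `handler` and arrays `x`, `y` as in the earlier sketches:

```python
# Deprecated API (DEPRECATED_data_handler.py)
ds = handler.load_dataset((x, y), keys=["input", "label"])
ds = handler.filter_by_feature_value(ds, "label", [0, 1])

# Current API (data_handler.py)
ds = handler.load_dataset_from_arrays((x, y), columns=["input", "label"])
ds = handler.filter_by_value(ds, "label", [0, 1])
```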