oodeel-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oodeel/__init__.py +28 -0
- oodeel/aggregator/__init__.py +26 -0
- oodeel/aggregator/base.py +70 -0
- oodeel/aggregator/fisher.py +259 -0
- oodeel/aggregator/mean.py +72 -0
- oodeel/aggregator/std.py +86 -0
- oodeel/datasets/__init__.py +24 -0
- oodeel/datasets/data_handler.py +334 -0
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/deprecated/DEPRECATED_ooddataset.py +330 -0
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +600 -0
- oodeel/datasets/torch_data_handler.py +672 -0
- oodeel/eval/__init__.py +22 -0
- oodeel/eval/metrics.py +218 -0
- oodeel/eval/plots/__init__.py +27 -0
- oodeel/eval/plots/features.py +345 -0
- oodeel/eval/plots/metrics.py +118 -0
- oodeel/eval/plots/plotly.py +162 -0
- oodeel/extractor/__init__.py +35 -0
- oodeel/extractor/feature_extractor.py +187 -0
- oodeel/extractor/hf_torch_feature_extractor.py +184 -0
- oodeel/extractor/keras_feature_extractor.py +409 -0
- oodeel/extractor/torch_feature_extractor.py +506 -0
- oodeel/methods/__init__.py +47 -0
- oodeel/methods/base.py +570 -0
- oodeel/methods/dknn.py +185 -0
- oodeel/methods/energy.py +119 -0
- oodeel/methods/entropy.py +113 -0
- oodeel/methods/gen.py +113 -0
- oodeel/methods/gram.py +274 -0
- oodeel/methods/mahalanobis.py +209 -0
- oodeel/methods/mls.py +113 -0
- oodeel/methods/odin.py +109 -0
- oodeel/methods/rmds.py +172 -0
- oodeel/methods/she.py +159 -0
- oodeel/methods/vim.py +273 -0
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/types/__init__.py +75 -0
- oodeel/utils/__init__.py +38 -0
- oodeel/utils/general_utils.py +97 -0
- oodeel/utils/operator.py +253 -0
- oodeel/utils/tf_operator.py +269 -0
- oodeel/utils/tf_training_tools.py +219 -0
- oodeel/utils/torch_operator.py +292 -0
- oodeel/utils/torch_training_tools.py +303 -0
- oodeel-0.4.0.dist-info/METADATA +409 -0
- oodeel-0.4.0.dist-info/RECORD +63 -0
- oodeel-0.4.0.dist-info/WHEEL +5 -0
- oodeel-0.4.0.dist-info/licenses/LICENSE +21 -0
- oodeel-0.4.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +22 -0
- tests/tests_tensorflow/__init__.py +37 -0
- tests/tests_tensorflow/tf_methods_utils.py +140 -0
- tests/tests_tensorflow/tools_tf.py +86 -0
- tests/tests_torch/__init__.py +38 -0
- tests/tests_torch/tools_torch.py +151 -0
- tests/tests_torch/torch_methods_utils.py +148 -0
- tests/tools_operator.py +153 -0
oodeel/methods/gram.py
ADDED
@@ -0,0 +1,274 @@
# -*- coding: utf-8 -*-
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
# CRIAQ and ANITI - https://www.deel.ai/
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

import numpy as np
from sklearn.model_selection import train_test_split

from ..aggregator import BaseAggregator
from ..types import DatasetType
from ..types import TensorType
from .base import FeatureBasedDetector

EPSILON = 1e-6  # Numerical stability constant


class Gram(FeatureBasedDetector):
    r"""
    "Detecting Out-of-Distribution Examples with Gram Matrices"
    [link](https://proceedings.mlr.press/v119/sastry20a.html)

    **Important Disclaimer**: Taking the statistics of min/max deviation, as in the
    paper, raises some problems. The method may yield a score of zero for some tasks
    because the sample extreme values become more extreme with larger sample sizes.
    To mitigate this, we replace the min/max with the q / (1-q) quantile threshold,
    where q is a parameter that controls the discriminative ability of the method.

    This approach improved baseline performance in our experiments.

    Args:
        orders (Union[List[int], int]): Power orders to consider for the correlation
            matrix. If an int is provided, it is converted to a list.
        quantile (float): Quantile to consider for the correlations to build the
            deviation threshold.
        aggregator (Optional[BaseAggregator]): Aggregator to combine multi-layer scores.
            If multiple layers are used and no aggregator is provided,
            StdNormalizedAggregator is used by default. Defaults to None.
    """

    def __init__(
        self,
        orders: Union[List[int], int] = list(range(1, 6)),
        quantile: float = 0.01,
        aggregator: Optional[BaseAggregator] = None,
        **kwargs,
    ):
        super().__init__(aggregator=aggregator, **kwargs)
        if isinstance(orders, int):
            orders = [orders]
        self.orders: List[int] = orders
        self.quantile = quantile

        self.postproc_fns = None  # Will be set during fit
        # Mapping class -> list (per-layer) of thresholds [lower, upper]
        self.min_maxs: Dict[int, List[TensorType]] = {}

    # === Public API (override of _fit_to_dataset) ===
    def _fit_to_dataset(
        self,
        fit_dataset: DatasetType,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """Fit thresholds on Gram statistics from a dataset.

        This method sets :attr:`postproc_fns` to compute Gram matrices for all
        selected feature layers and then delegates the actual fitting to the
        generic implementation in :class:`OODBaseDetector`.

        Args:
            fit_dataset: Dataset containing in-distribution samples.
            verbose: Whether to display a progress bar during feature extraction.
            **kwargs: Additional keyword arguments forwarded to :func:`_fit_layer`.

        Returns:
            None
        """
        n_layers = len(self.feature_extractor.feature_layers_id)
        if self.postproc_fns is None:
            self.postproc_fns = [self._stat] * n_layers

        super()._fit_to_dataset(fit_dataset, verbose=verbose, **kwargs)

    # === Per-layer logic ===
    def _fit_layer(
        self,
        layer_id: int,
        layer_stats: np.ndarray,
        info: dict,
        val_split: float = None,
        **kwargs,
    ) -> None:
        """Fit thresholds for one layer and store validation data.

        Args:
            layer_id: Index of the processed layer.
            layer_stats: Gram statistics for this layer.
            info: Dictionary containing the logits of the training data.
            val_split: Ratio of samples used for aggregator fitting.
        """

        preds_all = np.argmax(info["logits"], axis=1)

        # initialize min_maxs if not already done.
        if not self.min_maxs:
            n_layers = len(self.feature_extractor.feature_layers_id)
            self._classes = np.sort(np.unique(preds_all)).tolist()
            self.min_maxs = {cls: [None] * n_layers for cls in self._classes}

        # split the dataset into training and validation sets (as in original paper).
        idx_all = np.arange(preds_all.shape[0])
        train_idx, val_idx = (
            train_test_split(idx_all, test_size=val_split, random_state=42)
            if val_split is not None
            else (idx_all, idx_all)
        )

        train_stats = layer_stats[train_idx]
        val_stats = layer_stats[val_idx]
        train_preds = preds_all[train_idx]
        val_preds = preds_all[val_idx]

        # compute min/max thresholds for each class
        for cls in self._classes:
            cls_mask = train_preds == cls
            stats_cls_np = train_stats[cls_mask]
            stats_cls_t = self.op.from_numpy(stats_cls_np)
            lower = self.op.quantile(stats_cls_t, self.quantile, dim=0)
            upper = self.op.quantile(stats_cls_t, 1 - self.quantile, dim=0)
            self.min_maxs[cls][layer_id] = self.op.cat(
                [self.op.unsqueeze(lower, -1), self.op.unsqueeze(upper, -1)],
                dim=-1,
            )

        if getattr(self, "aggregator", None) is not None:
            # Store validation data for the aggregator.
            if not hasattr(self, "_val_stats"):
                self._val_stats = []
                self._val_preds = []
            while len(self._val_stats) <= layer_id:
                self._val_stats.append(None)
                self._val_preds.append(None)
            self._val_stats[layer_id] = val_stats
            self._val_preds[layer_id] = val_preds

    def _score_layer(
        self,
        layer_id: int,
        layer_stats: TensorType,
        info: dict,
        fit: bool = False,
        **kwargs,
    ) -> np.ndarray:
        """Score inputs for a single layer.

        Args:
            layer_id (int): Layer index.
            layer_stats (TensorType): Gram stats.
            info (dict): Dictionary containing auxiliary data, such as logits.
            fit (bool): Whether scoring is performed during fitting. If `True`
                the validation subset stored by :func:`_fit_layer` is used.

        Returns:
            np.ndarray: Deviation-based OOD scores.
        """
        if fit and hasattr(self, "_val_stats"):
            layer_stats = self.op.from_numpy(self._val_stats[layer_id])
            preds = self._val_preds[layer_id]
        else:
            preds = np.argmax(self.op.convert_to_numpy(info["logits"]), axis=1)

        thr_batch = self.op.stack([self.min_maxs[int(lbl)][layer_id] for lbl in preds])
        dev = self._deviation(layer_stats, thr_batch)
        return self.op.convert_to_numpy(dev)

    # === Internal utilities ===
    def _stat(self, feature_map: TensorType) -> TensorType:
        """Compute Gram statistics for a single layer.

        Args:
            feature_map (TensorType): Feature map of shape `[B, ...]`.

        Returns:
            TensorType: Statistics of shape `[B, n_orders, C]`.
        """
        fm_shape = feature_map.shape
        stats = []
        for p in self.orders:
            # Raise the feature map to the specified order.
            fm_p = feature_map**p
            if len(fm_shape) == 2:
                # Dense layers: compute outer product.
                fm_p = self.op.einsum("bi,bj->bij", fm_p, fm_p)
            else:
                # Convolutional feature maps: flatten spatial dimensions.
                if self.backend == "tensorflow":
                    fm_p = self.op.reshape(
                        self.op.einsum("i...j->ij...", fm_p),
                        (fm_shape[0], fm_shape[-1], -1),
                    )
                else:
                    fm_p = self.op.reshape(fm_p, (fm_shape[0], fm_shape[1], -1))
                fm_p = self.op.matmul(fm_p, self.op.permute(fm_p, (0, 2, 1)))
            # Normalize and recover the original power.
            fm_p = self.op.sign(fm_p) * (self.op.abs(fm_p) ** (1 / p))
            # Use only the lower triangular part.
            fm_p = self.op.tril(fm_p)
            # Aggregate row-wise.
            fm_p = self.op.sum(fm_p, dim=2)
            stats.append(fm_p)
        return self.op.stack(stats, dim=1)

    def _deviation(self, stats: TensorType, thresholds: TensorType) -> TensorType:
        """Compute deviation of `stats` outside `thresholds`.

        Args:
            stats (TensorType): Gram stats, shape `[B, *, C]`.
            thresholds (TensorType): Lower & upper bounds, shape `[B, *, C, 2]`.

        Returns:
            TensorType: Deviation values, shape `[B]`.
        """
        below = self.op.where(stats < thresholds[..., 0], 1.0, 0.0)
        above = self.op.where(stats > thresholds[..., 1], 1.0, 0.0)
        dev_low = (
            (thresholds[..., 0] - stats) / (self.op.abs(thresholds[..., 0]) + EPSILON)
        ) * below
        dev_high = (
            (stats - thresholds[..., 1]) / (self.op.abs(thresholds[..., 1]) + EPSILON)
        ) * above
        return self.op.sum(dev_low + dev_high, dim=(1, 2))

    # === Properties ===
    @property
    def requires_to_fit_dataset(self) -> bool:
        """
        Indicates whether this OOD detector requires in-distribution data for fitting.

        Returns:
            bool: True, since fitting requires computing class-conditional statistics.
        """
        return True

    @property
    def requires_internal_features(self) -> bool:
        """
        Indicates whether this OOD detector utilizes internal model features.

        Returns:
            bool: True, as it operates on intermediate feature representations.
        """
        return True
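To make the Gram statistic concrete, here is a self-contained NumPy sketch of the per-order computation that `_stat` performs on a convolutional feature map. It assumes a channels-first `[B, C, H, W]` layout (the non-TensorFlow branch above); the function name `gram_stat` and the toy input are illustrative, not part of the package.

import numpy as np

def gram_stat(feature_map: np.ndarray, orders=(1, 2, 3)) -> np.ndarray:
    """feature_map: [B, C, H, W] -> statistics of shape [B, len(orders), C]."""
    b, c = feature_map.shape[:2]
    flat = feature_map.reshape(b, c, -1)               # [B, C, H*W]
    stats = []
    for p in orders:
        fm_p = flat ** p                               # raise to order p
        gram = fm_p @ fm_p.transpose(0, 2, 1)          # channel Gram matrix [B, C, C]
        gram = np.sign(gram) * np.abs(gram) ** (1.0 / p)  # signed p-th root
        gram = np.tril(gram)                           # lower triangle only
        stats.append(gram.sum(axis=2))                 # row-wise sum -> [B, C]
    return np.stack(stats, axis=1)                     # [B, n_orders, C]

x = np.random.randn(4, 8, 5, 5)
print(gram_stat(x).shape)  # (4, 3, 8)

At fit time, the detector then takes the q / (1-q) quantiles of these statistics per predicted class, and at test time sums the relative deviations outside those bounds, as in `_deviation`.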
oodeel/methods/mahalanobis.py
ADDED
@@ -0,0 +1,209 @@
# -*- coding: utf-8 -*-
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
# CRIAQ and ANITI - https://www.deel.ai/
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np

from ..aggregator import BaseAggregator
from ..types import TensorType
from .base import FeatureBasedDetector


class Mahalanobis(FeatureBasedDetector):
    """
    "A Simple Unified Framework for Detecting Out-of-Distribution Samples and
    Adversarial Attacks"
    https://arxiv.org/abs/1807.03888

    This detector computes the Mahalanobis distance between the feature representations
    of input samples and class-conditional Gaussian distributions estimated from
    in-distribution data. It supports multiple feature layers by computing statistics
    (class means and a covariance matrix) for each layer. During inference, scores
    computed for each layer are aggregated using a provided aggregator.

    Args:
        eps (float): Magnitude for gradient-based input perturbation. Defaults to
            0.0014.
        temperature (float, optional): Temperature parameter. Defaults to 1000.
        aggregator (Optional[BaseAggregator]): Aggregator to combine scores from
            multiple feature layers. If `None` and more than one layer is
            used, a `StdNormalizedAggregator` is instantiated automatically.
    """

    def __init__(
        self,
        eps: float = 0.0014,
        temperature: float = 1000,
        aggregator: Optional[BaseAggregator] = None,
        **kwargs,
    ):
        super().__init__(
            eps=eps, temperature=temperature, aggregator=aggregator, **kwargs
        )
        self._layer_stats: List[Tuple[Dict, np.ndarray]] = []
        self._classes: Optional[np.ndarray] = None

    # === Per-layer logic ===
    def _fit_layer(
        self,
        layer_id: int,
        layer_features: np.ndarray,
        info: dict,
        **kwargs,
    ) -> None:
        """Compute class statistics for one feature layer.

        Args:
            layer_id: Index of the processed layer.
            layer_features: In-distribution features for this layer.
            info: Dictionary containing the training labels.
        """
        labels = info["labels"]

        if isinstance(layer_features, np.ndarray):
            layer_features = self.op.from_numpy(layer_features)

        mus, pinv_cov = self._compute_layer_stats(layer_features, labels)

        self._layer_stats.append((mus, pinv_cov))

    def _score_layer(
        self,
        layer_id: int,
        layer_features: TensorType,
        info: dict,
        fit: bool = False,
        **kwargs,
    ) -> np.ndarray:
        """Compute Mahalanobis confidence for one feature layer.

        Args:
            layer_id: Index of the processed layer.
            layer_features: Feature tensor for the current batch.
            info: Unused dictionary of auxiliary data.
            fit: Whether scoring is performed during fitting. Unused here.

        Returns:
            np.ndarray: Negative Mahalanobis confidence scores.
        """
        mus, pinv_cov = self._layer_stats[layer_id]
        feats = self.op.flatten(layer_features)
        g_scores = self._gaussian_log_probs(feats, mus, pinv_cov)
        max_score = self.op.max(g_scores, dim=1)
        return -self.op.convert_to_numpy(max_score)

    # === Internal utilities ===
    def _compute_layer_stats(
        self, layer_features: TensorType, labels: np.ndarray
    ) -> Tuple[Dict[int, TensorType], np.ndarray]:
        """
        Compute class-conditional statistics for a given feature layer.

        For each class present in the labels, this method computes the mean feature
        vector. It also computes a weighted average covariance matrix (across all
        classes) and its pseudo-inverse, which will later be used for computing
        Mahalanobis distances.

        Args:
            layer_features (TensorType): Feature tensor for a specific layer extracted
                from in-distribution data.
            labels (np.ndarray): Corresponding labels for the in-distribution data.

        Returns:
            Tuple[Dict, TensorType]:
                - A dictionary mapping each class label to its mean feature vector.
                - The pseudo-inverse of the weighted average covariance matrix.
        """
        classes = np.sort(np.unique(labels))
        labels = self.op.from_numpy(labels)  # convert to tensor

        feats = self.op.flatten(layer_features)
        n_total = feats.shape[0]

        mus: Dict[int, TensorType] = {}
        mean_cov: TensorType = None

        for cls in classes:
            idx = self.op.equal(labels, cls)
            feats_cls = self.op.flatten(layer_features[idx])
            mu = self.op.mean(feats_cls, dim=0)
            mus[cls] = mu

            zero_f = feats_cls - mu
            cov_cls = self.op.matmul(self.op.t(zero_f), zero_f) / zero_f.shape[0]
            weight = feats_cls.shape[0] / n_total
            mean_cov = (
                cov_cls * weight if mean_cov is None else mean_cov + cov_cls * weight
            )

        pinv_cov = self.op.pinv(mean_cov)  # type: ignore[arg-type]
        if self._classes is None:
            self._classes = classes
        return mus, pinv_cov

    def _gaussian_log_probs(
        self, out_features: TensorType, mus: Dict[int, TensorType], pinv_cov: TensorType
    ) -> TensorType:
        """Compute unnormalised Gaussian log-probabilities for all classes.

        Args:
            out_features (TensorType): Features of shape [B, D].
            mus (Dict[int, TensorType]): Class mean vectors.
            pinv_cov (TensorType): Pseudo-inverse covariance matrix.

        Returns:
            TensorType: Log-probabilities with shape [B, n_classes].
        """
        scores = []
        for cls in self._classes:  # type: ignore[assignment]
            mu = mus[cls]
            zero_f = out_features - mu
            log_prob = -0.5 * self.op.diag(
                self.op.matmul(self.op.matmul(zero_f, pinv_cov), self.op.t(zero_f))
            )
            scores.append(self.op.reshape(log_prob, (-1, 1)))
        return self.op.cat(scores, dim=1)

    # === Properties ===
    @property
    def requires_to_fit_dataset(self) -> bool:
        """
        Indicates whether this OOD detector requires in-distribution data for fitting.

        Returns:
            bool: True, since fitting requires computing class-conditional statistics.
        """
        return True

    @property
    def requires_internal_features(self) -> bool:
        """
        Indicates whether this OOD detector utilizes internal model features.

        Returns:
            bool: True, as it operates on intermediate feature representations.
        """
        return True
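As a worked illustration of what `_compute_layer_stats` and `_gaussian_log_probs` compute, the following self-contained NumPy sketch estimates class means and a class-frequency-weighted shared covariance, then scores a batch with the negative maximum of the quadratic form -0.5 (x - mu)^T Sigma^+ (x - mu). The data here are toy values; only the formulas come from the code above.

import numpy as np

rng = np.random.default_rng(0)
feats = rng.normal(size=(100, 16))            # [N, D] in-distribution features
labels = rng.integers(0, 3, size=100)         # 3 classes

classes = np.sort(np.unique(labels))
mus, mean_cov = {}, np.zeros((16, 16))
for cls in classes:
    f_cls = feats[labels == cls]
    mus[cls] = f_cls.mean(axis=0)             # class mean
    zero_f = f_cls - mus[cls]
    # per-class covariance, weighted by class frequency
    mean_cov += (zero_f.T @ zero_f / len(f_cls)) * (len(f_cls) / len(feats))
pinv_cov = np.linalg.pinv(mean_cov)           # pseudo-inverse of shared covariance

x = rng.normal(size=(5, 16))                  # batch to score
log_probs = np.stack(
    [-0.5 * np.einsum("bd,de,be->b", x - mus[c], pinv_cov, x - mus[c])
     for c in classes],
    axis=1,
)                                             # [B, n_classes]
ood_scores = -log_probs.max(axis=1)           # higher = more OOD
print(ood_scores.shape)  # (5,)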
oodeel/methods/mls.py
ADDED
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
# CRIAQ and ANITI - https://www.deel.ai/
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np

from ..types import TensorType
from ..types import Tuple
from .base import OODBaseDetector


class MLS(OODBaseDetector):
    """
    Maximum Logit Scores method for OOD detection.
    "Open-Set Recognition: a Good Closed-Set Classifier is All You Need?"
    https://arxiv.org/abs/2110.06207,
    and Maximum Softmax Score
    "A Baseline for Detecting Misclassified and Out-of-Distribution Examples
    in Neural Networks"
    http://arxiv.org/abs/1610.02136

    Args:
        output_activation (str): activation function for the last layer. If "linear",
            the method is MLS, and if "softmax", the method is MSS.
            Defaults to "linear".
        use_react (bool): if true, apply the ReAct method by clipping penultimate
            activations under a threshold value.
        react_quantile (Optional[float]): q value in the range [0, 1] used to compute
            the ReAct clipping threshold, defined as the q-th quantile of penultimate
            layer activations. Defaults to 0.8.
    """

    def __init__(
        self,
        output_activation: str = "linear",
        use_react: bool = False,
        use_scale: bool = False,
        use_ash: bool = False,
        react_quantile: float = 0.8,
        scale_percentile: float = 0.85,
        ash_percentile: float = 0.90,
        **kwargs,
    ):
        super().__init__(
            use_react=use_react,
            use_scale=use_scale,
            use_ash=use_ash,
            react_quantile=react_quantile,
            scale_percentile=scale_percentile,
            ash_percentile=ash_percentile,
        )
        self.output_activation = output_activation

    def _score_tensor(self, inputs: TensorType) -> np.ndarray:
        """
        Computes an OOD score for input samples "inputs" based on
        the maximum logit (or softmax probability) of the model output.

        Args:
            inputs: input samples to score

        Returns:
            np.ndarray: OOD scores
        """
        # optional: apply input perturbation
        if self.eps > 0:
            inputs = self._input_perturbation(inputs, self.eps, self.temperature)

        _, logits = self.feature_extractor.predict_tensor(inputs)
        if self.output_activation == "softmax":
            logits = self.op.softmax(logits)
        logits = self.op.convert_to_numpy(logits)
        scores = -np.max(logits, axis=1)
        return scores

    @property
    def requires_to_fit_dataset(self) -> bool:
        """
        Whether an OOD detector needs a `fit_dataset` argument in the fit function.

        Returns:
            bool: True if `fit_dataset` is required, else False.
        """
        return False

    @property
    def requires_internal_features(self) -> bool:
        """
        Whether an OOD detector acts on internal model features.

        Returns:
            bool: True if the detector performs computations on an intermediate
                layer, else False.
        """
        return False
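The scoring rule in `_score_tensor` reduces to one line of NumPy. A minimal sketch with toy logits, showing both the MLS ("linear") and MSS ("softmax") variants; the values are illustrative only:

import numpy as np

logits = np.array([[4.0, 1.0, 0.5],    # confident prediction -> low OOD score
                   [1.1, 1.0, 0.9]])   # flat logits -> higher OOD score

# MLS: negative maximum logit
mls_scores = -np.max(logits, axis=1)

# MSS: negative maximum softmax probability
softmax = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
mss_scores = -np.max(softmax, axis=1)

print(mls_scores)  # [-4.  -1.1]
print(mss_scores)

In both variants, a higher score indicates a more likely out-of-distribution sample, consistent with the sign convention of the other detectors in this release.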