oodeel-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oodeel/__init__.py +28 -0
- oodeel/aggregator/__init__.py +26 -0
- oodeel/aggregator/base.py +70 -0
- oodeel/aggregator/fisher.py +259 -0
- oodeel/aggregator/mean.py +72 -0
- oodeel/aggregator/std.py +86 -0
- oodeel/datasets/__init__.py +24 -0
- oodeel/datasets/data_handler.py +334 -0
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/deprecated/DEPRECATED_ooddataset.py +330 -0
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +600 -0
- oodeel/datasets/torch_data_handler.py +672 -0
- oodeel/eval/__init__.py +22 -0
- oodeel/eval/metrics.py +218 -0
- oodeel/eval/plots/__init__.py +27 -0
- oodeel/eval/plots/features.py +345 -0
- oodeel/eval/plots/metrics.py +118 -0
- oodeel/eval/plots/plotly.py +162 -0
- oodeel/extractor/__init__.py +35 -0
- oodeel/extractor/feature_extractor.py +187 -0
- oodeel/extractor/hf_torch_feature_extractor.py +184 -0
- oodeel/extractor/keras_feature_extractor.py +409 -0
- oodeel/extractor/torch_feature_extractor.py +506 -0
- oodeel/methods/__init__.py +47 -0
- oodeel/methods/base.py +570 -0
- oodeel/methods/dknn.py +185 -0
- oodeel/methods/energy.py +119 -0
- oodeel/methods/entropy.py +113 -0
- oodeel/methods/gen.py +113 -0
- oodeel/methods/gram.py +274 -0
- oodeel/methods/mahalanobis.py +209 -0
- oodeel/methods/mls.py +113 -0
- oodeel/methods/odin.py +109 -0
- oodeel/methods/rmds.py +172 -0
- oodeel/methods/she.py +159 -0
- oodeel/methods/vim.py +273 -0
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/types/__init__.py +75 -0
- oodeel/utils/__init__.py +38 -0
- oodeel/utils/general_utils.py +97 -0
- oodeel/utils/operator.py +253 -0
- oodeel/utils/tf_operator.py +269 -0
- oodeel/utils/tf_training_tools.py +219 -0
- oodeel/utils/torch_operator.py +292 -0
- oodeel/utils/torch_training_tools.py +303 -0
- oodeel-0.4.0.dist-info/METADATA +409 -0
- oodeel-0.4.0.dist-info/RECORD +63 -0
- oodeel-0.4.0.dist-info/WHEEL +5 -0
- oodeel-0.4.0.dist-info/licenses/LICENSE +21 -0
- oodeel-0.4.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +22 -0
- tests/tests_tensorflow/__init__.py +37 -0
- tests/tests_tensorflow/tf_methods_utils.py +140 -0
- tests/tests_tensorflow/tools_tf.py +86 -0
- tests/tests_torch/__init__.py +38 -0
- tests/tests_torch/tools_torch.py +151 -0
- tests/tests_torch/torch_methods_utils.py +148 -0
- tests/tools_operator.py +153 -0
oodeel/__init__.py
ADDED
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+oodeel
+-------
+"""
+
+__version__ = "0.4.0"
oodeel/aggregator/__init__.py
ADDED
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from .base import BaseAggregator
+from .fisher import FisherAggregator
+from .mean import MeanNormalizedAggregator
+from .std import StdNormalizedAggregator
oodeel/aggregator/base.py
ADDED
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from typing import List
+
+import numpy as np
+
+
+class BaseAggregator:
+    """
+    Base class for aggregating out-of-distribution (OOD) detection scores from multiple
+    feature layers.
+
+    This abstract class defines the interface for aggregators that combine per-layer
+    scores into a single score per sample. Subclasses should implement the `fit` and
+    `aggregate` methods to capture any necessary statistics from training data
+    and to combine scores during inference.
+    """
+
+    def fit(self, per_layer_scores: List[np.ndarray]) -> None:
+        """
+        Fit the aggregator on per-layer scores computed from in-distribution (ID)
+        training data.
+
+        This method extracts any statistical properties (e.g., standard deviations)
+        from the provided scores that will be used later to normalize or weight the
+        per-layer scores during aggregation.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of numpy arrays, where each
+                array contains the scores for a particular feature layer
+                (expected shape: (num_samples,)).
+        """
+        pass
+
+    def aggregate(self, per_layer_scores: List[np.ndarray]) -> np.ndarray:
+        """
+        Aggregate scores from multiple feature layers into a single score for each
+        sample.
+
+        This method should combine the per-layer scores (e.g., via normalization and
+        averaging) into a unified score that can be used for OOD detection.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of numpy arrays,
+                representing the scores from one feature layer for all samples.
+
+        Returns:
+            np.ndarray: An array containing the aggregated score for each sample.
+        """
+        raise NotImplementedError("Aggregator must implement an aggregate method.")
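To make the interface above concrete, here is a minimal sketch of a custom aggregator built on `BaseAggregator`. The `MaxAggregator` name and its max-over-layers rule are illustrative only and are not part of the package:

from typing import List

import numpy as np

from oodeel.aggregator import BaseAggregator


class MaxAggregator(BaseAggregator):
    """Illustrative subclass: keep the largest (worst) per-layer score per sample."""

    def fit(self, per_layer_scores: List[np.ndarray]) -> None:
        # This toy rule needs no statistics from ID data, so fit is a no-op.
        pass

    def aggregate(self, per_layer_scores: List[np.ndarray]) -> np.ndarray:
        # Stack to shape (num_samples, num_layers) and take the max over layers.
        return np.stack(per_layer_scores, axis=-1).max(axis=-1)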
oodeel/aggregator/fisher.py
ADDED
@@ -0,0 +1,259 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from typing import List
+from typing import Tuple
+
+import numpy as np
+import scipy.stats
+
+from .base import BaseAggregator
+
+
+class FisherAggregator(BaseAggregator):
+    """
+    Aggregator that combines out-of-distribution (OOD) scores from multiple feature
+    layers using Fisher's method with Brown's correction.
+
+    Sources:
+        [1] Haroush, M., Frostig, T., Heller, R., & Soudry, D. (2021). "A statistical
+        framework for efficient out of distribution detection in deep neural networks."
+        [2] Dadalto, E., Alberge, F., Duhamel, P., & Piantanida, P. (2024). "Combine and
+        Conquer: A Meta-Analysis on Data Shift and Out-of-Distribution Detection."
+
+    The aggregator operates in two phases:
+
+    1. **Fitting:**
+        It fits an empirical cumulative distribution function (ECDF) on in-distribution
+        (ID) training or validation scores obtained from all feature layers. It then
+        computes the Fisher combined test statistic from the ID scores and derives
+        Brown's correction parameters (mean and variance of the Fisher scores),
+        which are used to adjust for correlations between layers.
+
+    2. **Aggregation:**
+        At test time, given per-layer OOD scores, it computes p-values using the ECDF,
+        combines these p-values with Fisher's method, applies Brown's correction, and
+        returns an aggregated score indicating the likelihood that a sample is
+        out-of-distribution. In this setting, higher aggregated scores correspond to a
+        higher OOD likelihood.
+
+    Methods:
+        fit(per_layer_scores):
+            Fit the aggregator using ID scores from each feature layer.
+        aggregate(per_layer_scores):
+            Compute and return an aggregated OOD score from per-layer scores for test
+            samples.
+    """
+
+    def __init__(self) -> None:
+        self.id_scores = None  # Stacked ID scores from training data (used for ECDF)
+        self.y_ecdf = None  # Empirical CDF values corresponding to the ID scores
+        self.id_fisher_scores = (
+            None  # Fisher combined test statistic computed on the ID scores
+        )
+        self.mu = None  # Mean of the Fisher scores (for Brown's correction)
+        self.sigma2 = None  # Variance of the Fisher scores (for Brown's correction)
+        self.c = None  # Correction factor derived from sigma2 and mu
+        self.kprime = None  # Effective degrees of freedom after Brown's correction
+
+    # === Public API ===
+    def fit(self, per_layer_scores: List[np.ndarray]) -> None:
+        """
+        Fit the aggregator using in-distribution (ID) scores.
+
+        This method performs the following steps:
+        1. Stacks the per-layer ID scores.
+        2. Computes the empirical CDF over the stacked scores.
+        3. Computes the Fisher combined test statistic for the training scores.
+        4. Derives Brown's correction parameters based on the mean and variance of the
+           Fisher scores.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of 1D numpy arrays, where each
+                array contains the OOD detection scores from a specific feature layer
+                for the ID data.
+        """
+        # Stack scores so that the resulting shape is (num_samples, num_layers)
+        id_scores = np.stack(per_layer_scores, axis=-1)
+        # Compute empirical CDF based on the ID scores
+        self.id_scores, self.y_ecdf = self._empirical_cdf(id_scores)
+        # Compute Fisher's combined statistic for the ID scores
+        self.id_fisher_scores = self._compute_fisher_scores(id_scores)
+        # Derive Brown's correction parameters from the Fisher scores
+        self.mu = np.mean(self.id_fisher_scores)
+        self.sigma2 = np.var(self.id_fisher_scores)
+        self.c = self.sigma2 / (2 * self.mu)
+        self.kprime = 2 * self.mu**2 / self.sigma2
+
+    def aggregate(self, per_layer_scores: List[np.ndarray]) -> np.ndarray:
+        """
+        Aggregate per-layer scores into a single OOD score for each test sample.
+
+        The aggregation process involves:
+        1. Stacking the per-layer scores.
+        2. Computing Fisher's combined test statistic.
+        3. Applying Brown's correction to obtain adjusted p-values.
+        4. Converting these p-values into an aggregated OOD score, where higher scores
+           indicate a higher likelihood that a sample is out-of-distribution.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of 1D numpy arrays representing
+                the OOD scores from each feature layer for the test data.
+
+        Returns:
+            np.ndarray: A 1D numpy array of aggregated OOD scores for each test sample.
+        """
+        scores = np.stack(per_layer_scores, axis=-1)
+        fisher_scores = self._compute_fisher_scores(scores)
+        # Apply Brown's correction: compute p-values from the corrected chi-square dist
+        p_values = 1 - scipy.stats.chi2.cdf(fisher_scores / self.c, self.kprime)
+        # Convert p-values to aggregated score (lower p-values => higher OOD likelihood)
+        return 1 - p_values
+
+    # === Private methods: computation ===
+    def _compute_fisher_scores(self, scores: np.ndarray) -> np.ndarray:
+        """
+        Compute Fisher's combined test statistic for the given scores.
+
+        This method first converts the scores into p-values (using the empirical CDF)
+        and then computes the Fisher statistic for each sample by summing the logarithms
+        of the p-values.
+
+        Args:
+            scores (np.ndarray): The stacked scores (ID or test) with shape
+                (num_samples, num_layers).
+
+        Returns:
+            np.ndarray: A 1D array of Fisher combined test statistics, one for each
+                sample.
+        """
+        p_values = self._compute_p_values(scores)
+        return self._fisher_tau_method(p_values)
+
+    def _p_value_fn(
+        self, test_statistic: np.ndarray, X: np.ndarray, y_ecdf: np.ndarray
+    ) -> np.ndarray:
+        """
+        Compute p-values for the given test statistics using the empirical CDF.
+
+        For each feature layer, this function linearly interpolates the test statistic
+        values within the sorted training sample values (augmented with bounds) and
+        returns the ECDF values.
+
+        Args:
+            test_statistic (np.ndarray): Array of test statistics with shape (n, m)
+                where n is the number of test samples and m is the number of layers.
+            X (np.ndarray): Sorted training sample values (with bounds) obtained from
+                `_empirical_cdf`, shape (N, m).
+            y_ecdf (np.ndarray): Corresponding ECDF values for each layer, shape (N, m).
+
+        Returns:
+            np.ndarray: Interpolated p-values for the test samples with shape (n, m).
+        """
+        test_statistic = -test_statistic  # Ensure consistency with the ECDF computation
+        interpolated = []
+        for i in range(test_statistic.shape[1]):
+            layer_test_stat = test_statistic[:, i]
+            layer_X = X[:, i]
+            layer_ecdf = y_ecdf[:, i]
+            interp_values = np.interp(layer_test_stat, layer_X, layer_ecdf).reshape(
+                -1, 1
+            )
+            interpolated.append(interp_values)
+        return np.concatenate(interpolated, axis=1)
+
+    def _compute_p_values(self, scores: np.ndarray) -> np.ndarray:
+        """
+        Compute p-values for test scores based on the empirical CDF from the training
+        data.
+
+        Args:
+            scores (np.ndarray): A numpy array of stacked test scores with shape
+                (num_samples, num_layers).
+
+        Returns:
+            np.ndarray: A numpy array of p-values with the same shape as the input.
+        """
+        return self._p_value_fn(scores, self.id_scores, self.y_ecdf)
+
+    # === Private methods: ECDF and p-value ===
+
+    def _empirical_cdf(
+        self, X: np.ndarray, w: np.ndarray = None
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Compute the empirical cumulative distribution function (ECDF) for a given
+        sample.
+
+        The function first negates the input data (assuming that lower scores indicate
+        higher in-distribution confidence), augments the sample with lower and upper
+        bounds, sorts the values, and then computes the ECDF.
+
+        Args:
+            X (np.ndarray): An array of shape (N, m), where N is the number of samples
+                and m is the number of feature layers.
+            w (np.ndarray, optional): Optional weights to adjust the ECDF values.
+                Defaults to None.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]:
+                - An array of sorted (and augmented) sample values.
+                - An array of ECDF values corresponding to the sample.
+        """
+        # Negate scores so that higher values correspond to higher in-dist confidence.
+        X = -X
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+        mult_factor_min = np.where(X.min(0) > 0, np.array(1 / len(X)), np.array(len(X)))
+        mult_factor_max = np.where(X.max(0) > 0, np.array(len(X)), np.array(1 / len(X)))
+        lower_bound = X.min(0) * mult_factor_min
+        upper_bound = X.max(0) * mult_factor_max
+        X_aug = np.concatenate(
+            (lower_bound.reshape(1, -1), X, upper_bound.reshape(1, -1)), axis=0
+        )
+        X_sorted = np.sort(X_aug, axis=0)
+        y_ecdf = np.concatenate(
+            [np.arange(1, X_sorted.shape[0] + 1).reshape(-1, 1) / X_sorted.shape[0]]
+            * X_sorted.shape[1],
+            axis=1,
+        )
+        if w is not None:
+            y_ecdf = y_ecdf * w.reshape(1, -1)
+        return X_sorted, y_ecdf
+
+    def _fisher_tau_method(self, p_values: np.ndarray) -> np.ndarray:
+        """
+        Combine p-values using Fisher's method.
+
+        For each sample, the Fisher statistic is computed as:
+            tau = -2 * sum(log(p_i))
+        where the sum is taken over the p-values from all feature layers.
+
+        Args:
+            p_values (np.ndarray): Array of p-values with shape (n, m).
+
+        Returns:
+            np.ndarray: A 1D array of Fisher combined statistics, one per test sample
+                (shape: (n,)).
+        """
+        tau = -2 * np.sum(np.log(p_values), axis=1)
+        return tau
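As a usage sketch of the two-phase workflow described in the docstring above (the random scores, layer count, and sample sizes below are placeholders, not values from the package; in practice the per-layer scores would come from an OOD detector evaluated at several feature layers):

import numpy as np

from oodeel.aggregator import FisherAggregator

rng = np.random.default_rng(0)

# Placeholder per-layer OOD scores: 3 feature layers, 1000 ID samples, 5 test samples.
fit_scores = [rng.normal(size=1000) for _ in range(3)]
test_scores = [rng.normal(loc=2.0, size=5) for _ in range(3)]  # shifted, i.e. "more OOD"

aggregator = FisherAggregator()
aggregator.fit(fit_scores)  # ECDF and Brown's correction parameters from ID scores
scores = aggregator.aggregate(test_scores)

print(scores.shape)  # (5,): one aggregated score per test sample, higher means more OOD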
oodeel/aggregator/mean.py
ADDED
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from typing import List
+
+import numpy as np
+
+from .base import BaseAggregator
+
+
+class MeanNormalizedAggregator(BaseAggregator):
+    """
+    Aggregator that normalizes per-layer scores by their mean before aggregating them.
+
+    This aggregator mimics the behavior of the original Gram detector:
+    during the fitting phase, it computes, for each layer, a normalization constant that
+    is the average (mean) of the deviation scores (computed on a validation set). At
+    test time, each layer's score is divided by its corresponding mean, and the final
+    score is obtained by averaging across layers.
+    """
+
+    def __init__(self):
+        self.means = None
+
+    def fit(self, per_layer_scores: List[np.ndarray]) -> None:
+        """
+        Computes and stores the mean for each feature layer's scores.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of arrays where each array
+                contains the per-layer scores (shape: (num_samples,)).
+        """
+        scores_stack = np.stack(
+            per_layer_scores, axis=-1
+        )  # shape: (num_samples, num_layers)
+        self.means = scores_stack.mean(axis=0, keepdims=True) + 1e-10
+
+    def aggregate(self, per_layer_scores: List[np.ndarray]) -> np.ndarray:
+        """
+        Normalizes each layer's scores by its mean and averages them.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of arrays with scores from
+                different feature layers.
+
+        Returns:
+            np.ndarray: A 1D array of aggregated scores.
+        """
+        scores_stack = np.stack(per_layer_scores, axis=-1)
+        if self.means is None:
+            raise ValueError("Aggregator has not been fitted yet.")
+        normalized_scores = scores_stack / self.means
+        return normalized_scores.mean(axis=-1)
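A small worked example of the mean normalization above (the numbers are arbitrary placeholders): each layer's test score is divided by the per-layer mean stored at fit time, then the layers are averaged.

import numpy as np

from oodeel.aggregator import MeanNormalizedAggregator

agg = MeanNormalizedAggregator()
# Fit-time scores: layer 1 has mean 2.0, layer 2 has mean 20.0.
agg.fit([np.array([1.0, 2.0, 3.0]), np.array([10.0, 20.0, 30.0])])

# One test sample scoring 4.0 and 40.0: normalized scores are 4/2 = 2 and 40/20 = 2,
# so the aggregated score is approximately 2.0.
print(agg.aggregate([np.array([4.0]), np.array([40.0])]))  # ~[2.]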
oodeel/aggregator/std.py
ADDED
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from typing import List
+
+import numpy as np
+
+from .base import BaseAggregator
+
+
+class StdNormalizedAggregator(BaseAggregator):
+    """
+    Aggregator that normalizes per-layer scores by their standard deviation before
+    aggregating them.
+
+    This aggregator computes the standard deviation of the OOD detection scores for
+    each feature layer during the fitting stage. In the aggregation stage, each layer's
+    score is normalized by its respective standard deviation, and then the average of
+    the normalized scores is computed to produce the final score.
+    """
+
+    def __init__(self):
+        self.stds = None
+
+    def fit(self, per_layer_scores: List[np.ndarray]) -> None:
+        """
+        Compute and store the standard deviation for each feature layer's scores from
+        training data.
+
+        The standard deviation is calculated for each layer across all training samples.
+        A small epsilon (1e-10) is added to each standard deviation to safeguard
+        against division by zero during normalization.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of numpy arrays where each array
+                contains the scores for a specific feature layer
+                (shape: (num_samples,)).
+        """
+        # Stack scores such that the resulting shape is (num_samples, num_layers)
+        scores_stack = np.stack(per_layer_scores, axis=-1)
+        # Compute standard deviation per layer and add a small epsilon to avoid division
+        # by zero
+        self.stds = scores_stack.std(axis=0, keepdims=True) + 1e-10
+
+    def aggregate(self, per_layer_scores: List[np.ndarray]) -> np.ndarray:
+        """
+        Normalize each feature layer's scores by its standard deviation and average
+        across layers.
+
+        Each per-layer score is divided by the corresponding precomputed standard
+        deviation. The final aggregated score for each sample is the mean of the
+        normalized scores across all layers.
+
+        Args:
+            per_layer_scores (List[np.ndarray]): A list of numpy arrays with scores from
+                different feature layers. Each array should be of shape (num_samples,).
+
+        Returns:
+            np.ndarray: A 1D numpy array containing the aggregated score for each
+                sample.
+        """
+        scores_stack = np.stack(per_layer_scores, axis=-1)
+        if self.stds is None:
+            raise ValueError("Aggregator has not been fitted yet.")
+        # Normalize per layer and compute the mean over layers
+        normalized_scores = scores_stack / self.stds
+        return normalized_scores.mean(axis=-1)
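And an equivalent sketch for the standard-deviation variant (again with placeholder numbers): layers whose raw scores live on very different scales contribute comparably once divided by their fit-time standard deviations.

import numpy as np

from oodeel.aggregator import StdNormalizedAggregator

agg = StdNormalizedAggregator()
# Fit-time scores: layer 1 has std 1.0, layer 2 has std 10.0 (population std, ddof=0).
agg.fit([np.array([1.0, 3.0]), np.array([10.0, 30.0])])

# Test sample: 2.0 / 1.0 = 2 and 20.0 / 10.0 = 2, so the aggregated score is ~2.0.
print(agg.aggregate([np.array([2.0]), np.array([20.0])]))  # ~[2.]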
oodeel/datasets/__init__.py
ADDED
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from .data_handler import load_data_handler
+from .deprecated.DEPRECATED_ooddataset import OODDataset