oodeel 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oodeel/__init__.py +28 -0
- oodeel/aggregator/__init__.py +26 -0
- oodeel/aggregator/base.py +70 -0
- oodeel/aggregator/fisher.py +259 -0
- oodeel/aggregator/mean.py +72 -0
- oodeel/aggregator/std.py +86 -0
- oodeel/datasets/__init__.py +24 -0
- oodeel/datasets/data_handler.py +334 -0
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/deprecated/DEPRECATED_ooddataset.py +330 -0
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +600 -0
- oodeel/datasets/torch_data_handler.py +672 -0
- oodeel/eval/__init__.py +22 -0
- oodeel/eval/metrics.py +218 -0
- oodeel/eval/plots/__init__.py +27 -0
- oodeel/eval/plots/features.py +345 -0
- oodeel/eval/plots/metrics.py +118 -0
- oodeel/eval/plots/plotly.py +162 -0
- oodeel/extractor/__init__.py +35 -0
- oodeel/extractor/feature_extractor.py +187 -0
- oodeel/extractor/hf_torch_feature_extractor.py +184 -0
- oodeel/extractor/keras_feature_extractor.py +409 -0
- oodeel/extractor/torch_feature_extractor.py +506 -0
- oodeel/methods/__init__.py +47 -0
- oodeel/methods/base.py +570 -0
- oodeel/methods/dknn.py +185 -0
- oodeel/methods/energy.py +119 -0
- oodeel/methods/entropy.py +113 -0
- oodeel/methods/gen.py +113 -0
- oodeel/methods/gram.py +274 -0
- oodeel/methods/mahalanobis.py +209 -0
- oodeel/methods/mls.py +113 -0
- oodeel/methods/odin.py +109 -0
- oodeel/methods/rmds.py +172 -0
- oodeel/methods/she.py +159 -0
- oodeel/methods/vim.py +273 -0
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/types/__init__.py +75 -0
- oodeel/utils/__init__.py +38 -0
- oodeel/utils/general_utils.py +97 -0
- oodeel/utils/operator.py +253 -0
- oodeel/utils/tf_operator.py +269 -0
- oodeel/utils/tf_training_tools.py +219 -0
- oodeel/utils/torch_operator.py +292 -0
- oodeel/utils/torch_training_tools.py +303 -0
- oodeel-0.4.0.dist-info/METADATA +409 -0
- oodeel-0.4.0.dist-info/RECORD +63 -0
- oodeel-0.4.0.dist-info/WHEEL +5 -0
- oodeel-0.4.0.dist-info/licenses/LICENSE +21 -0
- oodeel-0.4.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +22 -0
- tests/tests_tensorflow/__init__.py +37 -0
- tests/tests_tensorflow/tf_methods_utils.py +140 -0
- tests/tests_tensorflow/tools_tf.py +86 -0
- tests/tests_torch/__init__.py +38 -0
- tests/tests_torch/tools_torch.py +151 -0
- tests/tests_torch/torch_methods_utils.py +148 -0
- tests/tools_operator.py +153 -0
oodeel/eval/metrics.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
|
|
3
|
+
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
|
|
4
|
+
# CRIAQ and ANITI - https://www.deel.ai/
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
import re
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
import sklearn
|
|
27
|
+
|
|
28
|
+
from ..types import Optional
|
|
29
|
+
from ..types import Tuple
|
|
30
|
+
from ..types import Union
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def bench_metrics(
    scores: Union[np.ndarray, tuple],
    labels: Optional[np.ndarray] = None,
    in_value: Optional[int] = 0,
    out_value: Optional[int] = 1,
    metrics: Optional[list] = None,
    threshold: Optional[float] = None,
    step: Optional[int] = 4,
) -> dict:
    """Compute common OOD detection metrics from the scores of an OOD detector.

    Supported metrics are AUROC, detection accuracy, rate-at-rate metrics such as
    FPR95TPR, and callables taken from sklearn.metrics.

    Args:
        scores (Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]): scores output of
            the OOD detector to evaluate. If a tuple is provided, the first array
            is considered in-distribution scores, and the second is considered
            out-of-distribution scores.
        labels (Optional[np.ndarray], optional): labels denoting oodness. Required
            when scores is a np.ndarray; ignored (together with in_value and
            out_value) when scores is a tuple. Defaults to None.
        in_value (Optional[int], optional): value used in labels for
            in-distribution data. It is remapped to 0. Defaults to 0.
        out_value (Optional[int], optional): value used in labels for
            out-of-distribution data. It is remapped to 1. Defaults to 1.
        metrics (Optional[list], optional): metrics to compute. Each item is
            either a string among "auroc", "detect_acc" and "<aaa><XX><bbb>",
            where <aaa> and <bbb> are in ["fpr", "tpr", "fnr", "tnr"] and <XX> is
            an integer between 1 and 99 (value of <aaa> at the first curve point
            where <bbb> crosses XX%), or a callable whose __name__ is listed in
            sklearn.metrics.__all__. Defaults to None, which means
            ["auroc", "fpr95tpr"].
        threshold (Optional[float], optional): decision threshold applied to the
            scores for sklearn metrics whose name does not start with "roc".
            Defaults to None.
        step (Optional[int], optional): stride, in number of sorted scores,
            between two thresholds of the curves (forwarded to get_curve).
            Defaults to 4.

    Returns:
        dict: Dictionary mapping each computed metric name to its value. Metrics
            that cannot be computed are reported on stdout and left out.
    """
    if metrics is None:
        metrics = ["auroc", "fpr95tpr"]
    metrics_dict = {}

    if isinstance(scores, np.ndarray):
        assert labels is not None, (
            "Provide labels with scores, or provide a tuple of in-distribution "
            "and out-of-distribution scores arrays"
        )
        labels = np.copy(labels)  # never modify the caller's array
        # Both masks are computed before any write: remapping sequentially would
        # corrupt the labels when in_value == 1 or out_value == 0.
        is_in = labels == in_value
        is_out = labels == out_value
        labels[is_in] = 0
        labels[is_out] = 1
    elif isinstance(scores, tuple):
        scores_in, scores_out = scores
        scores = np.concatenate([scores_in, scores_out])
        # Labels are built from the shapes only, so NaN or infinite scores cannot
        # leak into them (which `scores * 0` would do).
        labels = np.concatenate(
            [np.zeros(np.shape(scores_in)), np.ones(np.shape(scores_out))]
        )

    fpr, tpr, fnr, tnr, acc = get_curve(scores, labels, step)
    curves = {"fpr": fpr, "tpr": tpr, "fnr": fnr, "tnr": tnr}
    rate_pattern = re.compile(r"^(fpr|tpr|fnr|tnr)(\d{1,2})(fpr|tpr|fnr|tnr)$")

    for metric in metrics:
        if isinstance(metric, str):
            if metric == "auroc":
                # np.trapz was renamed np.trapezoid in NumPy 2.0; feature
                # detection avoids comparing version strings lexicographically.
                integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
                # tpr decreases along the curve, hence the sign flip.
                metrics_dict["auroc"] = -integrate(1.0 - fpr, tpr)
                continue

            if metric == "detect_acc":
                metrics_dict["detect_acc"] = np.max(acc)
                continue

            # <aaa><XX><bbb> metrics (see docstring)
            parsed = rate_pattern.match(metric)
            if parsed is None:
                print(f"Metric {metric} not implemented, skipping")
                continue

            reported_name, level, anchor_name = parsed.groups()
            level = int(level) / 100
            anchor = curves[anchor_name]
            # fpr and tpr run from 1 down to 0 along the curve, tnr and fnr from
            # 0 up to 1: the crossing test depends on the direction.
            if anchor_name in ("fpr", "tpr"):
                crossed = anchor < level
            else:
                crossed = anchor > level
            hits = np.flatnonzero(crossed)
            if hits.size == 0:
                print(
                    f"Metric {metric} cannot be evaluated: {anchor_name} never "
                    f"crosses {level}, skipping"
                )
                continue
            metrics_dict[metric] = curves[reported_name][hits[0]]
            continue

        # Non-string metrics are expected to be callables from sklearn.metrics.
        # The submodule is imported explicitly because a bare `import sklearn`
        # does not guarantee that `sklearn.metrics` is loaded.
        from sklearn import metrics as sklearn_metrics

        metric_name = getattr(metric, "__name__", repr(metric))
        if metric_name not in sklearn_metrics.__all__:
            print(f"Metric {metric_name} not implemented, skipping")
        elif metric_name[:3] == "roc":
            # threshold-free metrics consume the raw scores
            metrics_dict[metric_name] = metric(labels, scores)
        elif threshold is None:
            print(f"No threshold is specified for metric {metric_name}, skipping")
        else:
            oodness = (np.asarray(scores) > threshold).astype(int)
            metrics_dict[metric_name] = metric(labels, oodness)

    return metrics_dict
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def get_curve(
    scores: np.ndarray,
    labels: np.ndarray,
    step: Optional[int] = 4,
    return_raw: Optional[bool] = False,
) -> Union[Tuple[tuple, tuple], tuple]:
    """Compute rate curves of an OOD detector over a range of thresholds.

    For each threshold, the following quantities are computed:
        * true positive rate: TP / (TP + FN),
        * false positive rate: FP / (FP + TN),
        * true negative rate: TN / (FP + TN),
        * false negative rate: FN / (TP + FN),
        * accuracy: (TN + TP) / (TP + FP + TN + FN).

    Thresholds are taken from the sorted scores, starting at the second smallest
    one and then every `step` scores. The four rate curves are padded with their
    theoretical end points (1 then 0 for fpr and tpr, 0 then 1 for tnr and fnr),
    so they hold two more points than `acc`, which is not padded.

    Args:
        scores (np.ndarray): scores output of the OOD detector to evaluate
        labels (np.ndarray): 1 if ood else 0
        step (Optional[int], optional): stride between two consecutive
            thresholds, in number of sorted scores. Defaults to 4.
        return_raw (Optional[bool], optional): To return the raw counts along
            with the rate curves, or only the rate curves. Defaults to False.

    Returns:
        Union[Tuple[Tuple[np.ndarray], Tuple[np.ndarray]], Tuple[np.ndarray]]:
            (fpr, tpr, fnr, tnr, acc), preceded by the raw counts
            (fpc, tpc, fnc, tnc) when return_raw is True.
    """
    thresholds = np.sort(scores)

    # Counts are gathered in a list and converted once: growing arrays with
    # np.append reallocates them at every iteration (quadratic cost).
    counts = [
        ftpn(scores, labels, thresholds[i]) for i in range(1, len(scores), step)
    ]
    # reshape keeps four columns even when no threshold was evaluated
    table = np.array(counts, dtype=float).reshape(-1, 4)
    fpc, tpc, fnc, tnc = (table[:, col].copy() for col in range(4))

    n_neg = fpc + tnc  # in-distribution samples
    n_pos = tpc + fnc  # out-of-distribution samples

    # NOTE: if one of the two classes is absent, the matching denominator is 0
    # and the related rates are NaN.
    fpr = np.concatenate([[1.0], fpc / n_neg, [0.0]])
    tpr = np.concatenate([[1.0], tpc / n_pos, [0.0]])
    tnr = np.concatenate([[0.0], tnc / n_neg, [1.0]])
    fnr = np.concatenate([[0.0], fnc / n_pos, [1.0]])
    acc = (tnc + tpc) / (n_pos + n_neg)

    if return_raw:
        return (fpc, tpc, fnc, tnc), (fpr, tpr, fnr, tnr, acc)
    return fpr, tpr, fnr, tnr, acc
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def ftpn(scores: np.ndarray, labels: np.ndarray, threshold: float) -> tuple:
    """Count the confusion-matrix entries obtained with a given threshold.

    A sample is predicted out-of-distribution (positive) when its score is
    greater than or equal to the threshold, and in-distribution (negative) when
    its score is strictly lower.

    Args:
        scores (np.ndarray): scores output of the OOD detector to evaluate
        labels (np.ndarray): 1 if ood else 0
        threshold (float): threshold to use to consider scores
            as in-distribution or out-of-distribution

    Returns:
        tuple: the four counts, in this order: false positives, true positives,
            false negatives, true negatives.
    """
    # np.asarray makes plain sequences usable as well as arrays
    scores = np.asarray(scores)
    labels = np.asarray(labels)

    predicted_ood = scores >= threshold
    predicted_id = scores < threshold

    # labels are 0/1, so summing them counts the truly OOD samples of each side
    tp = np.sum(labels[predicted_ood])
    fp = np.count_nonzero(predicted_ood) - tp
    fn = np.sum(labels[predicted_id])
    tn = np.count_nonzero(predicted_id) - fn

    return fp, tp, fn, tn
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
|
|
3
|
+
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
|
|
4
|
+
# CRIAQ and ANITI - https://www.deel.ai/
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
from .features import plot_2D_features
|
|
24
|
+
from .features import plot_3D_features
|
|
25
|
+
from .metrics import plot_ood_scores
|
|
26
|
+
from .metrics import plot_roc_curve
|
|
27
|
+
from .plotly import plotly_3D_features
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
|
|
3
|
+
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
|
|
4
|
+
# CRIAQ and ANITI - https://www.deel.ai/
|
|
5
|
+
#
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
import matplotlib.pyplot as plt
|
|
24
|
+
import numpy as np
|
|
25
|
+
import pandas as pd
|
|
26
|
+
import seaborn as sns
|
|
27
|
+
import sklearn
|
|
28
|
+
from matplotlib.lines import Line2D
|
|
29
|
+
from packaging.version import parse
|
|
30
|
+
from sklearn.decomposition import PCA
|
|
31
|
+
from sklearn.manifold import TSNE
|
|
32
|
+
|
|
33
|
+
from ...types import Callable
|
|
34
|
+
from ...types import DatasetType
|
|
35
|
+
from ...types import Union
|
|
36
|
+
from ...utils import import_backend_specific_stuff
|
|
37
|
+
|
|
38
|
+
sns.set_style("darkgrid")

# Supported projection methods: display name, estimator class and the keyword
# arguments passed to the estimator unless the caller overrides them.
PROJ_DICT = {
    "TSNE": {
        "name": "t-SNE",
        "class": TSNE,
        "default_kwargs": dict(perplexity=30.0, n_iter=800, random_state=0),
    },
    "PCA": {"name": "PCA", "class": PCA, "default_kwargs": dict()},
}

# check sklearn version: if >= 1.5, use max_iter instead of n_iter
if parse(sklearn.__version__) >= parse("1.5"):
    _tsne_defaults = PROJ_DICT["TSNE"]["default_kwargs"]
    _tsne_defaults["max_iter"] = _tsne_defaults.pop("n_iter")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def plot_2D_features(
    model: Callable,
    in_dataset: DatasetType,
    output_layer_id: Union[int, str],
    out_dataset: DatasetType = None,
    proj_method: str = "TSNE",
    max_samples: int = 4000,
    title: str = None,
    **proj_kwargs,
):
    """Visualize ID and OOD features of a model on a 2D plane using dimensionality
    reduction methods and matplotlib scatter function. Different projection methods are
    available: TSNE, PCA.

    Args:
        model (Callable): Torch or Keras model.
        in_dataset (DatasetType): In-distribution dataset (torch dataloader or tf
            dataset) that will be projected on the model feature space.
        output_layer_id (Union[int, str]): Identifier for the layer to inspect.
        out_dataset (DatasetType, optional): Out-of-distribution dataset (torch
            dataloader or tf dataset) that will be projected on the model feature space
            if not equal to None. Defaults to None.
        proj_method (str, optional): Projection method for 2d dimensionality reduction.
            Defaults to "TSNE", alternative: "PCA".
        max_samples (int, optional): Max samples to display on the scatter plot.
            Defaults to 4000.
        title (str, optional): Custom figure title. Defaults to None.
        **proj_kwargs: Extra keyword arguments forwarded to the projection method,
            on top of its default ones.
    """
    # thin wrapper: the shared implementation does the work with 2 components
    _plot_features(
        model=model,
        in_dataset=in_dataset,
        output_layer_id=output_layer_id,
        out_dataset=out_dataset,
        proj_method=proj_method,
        max_samples=max_samples,
        title=title,
        n_components=2,
        **proj_kwargs,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def plot_3D_features(
    model: Callable,
    in_dataset: DatasetType,
    output_layer_id: Union[int, str],
    out_dataset: DatasetType = None,
    proj_method: str = "TSNE",
    max_samples: int = 4000,
    title: str = None,
    **proj_kwargs,
):
    """Visualize ID and OOD features of a model on a 3D space using dimensionality
    reduction methods and matplotlib scatter function. Different projection methods are
    available: TSNE, PCA.

    Args:
        model (Callable): Torch or Keras model.
        in_dataset (DatasetType): In-distribution dataset (torch dataloader or tf
            dataset) that will be projected on the model feature space.
        output_layer_id (Union[int, str]): Identifier for the layer to inspect.
        out_dataset (DatasetType, optional): Out-of-distribution dataset (torch
            dataloader or tf dataset) that will be projected on the model feature space
            if not equal to None. Defaults to None.
        proj_method (str, optional): Projection method for 3d dimensionality reduction.
            Defaults to "TSNE", alternative: "PCA".
        max_samples (int, optional): Max samples to display on the scatter plot.
            Defaults to 4000.
        title (str, optional): Custom figure title. Defaults to None.
        **proj_kwargs: Extra keyword arguments forwarded to the projection method,
            on top of its default ones.
    """
    # thin wrapper: the shared implementation does the work with 3 components
    _plot_features(
        model=model,
        in_dataset=in_dataset,
        output_layer_id=output_layer_id,
        out_dataset=out_dataset,
        proj_method=proj_method,
        max_samples=max_samples,
        title=title,
        n_components=3,
        **proj_kwargs,
    )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _plot_features(
    model: Callable,
    in_dataset: DatasetType,
    output_layer_id: Union[int, str],
    out_dataset: DatasetType = None,
    proj_method: str = "TSNE",
    max_samples: int = 4000,
    title: str = None,
    n_components: int = 2,
    **proj_kwargs,
):
    """Visualize ID and OOD features of a model on a 2D or 3D space using dimensionality
    reduction methods and matplotlib scatter function. Different projection methods are
    available: TSNE, PCA.

    Args:
        model (Callable): Torch or Keras model.
        in_dataset (DatasetType): In-distribution dataset (torch dataloader or tf
            dataset) that will be projected on the model feature space. It is
            iterated as (inputs, labels) batches to collect the class labels.
        output_layer_id (Union[int, str]): Identifier for the layer to inspect.
        out_dataset (DatasetType, optional): Out-of-distribution dataset (torch
            dataloader or tf dataset) that will be projected on the model feature space
            if not equal to None. Defaults to None.
        proj_method (str, optional): Projection method for dimensionality reduction,
            a key of PROJ_DICT. Defaults to "TSNE", alternative: "PCA".
        max_samples (int, optional): Max samples to display on the scatter plot. When
            out_dataset is given, half of them are taken from each dataset.
            Defaults to 4000.
        title (str, optional): Custom figure title. If None a default one is provided.
            Defaults to None.
        n_components (int, optional): Dimension of the projection space, 2 or 3.
            Defaults to 2.
        **proj_kwargs: Extra keyword arguments forwarded to the projection method.
            They take precedence over the defaults stored in PROJ_DICT.
    """
    assert n_components in [2, 3], "The number of components should be 2 or 3"
    max_samples = max_samples if out_dataset is None else max_samples // 2

    # feature extractor
    _, _, op, FeatureExtractorClass = import_backend_specific_stuff(model)
    feature_extractor = FeatureExtractorClass(model, [output_layer_id])

    # === extract id features ===
    # features, flattened to one vector per sample
    in_features, _ = feature_extractor.predict(in_dataset, numpy_concat=True)
    in_features = in_features[0].reshape(in_features[0].shape[0], -1)[:max_samples]

    # labels
    in_labels = np.concatenate(
        [op.convert_to_numpy(batch_y) for _, batch_y in in_dataset]
    )[:max_samples]
    in_labels_str = [f"class {x}" for x in in_labels]

    # === extract ood features ===
    if out_dataset is not None:
        # features
        out_features, _ = feature_extractor.predict(out_dataset, numpy_concat=True)
        out_features = out_features[0].reshape(out_features[0].shape[0], -1)[
            :max_samples
        ]
        n_out = len(out_features)

        # labels
        out_labels_str = np.array(["unknown"] * n_out)

        # concatenate id and ood items (ood first)
        features = np.concatenate([out_features, in_features])
        labels_str = np.concatenate([out_labels_str, in_labels_str])
        data_type = np.array(["OOD"] * n_out + ["ID"] * len(in_labels_str))
    else:
        n_out = 0
        features = in_features
        labels_str = in_labels_str
        data_type = np.array(["ID"] * len(in_labels))

    # === project on 2d/3d space using tsne or pca ===
    proj_spec = PROJ_DICT[proj_method]
    # Merge into a new dict: updating the registry defaults in place would make
    # the overrides of one call leak into every later call.
    p_kwargs = {**proj_spec["default_kwargs"], **proj_kwargs}
    projector = proj_spec["class"](n_components=n_components, **p_kwargs)
    features_proj = projector.fit_transform(features)

    # === plot 2d/3d features ===
    features_dim = features.shape[1]
    method_str = proj_spec["name"]
    title = (
        title
        or f"{method_str} {n_components}D projection\n"
        + f"[layer {output_layer_id}, dim: {features_dim}]"
    )

    ax = plt.gca()
    if n_components == 3:
        # the current axes are replaced when they cannot hold a 3d plot
        if ax.name != "3d":
            ax.remove()
            ax = plt.axes(projection="3d")
        ax.set_facecolor("white")

    # 2D projection
    if n_components == 2:
        x, y = features_proj.T
        df = pd.DataFrame(
            {
                "dim 1": x,
                "dim 2": y,
                "Class": labels_str,
                "Data type": data_type,
            }
        )
        s = sns.scatterplot(
            data=df,
            x="dim 1",
            y="dim 2",
            hue="Class",
            hue_order=np.unique(df["Class"]),
            size="Data type",
            sizes=[40, 20],
            style="Data type",
            size_order=["ID", "OOD"],
            style_order=["ID", "OOD"],
            ax=ax,
        )
        s.legend(fontsize=8, bbox_to_anchor=(1.1, 1), borderaxespad=0)
        ax.set_xlabel("Dimension 1")
        ax.set_ylabel("Dimension 2")

    # 3D projection
    else:
        label_min = int(np.min(in_labels))
        label_max = int(np.max(in_labels))
        # one colour per integer label value in [label_min, label_max]
        cmap = plt.get_cmap("tab10", label_max - label_min + 1)

        # id (stored after the ood rows, if any)
        x_in, y_in, z_in = features_proj[n_out:].T
        ax.scatter(
            x_in,
            y_in,
            z_in,
            c=in_labels,
            marker="D",
            label="ID data",
            s=30,
            alpha=1.0,
            cmap=cmap,
            vmin=np.min(in_labels) - 0.5,
            vmax=np.max(in_labels) + 0.5,
            edgecolors="white",
            linewidths=0.3,
        )
        legend_elements = [
            Line2D(
                [],
                [],
                marker="D",
                color="white",
                linestyle="None",
                label=f"class {v}",
                # With the vmin/vmax above, the scatter maps label v to colour
                # index v - label_min; the legend has to use the same index.
                markerfacecolor=cmap(int(v) - label_min),
                markersize=7,
                linewidth=0.3,
            )
            for v in np.unique(in_labels)
        ]

        # ood: drawn and listed in the legend only when ood data was provided
        if n_out > 0:
            x_out, y_out, z_out = features_proj[:n_out].T
            ax.scatter(
                x_out,
                y_out,
                z_out,
                c="darkslategray",
                marker="o",
                label="OOD data",
                s=15,
                alpha=1.0,
                edgecolors="white",
                linewidths=0.3,
            )
            legend_elements.append(
                Line2D(
                    [],
                    [],
                    marker="o",
                    color="white",
                    linestyle="None",
                    label="unknown",
                    markerfacecolor="darkslategray",
                    markersize=7,
                    linewidth=0.3,
                )
            )

        ax.legend(
            title="classes",
            handles=legend_elements,
            loc="upper right",
            fontsize=8,
            bbox_to_anchor=(1.35, 1),
            borderaxespad=0,
        )
        ax.set_xlabel("Dim 1")
        ax.set_ylabel("Dim 2")
        # fit the axis limits to all the projected points, id and ood alike
        x_all, y_all, z_all = features_proj.T
        ax.set_xlim([x_all.min(), x_all.max()])
        ax.set_ylim([y_all.min(), y_all.max()])
        ax.set_zlim([z_all.min(), z_all.max()])

    plt.title(title, weight="bold").set_fontsize(11)
|