neural-feature-importance 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neural_feature_importance/__init__.py +9 -0
- neural_feature_importance/callbacks.py +26 -5
- neural_feature_importance/conv_callbacks.py +104 -0
- neural_feature_importance/embedding_callbacks.py +85 -0
- {neural_feature_importance-0.5.2.dist-info → neural_feature_importance-0.9.1.dist-info}/METADATA +36 -4
- neural_feature_importance-0.9.1.dist-info/RECORD +11 -0
- neural_feature_importance-0.9.1.dist-info/licenses/LICENSE +21 -0
- neural_feature_importance-0.5.2.dist-info/RECORD +0 -8
- {neural_feature_importance-0.5.2.dist-info → neural_feature_importance-0.9.1.dist-info}/WHEEL +0 -0
- {neural_feature_importance-0.5.2.dist-info → neural_feature_importance-0.9.1.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,11 @@ from .callbacks import (
|
|
7
7
|
VarianceImportanceKeras,
|
8
8
|
VarianceImportanceTorch,
|
9
9
|
)
|
10
|
+
from .conv_callbacks import ConvVarianceImportanceKeras, ConvVarianceImportanceTorch
|
11
|
+
from .embedding_callbacks import (
|
12
|
+
EmbeddingVarianceImportanceKeras,
|
13
|
+
EmbeddingVarianceImportanceTorch,
|
14
|
+
)
|
10
15
|
from .utils import MetricThreshold
|
11
16
|
|
12
17
|
try:
|
@@ -19,4 +24,8 @@ __all__ = [
|
|
19
24
|
"VarianceImportanceKeras",
|
20
25
|
"VarianceImportanceTorch",
|
21
26
|
"MetricThreshold",
|
27
|
+
"ConvVarianceImportanceKeras",
|
28
|
+
"ConvVarianceImportanceTorch",
|
29
|
+
"EmbeddingVarianceImportanceKeras",
|
30
|
+
"EmbeddingVarianceImportanceTorch",
|
22
31
|
]
|
@@ -1,4 +1,10 @@
|
|
1
|
-
"""
|
1
|
+
"""Utilities for computing variance-based feature importances.
|
2
|
+
|
3
|
+
These classes track the weights of the first trainable layer during training
|
4
|
+
and estimate feature importances by accumulating the variance of each weight
|
5
|
+
value. After training, the variances are combined with the last observed
|
6
|
+
weights to produce a normalized importance score for every input feature.
|
7
|
+
"""
|
2
8
|
|
3
9
|
from __future__ import annotations
|
4
10
|
|
@@ -13,7 +19,13 @@ logger = logging.getLogger(__name__)
|
|
13
19
|
|
14
20
|
|
15
21
|
class VarianceImportanceBase:
|
16
|
-
"""Compute feature
|
22
|
+
"""Compute feature importances using running variance statistics.
|
23
|
+
|
24
|
+
The class implements Welford's algorithm to accumulate the variance of
|
25
|
+
weight values over training iterations. Feature importances are derived by
|
26
|
+
combining the final variance estimates with the absolute value of the last
|
27
|
+
observed weights.
|
28
|
+
"""
|
17
29
|
|
18
30
|
def __init__(self) -> None:
|
19
31
|
self._n = 0
|
@@ -23,13 +35,21 @@ class VarianceImportanceBase:
|
|
23
35
|
self.var_scores: np.ndarray | None = None
|
24
36
|
|
25
37
|
def start(self, weights: np.ndarray) -> None:
|
26
|
-
"""Initialize statistics
|
38
|
+
"""Initialize running statistics.
|
39
|
+
|
40
|
+
Parameters
|
41
|
+
----------
|
42
|
+
weights:
|
43
|
+
Initial weight matrix of shape ``(features, outputs)``. The values
|
44
|
+
are converted to ``float64`` for numerical stability and the running
|
45
|
+
mean and variance buffers are reset.
|
46
|
+
"""
|
27
47
|
self._mean = weights.astype(np.float64)
|
28
48
|
self._m2 = np.zeros_like(self._mean)
|
29
49
|
self._n = 0
|
30
50
|
|
31
51
|
def update(self, weights: np.ndarray) -> None:
|
32
|
-
"""Update running
|
52
|
+
"""Update running mean and variance using new weights."""
|
33
53
|
if self._mean is None or self._m2 is None:
|
34
54
|
return
|
35
55
|
self._n += 1
|
@@ -40,7 +60,7 @@ class VarianceImportanceBase:
|
|
40
60
|
self._last_weights = weights
|
41
61
|
|
42
62
|
def finalize(self) -> None:
|
43
|
-
"""
|
63
|
+
"""Compute normalized importance scores from accumulated statistics."""
|
44
64
|
if self._last_weights is None or self._m2 is None:
|
45
65
|
logger.warning(
|
46
66
|
"%s was not fully initialized; no scores computed", self.__class__.__name__
|
@@ -67,6 +87,7 @@ class VarianceImportanceBase:
|
|
67
87
|
return self.var_scores
|
68
88
|
|
69
89
|
|
90
|
+
|
70
91
|
class VarianceImportanceKeras(Callback, VarianceImportanceBase):
|
71
92
|
"""Keras callback implementing variance-based feature importance."""
|
72
93
|
|
@@ -0,0 +1,104 @@
|
|
1
|
+
"""Callbacks that extend variance tracking to convolutional layers."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import logging
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
from .callbacks import VarianceImportanceKeras, VarianceImportanceTorch
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
def _flatten_weights(weights: np.ndarray, outputs_last: bool) -> np.ndarray:
    """Return layer kernels as a two-dimensional ``(features, outputs)`` matrix.

    Parameters
    ----------
    weights:
        Weight tensor from a layer. Expected shape is
        ``(H, W, in_channels, out_channels)`` when ``outputs_last`` is ``True``
        and ``(out_channels, in_channels, H, W)`` otherwise. Two-dimensional
        kernels follow the same axis convention, i.e. ``(in, out)`` or
        ``(out, in)`` respectively.
    outputs_last:
        Whether the output dimension is the last axis of ``weights``.

    Returns
    -------
    np.ndarray
        Array of shape ``(features, outputs)`` suitable for variance tracking.
        Arrays with fewer than two dimensions are returned unchanged.
    """
    if weights.ndim < 2:
        return weights
    if outputs_last:
        # Collapse every leading axis into a single feature axis.
        return weights.reshape(-1, weights.shape[-1])
    # Outputs-first kernels, including plain 2-D ``(out, in)`` matrices, are
    # transposed so that rows always index input features.
    return weights.reshape(weights.shape[0], -1).T
|
37
|
+
|
38
|
+
|
39
|
+
class ConvVarianceImportanceKeras(VarianceImportanceKeras):
    """Variance-importance Keras callback that also handles convolution kernels.

    The callback picks the first layer of the model that has trainable
    weights, flattens its first weight array to two dimensions with
    ``_flatten_weights`` (output axis last) and passes the result to
    ``start`` when training begins and to ``update`` after every epoch.
    """

    def on_train_begin(self, logs: Optional[dict] = None) -> None:
        """Select the layer to track and initialise the running statistics.

        Raises
        ------
        ValueError
            If no layer has both trainable weights and weight values.
        """
        self._layer = None
        candidates = (
            layer
            for layer in self.model.layers
            if layer.trainable_weights and layer.get_weights()
        )
        self._layer = next(candidates, None)
        if self._layer is None:
            raise ValueError("Model does not contain trainable weights.")

        kernel = _flatten_weights(self._layer.get_weights()[0], outputs_last=True)
        logger.info(
            "Tracking variance for layer '%s' with %d features",
            self._layer.name,
            kernel.shape[0],
        )
        self.start(kernel)

    def on_epoch_end(self, epoch: int, logs: Optional[dict] = None) -> None:
        """Feed the tracked layer's current kernel into the running statistics."""
        if self._layer is not None:
            kernel = _flatten_weights(self._layer.get_weights()[0], outputs_last=True)
            self.update(kernel)
|
72
|
+
|
73
|
+
|
74
|
+
class ConvVarianceImportanceTorch(VarianceImportanceTorch):
    """PyTorch helper with convolutional support.

    Works analogously to :class:`ConvVarianceImportanceKeras` but for models
    built with :mod:`torch.nn`. The first trainable parameter with two or more
    dimensions is selected and passed through ``_flatten_weights`` (output
    axis first) before being handed to the variance tracker.
    """

    def on_train_begin(self) -> None:
        """Select the parameter to track and initialise the running statistics.

        Raises
        ------
        ValueError
            If the model has no parameter that requires gradients and has at
            least two dimensions.
        """
        # Reset first so a reused tracker never keeps a parameter from an
        # earlier run and the check after the loop is always well-defined.
        self._param = None
        for name, param in self.model.named_parameters():
            if not (param.requires_grad and param.dim() >= 2):
                continue
            self._param = param
            weights = _flatten_weights(
                param.detach().cpu().numpy(), outputs_last=False
            )
            logger.info(
                "Tracking variance for parameter '%s' with %d features",
                name,
                weights.shape[0],
            )
            self.start(weights)
            break
        if self._param is None:
            raise ValueError("Model does not contain trainable parameters")

    def on_epoch_end(self) -> None:
        """Feed the tracked parameter's current values into the running statistics."""
        if self._param is None:
            return
        weights = _flatten_weights(
            self._param.detach().cpu().numpy(), outputs_last=False
        )
        self.update(weights)
|
@@ -0,0 +1,85 @@
|
|
1
|
+
"""Callbacks that compute variance-based importance for embedding layers.
|
2
|
+
|
3
|
+
These callbacks extend :class:`~neural_feature_importance.callbacks.VarianceImportanceBase`
|
4
|
+
to operate on 2-D embedding matrices. The variance of each embedding vector is
|
5
|
+
accumulated over training and the resulting per-token scores are normalized
|
6
|
+
between 0 and 1.
|
7
|
+
"""
|
8
|
+
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
import logging
|
12
|
+
|
13
|
+
import numpy as np
|
14
|
+
|
15
|
+
from .callbacks import VarianceImportanceKeras, VarianceImportanceTorch
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
class EmbeddingVarianceImportanceKeras(VarianceImportanceKeras):
    """Variance-based importance callback for Keras embedding layers.

    During training the parent callback accumulates the running variance of
    the tracked layer's weights (expected to be an
    :class:`~tensorflow.keras.layers.Embedding` matrix). After training the
    variances are summed along axis 1 to yield a single score per row, i.e.
    per token, and the scores are min-max normalized.
    """

    def finalize(self) -> None:  # type: ignore[override]
        """Compute normalized per-token scores and store them in ``var_scores``.

        If the tracker was never initialized, a warning is logged and nothing
        is computed. With fewer than two recorded updates the sample variance
        is undefined, so ``var_scores`` is filled with ``NaN``.
        """
        if self._last_weights is None or self._m2 is None:
            logger.warning(
                "%s was not fully initialized; no scores computed",
                self.__class__.__name__,
            )
            return

        if self._n < 2:
            # Same all-NaN result as normalizing NaN variances would give,
            # but without NumPy's "All-NaN slice" RuntimeWarning and without
            # logging a ranking that carries no information.
            self.var_scores = np.full_like(self._m2, np.nan).sum(axis=1)
            logger.warning(
                "%s received fewer than two updates; scores are NaN",
                self.__class__.__name__,
            )
            return

        # Unbiased sample variance per weight, reduced to one score per row.
        scores = np.sum(self._m2 / (self._n - 1), axis=1)
        lo = float(np.nanmin(scores))
        hi = float(np.nanmax(scores))
        # Guard against division by zero when every score is identical.
        span = hi - lo if hi != lo else 1.0
        self.var_scores = (scores - lo) / span

        top = np.argsort(self.var_scores)[-10:][::-1]
        logger.info("Most important tokens: %s", top)
|
51
|
+
|
52
|
+
|
53
|
+
class EmbeddingVarianceImportanceTorch(VarianceImportanceTorch):
    """Variance-based importance for PyTorch embedding layers.

    Parameters
    ----------
    model:
        Neural network containing an :class:`torch.nn.Embedding` layer whose
        weights will be monitored.
    """

    def finalize(self) -> None:  # type: ignore[override]
        """Compute normalized per-token scores and store them in ``var_scores``.

        If the tracker was never initialized, a warning is logged and nothing
        is computed. With fewer than two recorded updates the sample variance
        is undefined, so ``var_scores`` is filled with ``NaN``.
        """
        if self._last_weights is None or self._m2 is None:
            logger.warning(
                "%s was not fully initialized; no scores computed",
                self.__class__.__name__,
            )
            return

        if self._n < 2:
            # Same all-NaN result as normalizing NaN variances would give,
            # but without NumPy's "All-NaN slice" RuntimeWarning and without
            # logging a ranking that carries no information.
            self.var_scores = np.full_like(self._m2, np.nan).sum(axis=1)
            logger.warning(
                "%s received fewer than two updates; scores are NaN",
                self.__class__.__name__,
            )
            return

        # Unbiased sample variance per weight, reduced to one score per row.
        scores = np.sum(self._m2 / (self._n - 1), axis=1)
        lo = float(np.nanmin(scores))
        hi = float(np.nanmax(scores))
        # Guard against division by zero when every score is identical.
        span = hi - lo if hi != lo else 1.0
        self.var_scores = (scores - lo) / span

        top = np.argsort(self.var_scores)[-10:][::-1]
        logger.info("Most important tokens: %s", top)
|
84
|
+
|
85
|
+
|
{neural_feature_importance-0.5.2.dist-info → neural_feature_importance-0.9.1.dist-info}/METADATA
RENAMED
@@ -1,20 +1,23 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: neural-feature-importance
|
3
|
-
Version: 0.5.2
|
3
|
+
Version: 0.9.1
|
4
4
|
Summary: Variance-based feature importance for Neural Networks using callbacks for Keras and PyTorch
|
5
5
|
Author: CR de Sá
|
6
6
|
Requires-Python: >=3.10
|
7
7
|
Description-Content-Type: text/markdown
|
8
|
+
License-File: LICENSE
|
8
9
|
Requires-Dist: numpy
|
9
10
|
Provides-Extra: tensorflow
|
10
11
|
Requires-Dist: tensorflow; extra == "tensorflow"
|
11
12
|
Provides-Extra: torch
|
12
13
|
Requires-Dist: torch; extra == "torch"
|
14
|
+
Dynamic: license-file
|
13
15
|
|
14
16
|
# neural-feature-importance
|
15
17
|
|
16
18
|
[](https://pypi.org/project/neural-feature-importance/)
|
17
|
-
[](https://www.python.org/downloads/)
|
20
|
+
[](LICENSE)
|
18
21
|
|
19
22
|
Variance-based feature importance for deep learning models.
|
20
23
|
|
@@ -74,19 +77,44 @@ print(tracker.feature_importances_)
|
|
74
77
|
|
75
78
|
## Example scripts
|
76
79
|
|
77
|
-
Run `compare_feature_importance.py` to train a small network on the Iris dataset
|
80
|
+
Run `scripts/compare_feature_importance.py` to train a small network on the Iris dataset
|
78
81
|
and compare the scores with a random forest baseline:
|
79
82
|
|
80
83
|
```bash
|
81
84
|
python scripts/compare_feature_importance.py
|
82
85
|
```
|
83
86
|
|
84
|
-
Run `full_experiment.py` to reproduce the experiments from the paper:
|
87
|
+
Run `scripts/full_experiment.py` to reproduce the experiments from the paper:
|
85
88
|
|
86
89
|
```bash
|
87
90
|
python scripts/full_experiment.py
|
88
91
|
```
|
89
92
|
|
93
|
+
### Convolutional models
|
94
|
+
|
95
|
+
To compute importances for convolutional networks, use
|
96
|
+
`ConvVarianceImportanceKeras` from `neural_feature_importance.conv_callbacks`.
|
97
|
+
`scripts/conv_visualization_example.py` trains small Conv2D models on the MNIST
|
98
|
+
and scikit‑learn digits datasets and displays per-filter heatmaps. An equivalent
|
99
|
+
notebook is available in ``notebooks/conv_visualization_example.ipynb``:
|
100
|
+
|
101
|
+
```bash
|
102
|
+
python scripts/conv_visualization_example.py
|
103
|
+
```
|
104
|
+
|
105
|
+
### Embedding layers
|
106
|
+
|
107
|
+
To compute token importances from embedding weights, use
|
108
|
+
`EmbeddingVarianceImportanceKeras` or `EmbeddingVarianceImportanceTorch` from
|
109
|
+
`neural_feature_importance.embedding_callbacks`.
|
110
|
+
Run `scripts/token_importance_topk_example.py` to train a small text classifier
|
111
|
+
on IMDB and display the most important tokens. A matching notebook lives in
|
112
|
+
``notebooks/token_importance_topk_example.ipynb``:
|
113
|
+
|
114
|
+
```bash
|
115
|
+
python scripts/token_importance_topk_example.py
|
116
|
+
```
|
117
|
+
|
90
118
|
## Development
|
91
119
|
|
92
120
|
After making changes, run the following checks:
|
@@ -124,3 +152,7 @@ If you use this package in your research, please cite:
|
|
124
152
|
```
|
125
153
|
|
126
154
|
We appreciate citations as they help the community discover this work.
|
155
|
+
|
156
|
+
## License
|
157
|
+
|
158
|
+
This project is licensed under the [MIT License](LICENSE).
|
@@ -0,0 +1,11 @@
|
|
1
|
+
neural_feature_importance/__init__.py,sha256=EICAFjL6mHquX5wd1YWeV_6hI_jTgqiGNvKjYL7URSU,914
|
2
|
+
neural_feature_importance/callbacks.py,sha256=dPrxkjh6inf8hI8wGhNL-elBjWuCEWQHWICNmTYqbyE,5677
|
3
|
+
neural_feature_importance/conv_callbacks.py,sha256=L1u7EVAERtRBpNlWcH3u5A49jBcALe8Yv0giMxTdKPM,3789
|
4
|
+
neural_feature_importance/embedding_callbacks.py,sha256=TYc4Xu2MzK3Ff0JBFakiEoZtzCbRsVjoswPocheFcr0,2869
|
5
|
+
neural_feature_importance/utils/__init__.py,sha256=dMjBUCx8DCoJKAEAnjj_daXfEu9Q5va1k8XupmWdZiE,114
|
6
|
+
neural_feature_importance/utils/monitors.py,sha256=LTz7oE0-WgZ50DHyHDnTwfzWSSWMnjWd0xlwt7BWKuU,1763
|
7
|
+
neural_feature_importance-0.9.1.dist-info/licenses/LICENSE,sha256=6v0bh8lk889d7vmcFAqzUbqly-ogYCYdcCcTW4yZ2tg,1066
|
8
|
+
neural_feature_importance-0.9.1.dist-info/METADATA,sha256=l2Q2CbawJVfsux8Y80Vx2bJ_FC72SH-9tLrXy9dpw50,5187
|
9
|
+
neural_feature_importance-0.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
10
|
+
neural_feature_importance-0.9.1.dist-info/top_level.txt,sha256=yP0Q-BG7hDLLu1H1_x5bGEKwkCso5NxxvScnlmICb-o,26
|
11
|
+
neural_feature_importance-0.9.1.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 CR de Sá
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -1,8 +0,0 @@
|
|
1
|
-
neural_feature_importance/__init__.py,sha256=z3Rve0a7QTAhEpCesDhSdbkOwfSsNZgiwDVep1Is_c0,566
|
2
|
-
neural_feature_importance/callbacks.py,sha256=HMHsmVaqZOzy5NSbxN-8CWvq82vzgZgZD53zqp2nAz0,4811
|
3
|
-
neural_feature_importance/utils/__init__.py,sha256=dMjBUCx8DCoJKAEAnjj_daXfEu9Q5va1k8XupmWdZiE,114
|
4
|
-
neural_feature_importance/utils/monitors.py,sha256=LTz7oE0-WgZ50DHyHDnTwfzWSSWMnjWd0xlwt7BWKuU,1763
|
5
|
-
neural_feature_importance-0.5.2.dist-info/METADATA,sha256=W5HuRD-lpppfwv8DhQ26eevxiZUkL-vaevfQDLdbKak,4091
|
6
|
-
neural_feature_importance-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
-
neural_feature_importance-0.5.2.dist-info/top_level.txt,sha256=yP0Q-BG7hDLLu1H1_x5bGEKwkCso5NxxvScnlmICb-o,26
|
8
|
-
neural_feature_importance-0.5.2.dist-info/RECORD,,
|
{neural_feature_importance-0.5.2.dist-info → neural_feature_importance-0.9.1.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|