featlens 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featlens-0.1.0/LICENSE +21 -0
- featlens-0.1.0/PKG-INFO +211 -0
- featlens-0.1.0/README.md +177 -0
- featlens-0.1.0/featlens/__init__.py +48 -0
- featlens-0.1.0/featlens/adapters/__init__.py +42 -0
- featlens-0.1.0/featlens/adapters/base.py +68 -0
- featlens-0.1.0/featlens/adapters/custom_adapter.py +67 -0
- featlens-0.1.0/featlens/adapters/external_adapter.py +59 -0
- featlens-0.1.0/featlens/adapters/hf_adapter.py +56 -0
- featlens-0.1.0/featlens/adapters/timm_adapter.py +62 -0
- featlens-0.1.0/featlens/adapters/torchhub_adapter.py +134 -0
- featlens-0.1.0/featlens/cli.py +84 -0
- featlens-0.1.0/featlens/extractor.py +168 -0
- featlens-0.1.0/featlens/grid.py +170 -0
- featlens-0.1.0/featlens/pca.py +73 -0
- featlens-0.1.0/featlens/preprocess.py +98 -0
- featlens-0.1.0/featlens/registry.py +71 -0
- featlens-0.1.0/featlens/tokens.py +58 -0
- featlens-0.1.0/featlens.egg-info/PKG-INFO +211 -0
- featlens-0.1.0/featlens.egg-info/SOURCES.txt +25 -0
- featlens-0.1.0/featlens.egg-info/dependency_links.txt +1 -0
- featlens-0.1.0/featlens.egg-info/entry_points.txt +2 -0
- featlens-0.1.0/featlens.egg-info/requires.txt +22 -0
- featlens-0.1.0/featlens.egg-info/top_level.txt +1 -0
- featlens-0.1.0/pyproject.toml +43 -0
- featlens-0.1.0/setup.cfg +4 -0
- featlens-0.1.0/tests/test_smoke.py +60 -0
featlens-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Turhan Can Kargın
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
featlens-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: featlens
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Model-agnostic feature-map visualization: PCA-to-RGB feature maps from any vision model and any layer.
|
|
5
|
+
Author: Turhan Can Kargın
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/turhancan97/FeatLens
|
|
8
|
+
Keywords: feature-maps,visualization,pca,vision-transformer,interpretability,dino,clip,vjepa
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: torch>=1.13
|
|
16
|
+
Requires-Dist: torchvision>=0.14
|
|
17
|
+
Requires-Dist: numpy>=1.21
|
|
18
|
+
Requires-Dist: pillow>=9.0
|
|
19
|
+
Requires-Dist: imageio>=2.20
|
|
20
|
+
Requires-Dist: einops>=0.6
|
|
21
|
+
Requires-Dist: matplotlib>=3.5
|
|
22
|
+
Requires-Dist: pyyaml>=5.4
|
|
23
|
+
Provides-Extra: timm
|
|
24
|
+
Requires-Dist: timm>=0.9; extra == "timm"
|
|
25
|
+
Provides-Extra: hf
|
|
26
|
+
Requires-Dist: transformers>=4.30; extra == "hf"
|
|
27
|
+
Provides-Extra: clip
|
|
28
|
+
Requires-Dist: open_clip_torch>=2.20; extra == "clip"
|
|
29
|
+
Provides-Extra: all
|
|
30
|
+
Requires-Dist: timm>=0.9; extra == "all"
|
|
31
|
+
Requires-Dist: transformers>=4.30; extra == "all"
|
|
32
|
+
Requires-Dist: open_clip_torch>=2.20; extra == "all"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# FeatLens
|
|
36
|
+
|
|
37
|
+
[CI tests](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml)
|
|
38
|
+
[PyPI](https://pypi.org/project/featlens/)
|
|
39
|
+
[License: MIT](LICENSE)
|
|
40
|
+
|
|
41
|
+
**See what any vision model encodes.** FeatLens renders PCA-to-RGB **feature maps** for
|
|
42
|
+
**any** vision model — DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT, V-JEPA, CNNs, … — loaded from
|
|
43
|
+
**any** source (timm, HuggingFace `transformers`, `torch.hub`, an external repo, or a model you
|
|
44
|
+
built yourself), and from **any layer**, as a clean **model × layer** grid.
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" alt="DINO feature maps across layers" width="100%">
|
|
48
|
+
</p>
|
|
49
|
+
|
|
50
|
+
Most "DINO PCA" scripts are welded to one model. FeatLens separates **representation access**
|
|
51
|
+
(a small adapter layer over the model zoo) from **visualization** (robust PCA → RGB), so you can
|
|
52
|
+
point it at a new model in seconds and compare models/layers side by side.
|
|
53
|
+
|
|
54
|
+
## Gallery
|
|
55
|
+
|
|
56
|
+
All produced by `examples/quickstart.py` on the three bundled images. Sizes below are the
|
|
57
|
+
**originals**; each image is resized to `img_size` (default 224) before the model.
|
|
58
|
+
|
|
59
|
+
**`visualize(...)` — DINO ViT-B/16 feature maps across layers 2 / 5 / 8 / 11:**
|
|
60
|
+
|
|
61
|
+
| Image (original size) | Source | Feature maps |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| `astronaut.jpg` · 512×512 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/astronaut.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_astronaut.png" width="430"> |
|
|
64
|
+
| `cat.jpg` · 451×300 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/cat.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" width="430"> |
|
|
65
|
+
| `coffee.jpg` · 600×400 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/coffee.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_coffee.png" width="430"> |
|
|
66
|
+
|
|
67
|
+
**`grid(...)` — model × layer, overlaid on the image** (DINO vs DINOv2 across layers 2/5/8/11):
|
|
68
|
+
|
|
69
|
+
<p align="center"><img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/grid_overlay.png" alt="model x layer grid overlay" width="100%"></p>
|
|
70
|
+
|
|
71
|
+
**`compare(...)` — models at the final layer** | **`custom_adapter` — a ResNet-50 (CNN escape hatch)**
|
|
72
|
+
|
|
73
|
+
<p align="center">
|
|
74
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/compare_models.png" alt="compare models at last layer" height="320">
|
|
75
|
+
|
|
76
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/resnet50.png" alt="resnet50 feature map" height="320">
|
|
77
|
+
</p>
|
|
78
|
+
|
|
79
|
+
## Install
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install "featlens[timm]"   # from PyPI: timm backend (DINO, CLIP, SigLIP, DeiT, ...)
pip install -e ".[timm]"       # or, from a source checkout (editable install)
|
|
83
|
+
# extras: [hf] transformers · [clip] open_clip · [all]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Install PyTorch for your platform first (https://pytorch.org).
|
|
87
|
+
|
|
88
|
+
## Quick start (Python)
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import featlens as ll
|
|
92
|
+
|
|
93
|
+
# One model, scrub layers (shared PCA basis -> colors comparable across the row)
|
|
94
|
+
ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png")
|
|
95
|
+
|
|
96
|
+
# Compare models at the final layer (per-tile basis)
|
|
97
|
+
ll.compare(["dino_vitb16", "mae_vitb16", "clip_large_openai"], "img.jpg", layer=-1, out="cmp.png")
|
|
98
|
+
|
|
99
|
+
# Full model x layer grid, overlaid on the image
|
|
100
|
+
ll.grid(["dino_vitb16", "dinov2_vitb14"], "img.jpg", layers=[2, 5, 8, 11], overlay=True, out="grid.png")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Quick start (CLI)
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
featlens --models dino_vitb16 clip_large_openai --layers 2 5 8 11 \
|
|
107
|
+
--images examples/images/cat.jpg --mode grid --out out/grid.png
|
|
108
|
+
featlens --config configs/example.yaml --images examples/images/cat.jpg --out out/grid.png
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Image size & resizing
|
|
112
|
+
|
|
113
|
+
Images are resized to a square **`img_size` × `img_size`** before the model (default **224**).
|
|
114
|
+
`img_size` must be divisible by the model's patch size (multiples of 16 for patch-16 models,
|
|
115
|
+
14 for patch-14). Larger sizes give a finer feature grid at more compute:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], img_size=448) # 32x32 grid
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
For **non-square images**, choose how aspect ratio is handled with `resize_mode`:
|
|
122
|
+
|
|
123
|
+
| `resize_mode` | behavior |
|
|
124
|
+
|---------------|----------|
|
|
125
|
+
| `squash` (default) | resize straight to `img_size²` — may distort |
|
|
126
|
+
| `crop` | resize shortest side to `img_size`, center-crop — aspect preserved |
|
|
127
|
+
| `pad` | resize longest side to `img_size`, pad to square — keeps the whole image |
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
ll.grid([...], "wide.jpg", resize_mode="crop") # Python
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
featlens --models dino_vitb16 --images wide.jpg --resize-mode pad --img-size 448 --out g.png
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
(`FeatureGrid(interpolation_size=…)` is separate — it only upscales the rendered tiles, not the
|
|
138
|
+
model input.)
|
|
139
|
+
|
|
140
|
+
## Model sources
|
|
141
|
+
|
|
142
|
+
| Source | How to pass it | Needs |
|
|
143
|
+
|--------|----------------|-------|
|
|
144
|
+
| **timm** | friendly name (`dinov2_vitb14`) or raw id (`vit_base_patch16_224`) | `[timm]` |
|
|
145
|
+
| **HuggingFace** | `hf:facebook/dinov2-base` | `[hf]` |
|
|
146
|
+
| **torch.hub (V-JEPA)** | `vjepa2_vitl16` | network for weights |
|
|
147
|
+
| **External repo** (VGGT/SPA/…) | `external_adapter.load(repo_dir, builder, hook_target=…)` | the cloned repo |
|
|
148
|
+
| **Your own model** | `custom_adapter.load(model, feature_fn=…)` | — |
|
|
149
|
+
|
|
150
|
+
Friendly names (see `featlens/registry.py`) cover DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT,
|
|
151
|
+
Perception Encoder and V-JEPA; any other timm id works directly.
|
|
152
|
+
|
|
153
|
+
## Layers
|
|
154
|
+
|
|
155
|
+
`layers=[2, 5, 8, 11]` selects **transformer block indices** (0-based, **negatives allowed**,
|
|
156
|
+
`-1` = last). The same convention holds across backends — for HuggingFace models FeatLens maps
|
|
157
|
+
block `i` to `hidden_states[i+1]` (skipping the embedding output) for you.
|
|
158
|
+
|
|
159
|
+
## Bring your own model
|
|
160
|
+
|
|
161
|
+
Anything that isn't built in works through the escape hatch — give a feature function or a hook
|
|
162
|
+
target. CNNs work for free (their conv map is already spatial):
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import torch.nn as nn, torchvision
|
|
166
|
+
from featlens import FeatureExtractor, FeatureGrid
|
|
167
|
+
from featlens.adapters import custom_adapter
|
|
168
|
+
|
|
169
|
+
resnet = torchvision.models.resnet50(weights="DEFAULT")
|
|
170
|
+
trunk = nn.Sequential(*list(resnet.children())[:-2]) # -> [B, 2048, h, w]
|
|
171
|
+
lm = custom_adapter.load(trunk, patch_size=32, feature_fn=lambda m, x: m(x), name="resnet50")
|
|
172
|
+
FeatureGrid([FeatureExtractor(lm)]).render("img.jpg", out_path="resnet50.png")
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
For a model in its own repo, `external_adapter.load(repo_dir, builder, hook_target="blocks")`
|
|
176
|
+
puts the repo on `sys.path`, builds the model, and hooks its blocks.
|
|
177
|
+
|
|
178
|
+
## How it works
|
|
179
|
+
|
|
180
|
+
1. **Adapters** resolve a spec → a `LoadedModel` and drive extraction in one of three modes:
|
|
181
|
+
forward **hooks** on per-block modules (ViTs/CNNs/V-JEPA), HF **`output_hidden_states`**, or a
|
|
182
|
+
user **callable**.
|
|
183
|
+
2. `tokens_to_grid` normalizes whatever a layer emits (`[B,N,D]` tokens with optional
|
|
184
|
+
CLS/register prefixes, or `[B,D,h,w]` maps) into a dense `[B,D,h,w]` grid.
|
|
185
|
+
3. **Robust PCA** (median-absolute-deviation outlier filtering) projects features to RGB;
|
|
186
|
+
`FeatureGrid` lays out the model × layer tiles with a per-tile or shared-per-model basis.
|
|
187
|
+
|
|
188
|
+
The extraction core adapts the `FrozenBackbone` pattern; the PCA is adapted from the SpaRRTa
|
|
189
|
+
feature-map script.
|
|
190
|
+
|
|
191
|
+
## Releasing
|
|
192
|
+
|
|
193
|
+
Releases publish to [PyPI](https://pypi.org/project/featlens/) automatically via
|
|
194
|
+
`.github/workflows/publish.yml` (PyPI **Trusted Publishing** — no API token stored in the repo).
|
|
195
|
+
|
|
196
|
+
One-time setup on PyPI: add a *trusted publisher* for the project (Account → Publishing) with
|
|
197
|
+
owner `turhancan97`, repository `FeatLens`, workflow `publish.yml`, environment `pypi`. PyPI
|
|
198
|
+
supports a *pending* publisher so the very first release can also go through Actions.
|
|
199
|
+
|
|
200
|
+
Then cut a release by pushing a tag:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# bump the version in pyproject.toml first, then:
|
|
204
|
+
git tag v0.1.0 && git push origin v0.1.0
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
The workflow builds the sdist + wheel, runs `twine check`, and uploads to PyPI.
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
|
|
211
|
+
[MIT](LICENSE).
|
featlens-0.1.0/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# FeatLens
|
|
2
|
+
|
|
3
|
+
[CI tests](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/featlens/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
|
|
7
|
+
**See what any vision model encodes.** FeatLens renders PCA-to-RGB **feature maps** for
|
|
8
|
+
**any** vision model — DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT, V-JEPA, CNNs, … — loaded from
|
|
9
|
+
**any** source (timm, HuggingFace `transformers`, `torch.hub`, an external repo, or a model you
|
|
10
|
+
built yourself), and from **any layer**, as a clean **model × layer** grid.
|
|
11
|
+
|
|
12
|
+
<p align="center">
|
|
13
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" alt="DINO feature maps across layers" width="100%">
|
|
14
|
+
</p>
|
|
15
|
+
|
|
16
|
+
Most "DINO PCA" scripts are welded to one model. FeatLens separates **representation access**
|
|
17
|
+
(a small adapter layer over the model zoo) from **visualization** (robust PCA → RGB), so you can
|
|
18
|
+
point it at a new model in seconds and compare models/layers side by side.
|
|
19
|
+
|
|
20
|
+
## Gallery
|
|
21
|
+
|
|
22
|
+
All produced by `examples/quickstart.py` on the three bundled images. Sizes below are the
|
|
23
|
+
**originals**; each image is resized to `img_size` (default 224) before the model.
|
|
24
|
+
|
|
25
|
+
**`visualize(...)` — DINO ViT-B/16 feature maps across layers 2 / 5 / 8 / 11:**
|
|
26
|
+
|
|
27
|
+
| Image (original size) | Source | Feature maps |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| `astronaut.jpg` · 512×512 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/astronaut.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_astronaut.png" width="430"> |
|
|
30
|
+
| `cat.jpg` · 451×300 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/cat.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" width="430"> |
|
|
31
|
+
| `coffee.jpg` · 600×400 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/coffee.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_coffee.png" width="430"> |
|
|
32
|
+
|
|
33
|
+
**`grid(...)` — model × layer, overlaid on the image** (DINO vs DINOv2 across layers 2/5/8/11):
|
|
34
|
+
|
|
35
|
+
<p align="center"><img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/grid_overlay.png" alt="model x layer grid overlay" width="100%"></p>
|
|
36
|
+
|
|
37
|
+
**`compare(...)` — models at the final layer** | **`custom_adapter` — a ResNet-50 (CNN escape hatch)**
|
|
38
|
+
|
|
39
|
+
<p align="center">
|
|
40
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/compare_models.png" alt="compare models at last layer" height="320">
|
|
41
|
+
|
|
42
|
+
<img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/resnet50.png" alt="resnet50 feature map" height="320">
|
|
43
|
+
</p>
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install "featlens[timm]"   # from PyPI: timm backend (DINO, CLIP, SigLIP, DeiT, ...)
pip install -e ".[timm]"       # or, from a source checkout (editable install)
|
|
49
|
+
# extras: [hf] transformers · [clip] open_clip · [all]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Install PyTorch for your platform first (https://pytorch.org).
|
|
53
|
+
|
|
54
|
+
## Quick start (Python)
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import featlens as ll
|
|
58
|
+
|
|
59
|
+
# One model, scrub layers (shared PCA basis -> colors comparable across the row)
|
|
60
|
+
ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png")
|
|
61
|
+
|
|
62
|
+
# Compare models at the final layer (per-tile basis)
|
|
63
|
+
ll.compare(["dino_vitb16", "mae_vitb16", "clip_large_openai"], "img.jpg", layer=-1, out="cmp.png")
|
|
64
|
+
|
|
65
|
+
# Full model x layer grid, overlaid on the image
|
|
66
|
+
ll.grid(["dino_vitb16", "dinov2_vitb14"], "img.jpg", layers=[2, 5, 8, 11], overlay=True, out="grid.png")
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Quick start (CLI)
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
featlens --models dino_vitb16 clip_large_openai --layers 2 5 8 11 \
|
|
73
|
+
--images examples/images/cat.jpg --mode grid --out out/grid.png
|
|
74
|
+
featlens --config configs/example.yaml --images examples/images/cat.jpg --out out/grid.png
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Image size & resizing
|
|
78
|
+
|
|
79
|
+
Images are resized to a square **`img_size` × `img_size`** before the model (default **224**).
|
|
80
|
+
`img_size` must be divisible by the model's patch size (multiples of 16 for patch-16 models,
|
|
81
|
+
14 for patch-14). Larger sizes give a finer feature grid at more compute:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], img_size=448) # 32x32 grid
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
For **non-square images**, choose how aspect ratio is handled with `resize_mode`:
|
|
88
|
+
|
|
89
|
+
| `resize_mode` | behavior |
|
|
90
|
+
|---------------|----------|
|
|
91
|
+
| `squash` (default) | resize straight to `img_size²` — may distort |
|
|
92
|
+
| `crop` | resize shortest side to `img_size`, center-crop — aspect preserved |
|
|
93
|
+
| `pad` | resize longest side to `img_size`, pad to square — keeps the whole image |
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
ll.grid([...], "wide.jpg", resize_mode="crop") # Python
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
featlens --models dino_vitb16 --images wide.jpg --resize-mode pad --img-size 448 --out g.png
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
(`FeatureGrid(interpolation_size=…)` is separate — it only upscales the rendered tiles, not the
|
|
104
|
+
model input.)
|
|
105
|
+
|
|
106
|
+
## Model sources
|
|
107
|
+
|
|
108
|
+
| Source | How to pass it | Needs |
|
|
109
|
+
|--------|----------------|-------|
|
|
110
|
+
| **timm** | friendly name (`dinov2_vitb14`) or raw id (`vit_base_patch16_224`) | `[timm]` |
|
|
111
|
+
| **HuggingFace** | `hf:facebook/dinov2-base` | `[hf]` |
|
|
112
|
+
| **torch.hub (V-JEPA)** | `vjepa2_vitl16` | network for weights |
|
|
113
|
+
| **External repo** (VGGT/SPA/…) | `external_adapter.load(repo_dir, builder, hook_target=…)` | the cloned repo |
|
|
114
|
+
| **Your own model** | `custom_adapter.load(model, feature_fn=…)` | — |
|
|
115
|
+
|
|
116
|
+
Friendly names (see `featlens/registry.py`) cover DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT,
|
|
117
|
+
Perception Encoder and V-JEPA; any other timm id works directly.
|
|
118
|
+
|
|
119
|
+
## Layers
|
|
120
|
+
|
|
121
|
+
`layers=[2, 5, 8, 11]` selects **transformer block indices** (0-based, **negatives allowed**,
|
|
122
|
+
`-1` = last). The same convention holds across backends — for HuggingFace models FeatLens maps
|
|
123
|
+
block `i` to `hidden_states[i+1]` (skipping the embedding output) for you.
|
|
124
|
+
|
|
125
|
+
## Bring your own model
|
|
126
|
+
|
|
127
|
+
Anything that isn't built in works through the escape hatch — give a feature function or a hook
|
|
128
|
+
target. CNNs work for free (their conv map is already spatial):
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
import torch.nn as nn, torchvision
|
|
132
|
+
from featlens import FeatureExtractor, FeatureGrid
|
|
133
|
+
from featlens.adapters import custom_adapter
|
|
134
|
+
|
|
135
|
+
resnet = torchvision.models.resnet50(weights="DEFAULT")
|
|
136
|
+
trunk = nn.Sequential(*list(resnet.children())[:-2]) # -> [B, 2048, h, w]
|
|
137
|
+
lm = custom_adapter.load(trunk, patch_size=32, feature_fn=lambda m, x: m(x), name="resnet50")
|
|
138
|
+
FeatureGrid([FeatureExtractor(lm)]).render("img.jpg", out_path="resnet50.png")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
For a model in its own repo, `external_adapter.load(repo_dir, builder, hook_target="blocks")`
|
|
142
|
+
puts the repo on `sys.path`, builds the model, and hooks its blocks.
|
|
143
|
+
|
|
144
|
+
## How it works
|
|
145
|
+
|
|
146
|
+
1. **Adapters** resolve a spec → a `LoadedModel` and drive extraction in one of three modes:
|
|
147
|
+
forward **hooks** on per-block modules (ViTs/CNNs/V-JEPA), HF **`output_hidden_states`**, or a
|
|
148
|
+
user **callable**.
|
|
149
|
+
2. `tokens_to_grid` normalizes whatever a layer emits (`[B,N,D]` tokens with optional
|
|
150
|
+
CLS/register prefixes, or `[B,D,h,w]` maps) into a dense `[B,D,h,w]` grid.
|
|
151
|
+
3. **Robust PCA** (median-absolute-deviation outlier filtering) projects features to RGB;
|
|
152
|
+
`FeatureGrid` lays out the model × layer tiles with a per-tile or shared-per-model basis.
|
|
153
|
+
|
|
154
|
+
The extraction core adapts the `FrozenBackbone` pattern; the PCA is adapted from the SpaRRTa
|
|
155
|
+
feature-map script.
|
|
156
|
+
|
|
157
|
+
## Releasing
|
|
158
|
+
|
|
159
|
+
Releases publish to [PyPI](https://pypi.org/project/featlens/) automatically via
|
|
160
|
+
`.github/workflows/publish.yml` (PyPI **Trusted Publishing** — no API token stored in the repo).
|
|
161
|
+
|
|
162
|
+
One-time setup on PyPI: add a *trusted publisher* for the project (Account → Publishing) with
|
|
163
|
+
owner `turhancan97`, repository `FeatLens`, workflow `publish.yml`, environment `pypi`. PyPI
|
|
164
|
+
supports a *pending* publisher so the very first release can also go through Actions.
|
|
165
|
+
|
|
166
|
+
Then cut a release by pushing a tag:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
# bump the version in pyproject.toml first, then:
|
|
170
|
+
git tag v0.1.0 && git push origin v0.1.0
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
The workflow builds the sdist + wheel, runs `twine check`, and uploads to PyPI.
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
[MIT](LICENSE).
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""FeatLens — model-agnostic feature-map visualization.
|
|
2
|
+
|
|
3
|
+
Load any vision model (timm / HuggingFace / torch.hub / external repo / your own) and render
|
|
4
|
+
PCA-to-RGB feature maps from any layer, as a model × layer grid.
|
|
5
|
+
|
|
6
|
+
Quick start::
|
|
7
|
+
|
|
8
|
+
import featlens as ll
|
|
9
|
+
ll.grid(["dino_vitb16", "clip_large_openai"], "img.jpg", layers=[2, 5, 8, 11], out="grid.png")
|
|
10
|
+
ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png") # scrub layers
|
|
11
|
+
ll.compare(["dino_vitb16", "mae_vitb16"], "img.jpg", layer=-1, out="cmp.png") # compare models
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Optional, Sequence, Union
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .extractor import FeatureExtractor
|
|
18
|
+
from .grid import FeatureGrid
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
|
21
|
+
__all__ = ["FeatureExtractor", "FeatureGrid", "grid", "visualize", "compare"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
_RENDER_KEYS = ("overlay", "overlay_alpha", "figscale")


def _split_kwargs(kwargs):
    """Separate render-time options from the remaining keyword arguments.

    Every key listed in ``_RENDER_KEYS`` is popped out of ``kwargs`` (the dict is
    modified in place) and collected, in its original order, into a new dict.

    Returns:
        A ``(kwargs, render_kw)`` pair: the same ``kwargs`` object with the render
        keys removed, and the dict of removed render options.
    """
    render_kw = {}
    # Walk a snapshot of the keys, since entries are popped from `kwargs` as we go.
    for key in tuple(kwargs):
        if key in _RENDER_KEYS:
            render_kw[key] = kwargs.pop(key)
    return kwargs, render_kw
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def grid(models, images, layers=None, out=None, **kwargs):
    """Render a full model × layer grid (per-tile PCA basis by default).

    Keyword arguments named in ``_RENDER_KEYS`` are handed to ``FeatureGrid.render``;
    every other keyword argument goes to the ``FeatureGrid`` constructor. ``out`` is
    passed to ``render`` as ``out_path``.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    ctor_kw, render_kw = _split_kwargs(kwargs)
    feature_grid = FeatureGrid(models, layers=layers, **ctor_kw)
    return feature_grid.render(images, out_path=out, **render_kw)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def visualize(model, images, layers=None, out=None, **kwargs):
    """One model across layers, with a shared PCA basis so colors are comparable across the row.

    ``basis`` defaults to ``"shared_per_model"`` unless the caller supplies it. Keyword
    arguments named in ``_RENDER_KEYS`` are handed to ``FeatureGrid.render``; the rest go
    to the ``FeatureGrid`` constructor. ``out`` is passed to ``render`` as ``out_path``.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    if "basis" not in kwargs:
        kwargs["basis"] = "shared_per_model"
    ctor_kw, render_kw = _split_kwargs(kwargs)
    single_model_grid = FeatureGrid([model], layers=layers, **ctor_kw)
    return single_model_grid.render(images, out_path=out, **render_kw)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def compare(models, images, layer: int = -1, out=None, **kwargs):
    """Many models at a single layer (per-tile PCA basis).

    The single ``layer`` index is wrapped into a one-element ``layers`` list for
    ``FeatureGrid``. Keyword arguments named in ``_RENDER_KEYS`` are handed to
    ``FeatureGrid.render``; the rest go to the ``FeatureGrid`` constructor. ``out`` is
    passed to ``render`` as ``out_path``.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    ctor_kw, render_kw = _split_kwargs(kwargs)
    single_layer_grid = FeatureGrid(models, layers=[layer], **ctor_kw)
    return single_layer_grid.render(images, out_path=out, **render_kw)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Adapter registry + dispatch.
|
|
2
|
+
|
|
3
|
+
``load_spec(spec, ...)`` resolves a string spec (via ``registry.resolve_spec``) to a
|
|
4
|
+
``LoadedModel`` using the right backend. The ``custom`` and ``external`` backends are not
|
|
5
|
+
string-dispatched — call ``custom_adapter.load(model, ...)`` /
|
|
6
|
+
``external_adapter.load(repo_dir, builder, ...)`` directly (they need Python objects).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from .base import LoadedModel, infer_patch_size, real_index
|
|
12
|
+
from . import timm_adapter, hf_adapter, torchhub_adapter, custom_adapter, external_adapter
|
|
13
|
+
from ..registry import resolve_spec
|
|
14
|
+
|
|
15
|
+
# Backends that can be selected purely from a string spec, keyed by backend name.
_STRING_BACKENDS = {
    "timm": timm_adapter.load,
    "hf": hf_adapter.load,
    "hub": torchhub_adapter.load,
}


def load_spec(spec: str, img_size: int = 224, pretrained: bool = True, **kwargs) -> LoadedModel:
    """Resolve a string model spec and load it through the matching backend.

    ``resolve_spec`` turns ``spec`` into a ``(backend, ident)`` pair; the loader registered
    for that backend in ``_STRING_BACKENDS`` is then called with ``ident`` and the
    remaining arguments.

    Raises:
        ValueError: if the resolved backend has no string-dispatchable loader.
    """
    backend, ident = resolve_spec(spec)
    loader = _STRING_BACKENDS.get(backend)
    if loader is None:
        raise ValueError(
            f"Backend '{backend}' is not string-dispatchable. Use "
            "custom_adapter.load(model, ...) or external_adapter.load(repo_dir, builder, ...)."
        )
    return loader(ident, img_size=img_size, pretrained=pretrained, **kwargs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"LoadedModel",
|
|
34
|
+
"infer_patch_size",
|
|
35
|
+
"real_index",
|
|
36
|
+
"load_spec",
|
|
37
|
+
"timm_adapter",
|
|
38
|
+
"hf_adapter",
|
|
39
|
+
"torchhub_adapter",
|
|
40
|
+
"custom_adapter",
|
|
41
|
+
"external_adapter",
|
|
42
|
+
]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Adapter base types.
|
|
2
|
+
|
|
3
|
+
Every backend (timm / hf / torchhub / external / custom) resolves a model spec into a
|
|
4
|
+
``LoadedModel`` descriptor. The extractor then drives all of them uniformly via one of
|
|
5
|
+
three modes:
|
|
6
|
+
|
|
7
|
+
- ``"hook"`` : register forward hooks on per-block modules (ViTs, CNNs, V-JEPA).
|
|
8
|
+
- ``"hidden_states"`` : run once with ``output_hidden_states=True`` and read the tuple (HF).
|
|
9
|
+
- ``"callable"`` : call a user ``feature_fn(model, images)`` (the escape hatch).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Callable, Optional, Sequence
|
|
16
|
+
|
|
17
|
+
import torch.nn as nn
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class LoadedModel:
    """Descriptor every backend produces for a loaded model.

    ``mode`` selects how features are pulled out of ``model``:

    - ``"hook"``: forward hooks on the modules returned by ``hook_module_fn``.
    - ``"hidden_states"``: a single run with ``output_hidden_states=True``.
    - ``"callable"``: a call to ``feature_fn(model, images)``.
    """

    # --- required fields -------------------------------------------------------------
    model: nn.Module
    num_blocks: int  # number of blocks that layer indices can address
    embed_dim: int  # feature width (the custom backend stores 0 here)
    patch_size: int  # stride/downsample factor between input pixels and the feature grid
    mean: Sequence[float]  # normalization mean -- presumably per channel; confirm in preprocess
    std: Sequence[float]  # normalization std -- presumably per channel; confirm in preprocess
    mode: str  # one of "hook" | "hidden_states" | "callable"

    # --- mode-specific fields --------------------------------------------------------
    # Used when mode == "hook": (model, block_index) -> submodule to attach the hook to.
    hook_module_fn: Optional[Callable[[nn.Module, int], nn.Module]] = None
    # Used when mode == "callable": feature_fn(model, images) -> [B, N, D] or [B, D, h, w].
    feature_fn: Optional[Callable] = None
    # Used when mode == "hidden_states": block i is read from hidden_states[i + offset];
    # the default of 1 skips the embedding output.
    hidden_states_offset: int = 1

    # --- misc ------------------------------------------------------------------------
    uses_temporal: bool = False  # V-JEPA 2.1 expects a time axis
    name: str = "model"
    extra: dict = field(default_factory=dict)  # fresh dict per instance
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def infer_patch_size(model: nn.Module) -> Optional[int]:
    """Infer a square patch size from ``model.patch_embed``, if one is present.

    Looks first at ``patch_embed.patch_size`` and, when that is missing, at
    ``patch_embed.proj.kernel_size``. Either may be a single number or a 2-element
    tuple/list with equal entries.

    Returns:
        The patch size as an ``int``, or ``None`` when the model has no ``patch_embed``
        or neither attribute is set.

    Raises:
        ValueError: if the size is a tuple/list that is not a pair of equal values.
    """

    def _square_side(size) -> int:
        # Collapse a scalar or an (s, s) pair into one int; reject any other tuple/list.
        if not isinstance(size, (tuple, list)):
            return int(size)
        if len(size) == 2 and size[0] == size[1]:
            return int(size[0])
        raise ValueError(f"Only square patch sizes are supported, got {size}.")

    embed = getattr(model, "patch_embed", None)
    if embed is None:
        return None

    # An explicit `patch_size` attribute wins over the projection's kernel size.
    declared = getattr(embed, "patch_size", None)
    if declared is not None:
        return _square_side(declared)

    kernel = getattr(getattr(embed, "proj", None), "kernel_size", None)
    return None if kernel is None else _square_side(kernel)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def real_index(idx: int, num_blocks: int) -> int:
    """Turn a block index that may be negative into a non-negative one.

    Negative values count from the end (``-1`` is the last block).

    Raises:
        ValueError: if the resolved index falls outside ``[0, num_blocks)``.
    """
    resolved = num_blocks + idx if idx < 0 else idx
    if resolved < 0 or resolved >= num_blocks:
        raise ValueError(f"Layer index {idx} out of range for a model with {num_blocks} blocks.")
    return resolved
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Custom backend — the escape hatch for any model the built-in backends don't cover.
|
|
2
|
+
|
|
3
|
+
Two ways to use it:
|
|
4
|
+
|
|
5
|
+
1. **Hook target** — pass an already-built ``model`` plus ``hook_target``: either a callable
|
|
6
|
+
``(model, block_idx) -> module`` or the name of a ``ModuleList`` attribute (e.g. ``"blocks"``)
|
|
7
|
+
whose elements are hooked.
|
|
8
|
+
2. **feature_fn** — pass ``feature_fn(model, images) -> [B, N, D]`` (or ``[B, D, h, w]``) and
|
|
9
|
+
FeatLens skips hooks entirely. Best for exotic models / single-layer extraction.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Callable, Optional, Sequence, Union
|
|
15
|
+
|
|
16
|
+
import torch.nn as nn
|
|
17
|
+
|
|
18
|
+
from .base import LoadedModel, infer_patch_size, real_index
|
|
19
|
+
from ..preprocess import IMAGENET_MEAN, IMAGENET_STD
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load(
    model: nn.Module,
    *,
    patch_size: Optional[int] = None,
    hook_target: Optional[Union[str, Callable[[nn.Module, int], nn.Module]]] = None,
    feature_fn: Optional[Callable] = None,
    num_blocks: Optional[int] = None,
    mean: Sequence[float] = IMAGENET_MEAN,
    std: Sequence[float] = IMAGENET_STD,
    name: str = "custom",
) -> LoadedModel:
    """Wrap an already-built model in a ``LoadedModel`` descriptor.

    Exactly one of ``hook_target`` / ``feature_fn`` must be given:

    - ``feature_fn`` yields a ``mode="callable"`` descriptor.
    - ``hook_target`` yields a ``mode="hook"`` descriptor. A callable is used as the
      ``(model, block_idx) -> module`` lookup directly; a string names an indexable
      attribute of ``model`` whose elements are looked up by (possibly negative) index.

    ``patch_size`` is inferred via ``infer_patch_size`` when not supplied. ``num_blocks``
    is only consulted for ``feature_fn`` and callable ``hook_target``; for a string
    ``hook_target`` the length of the named attribute is used. A missing or zero block
    count is stored as 1. ``embed_dim`` is always stored as 0.

    Raises:
        ValueError: if both or neither of ``hook_target`` / ``feature_fn`` are given, if
            the patch size cannot be inferred, or if a string ``hook_target`` does not
            name an indexable attribute on the model.
    """
    has_hook = hook_target is not None
    has_fn = feature_fn is not None
    if has_hook == has_fn:
        raise ValueError("Provide exactly one of `hook_target` or `feature_fn`.")

    if patch_size is None:
        patch_size = infer_patch_size(model)
    if patch_size is None:
        raise ValueError(
            "Could not infer patch_size for the custom model; pass patch_size=... "
            "(the stride/downsample factor between input pixels and the feature grid)."
        )

    # Escape hatch: the user's function does the extraction, no hooks involved.
    if has_fn:
        return LoadedModel(
            model=model,
            num_blocks=num_blocks or 1,
            embed_dim=0,
            patch_size=int(patch_size),
            mean=mean,
            std=std,
            mode="callable",
            feature_fn=feature_fn,
            name=name,
        )

    if not callable(hook_target):
        # String target: it must name an indexable container of blocks on the model.
        blocks = getattr(model, hook_target, None)
        if blocks is None or not hasattr(blocks, "__getitem__"):
            raise ValueError(
                f"hook_target '{hook_target}' is not a ModuleList attribute on the model."
            )
        block_count = len(blocks)

        def select_block(m: nn.Module, idx: int) -> nn.Module:
            # Re-read the attribute from `m` and resolve negative indices first.
            return getattr(m, hook_target)[real_index(idx, block_count)]
    else:
        # Callable target: trust the caller's lookup and their block count, if any.
        select_block = hook_target
        block_count = num_blocks or 0

    return LoadedModel(
        model=model,
        num_blocks=block_count or 1,
        embed_dim=0,
        patch_size=int(patch_size),
        mean=mean,
        std=std,
        mode="hook",
        hook_module_fn=select_block,
        name=name,
    )
|