featlens 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featlens-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Turhan Can Kargın
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,211 @@
1
+ Metadata-Version: 2.4
2
+ Name: featlens
3
+ Version: 0.1.0
4
+ Summary: Model-agnostic feature-map visualization: PCA-to-RGB feature maps from any vision model and any layer.
5
+ Author: Turhan Can Kargın
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/turhancan97/FeatLens
8
+ Keywords: feature-maps,visualization,pca,vision-transformer,interpretability,dino,clip,vjepa
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: torch>=1.13
16
+ Requires-Dist: torchvision>=0.14
17
+ Requires-Dist: numpy>=1.21
18
+ Requires-Dist: pillow>=9.0
19
+ Requires-Dist: imageio>=2.20
20
+ Requires-Dist: einops>=0.6
21
+ Requires-Dist: matplotlib>=3.5
22
+ Requires-Dist: pyyaml>=5.4
23
+ Provides-Extra: timm
24
+ Requires-Dist: timm>=0.9; extra == "timm"
25
+ Provides-Extra: hf
26
+ Requires-Dist: transformers>=4.30; extra == "hf"
27
+ Provides-Extra: clip
28
+ Requires-Dist: open_clip_torch>=2.20; extra == "clip"
29
+ Provides-Extra: all
30
+ Requires-Dist: timm>=0.9; extra == "all"
31
+ Requires-Dist: transformers>=4.30; extra == "all"
32
+ Requires-Dist: open_clip_torch>=2.20; extra == "all"
33
+ Dynamic: license-file
34
+
35
+ # FeatLens
36
+
37
+ [![Tests](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml/badge.svg)](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml)
38
+ [![PyPI](https://img.shields.io/pypi/v/featlens)](https://pypi.org/project/featlens/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
40
+
41
+ **See what any vision model encodes.** FeatLens renders PCA-to-RGB **feature maps** for
42
+ **any** vision model — DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT, V-JEPA, CNNs, … — loaded from
43
+ **any** source (timm, HuggingFace `transformers`, `torch.hub`, an external repo, or a model you
44
+ built yourself), and from **any layer**, as a clean **model × layer** grid.
45
+
46
+ <p align="center">
47
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" alt="DINO feature maps across layers" width="100%">
48
+ </p>
49
+
50
+ Most "DINO PCA" scripts are welded to one model. FeatLens separates **representation access**
51
+ (a small adapter layer over the model zoo) from **visualization** (robust PCA → RGB), so you can
52
+ point it at a new model in seconds and compare models/layers side by side.
53
+
54
+ ## Gallery
55
+
56
+ All produced by `examples/quickstart.py` on the three bundled images. Sizes below are the
57
+ **originals**; each image is resized to `img_size` (default 224) before the model.
58
+
59
+ **`visualize(...)` — DINO ViT-B/16 feature maps across layers 2 / 5 / 8 / 11:**
60
+
61
+ | Image (original size) | Source | Feature maps |
62
+ |---|---|---|
63
+ | `astronaut.jpg` · 512×512 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/astronaut.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_astronaut.png" width="430"> |
64
+ | `cat.jpg` · 451×300 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/cat.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" width="430"> |
65
+ | `coffee.jpg` · 600×400 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/coffee.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_coffee.png" width="430"> |
66
+
67
+ **`grid(...)` — model × layer, overlaid on the image** (DINO vs DINOv2 across layers 2/5/8/11):
68
+
69
+ <p align="center"><img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/grid_overlay.png" alt="model x layer grid overlay" width="100%"></p>
70
+
71
+ **`compare(...)` — models at the final layer** &nbsp;|&nbsp; **`custom_adapter` — a ResNet-50 (CNN escape hatch)**
72
+
73
+ <p align="center">
74
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/compare_models.png" alt="compare models at last layer" height="320">
75
+ &nbsp;&nbsp;
76
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/resnet50.png" alt="resnet50 feature map" height="320">
77
+ </p>
78
+
79
+ ## Install
80
+
81
+ ```bash
82
+ pip install -e ".[timm]" # from a repo checkout (from PyPI: pip install "featlens[timm]") — timm backend (DINO, CLIP, SigLIP, DeiT, ...)
83
+ # extras: [hf] transformers · [clip] open_clip · [all]
84
+ ```
85
+
86
+ Install PyTorch for your platform first (https://pytorch.org).
87
+
88
+ ## Quick start (Python)
89
+
90
+ ```python
91
+ import featlens as ll
92
+
93
+ # One model, scrub layers (shared PCA basis -> colors comparable across the row)
94
+ ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png")
95
+
96
+ # Compare models at the final layer (per-tile basis)
97
+ ll.compare(["dino_vitb16", "mae_vitb16", "clip_large_openai"], "img.jpg", layer=-1, out="cmp.png")
98
+
99
+ # Full model x layer grid, overlaid on the image
100
+ ll.grid(["dino_vitb16", "dinov2_vitb14"], "img.jpg", layers=[2, 5, 8, 11], overlay=True, out="grid.png")
101
+ ```
102
+
103
+ ## Quick start (CLI)
104
+
105
+ ```bash
106
+ featlens --models dino_vitb16 clip_large_openai --layers 2 5 8 11 \
107
+ --images examples/images/cat.jpg --mode grid --out out/grid.png
108
+ featlens --config configs/example.yaml --images examples/images/cat.jpg --out out/grid.png
109
+ ```
110
+
111
+ ## Image size & resizing
112
+
113
+ Images are resized to a square **`img_size` × `img_size`** before the model (default **224**).
114
+ `img_size` must be divisible by the model's patch size (multiples of 16 for patch-16 models,
115
+ 14 for patch-14). Larger sizes give a finer feature grid at more compute:
116
+
117
+ ```python
118
+ ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], img_size=448) # 32x32 grid
119
+ ```
120
+
121
+ For **non-square images**, choose how aspect ratio is handled with `resize_mode`:
122
+
123
+ | `resize_mode` | behavior |
124
+ |---------------|----------|
125
+ | `squash` (default) | resize straight to `img_size` × `img_size`, ignoring aspect ratio — may distort |
126
+ | `crop` | resize shortest side to `img_size`, center-crop — aspect preserved |
127
+ | `pad` | resize longest side to `img_size`, pad to square — keeps the whole image |
128
+
129
+ ```python
130
+ ll.grid([...], "wide.jpg", resize_mode="crop") # Python
131
+ ```
132
+
133
+ ```bash
134
+ featlens --models dino_vitb16 --images wide.jpg --resize-mode pad --img-size 448 --out g.png
135
+ ```
136
+
137
+ (`FeatureGrid(interpolation_size=…)` is separate — it only upscales the rendered tiles, not the
138
+ model input.)
139
+
140
+ ## Model sources
141
+
142
+ | Source | How to pass it | Needs |
143
+ |--------|----------------|-------|
144
+ | **timm** | friendly name (`dinov2_vitb14`) or raw id (`vit_base_patch16_224`) | `[timm]` |
145
+ | **HuggingFace** | `hf:facebook/dinov2-base` | `[hf]` |
146
+ | **torch.hub (V-JEPA)** | `vjepa2_vitl16` | network for weights |
147
+ | **External repo** (VGGT/SPA/…) | `external_adapter.load(repo_dir, builder, hook_target=…)` | the cloned repo |
148
+ | **Your own model** | `custom_adapter.load(model, feature_fn=…)` | — |
149
+
150
+ Friendly names (see `featlens/registry.py`) cover DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT,
151
+ Perception Encoder and V-JEPA; any other timm id works directly.
152
+
153
+ ## Layers
154
+
155
+ `layers=[2, 5, 8, 11]` selects **transformer block indices** (0-based, **negatives allowed**,
156
+ `-1` = last). The same convention holds across backends — for HuggingFace models FeatLens maps
157
+ block `i` to `hidden_states[i+1]` (skipping the embedding output) for you.
158
+
159
+ ## Bring your own model
160
+
161
+ Anything that isn't built in works through the escape hatch — give a feature function or a hook
162
+ target. CNNs work for free (their conv map is already spatial):
163
+
164
+ ```python
165
+ import torch.nn as nn, torchvision
166
+ from featlens import FeatureExtractor, FeatureGrid
167
+ from featlens.adapters import custom_adapter
168
+
169
+ resnet = torchvision.models.resnet50(weights="DEFAULT")
170
+ trunk = nn.Sequential(*list(resnet.children())[:-2]) # -> [B, 2048, h, w]
171
+ lm = custom_adapter.load(trunk, patch_size=32, feature_fn=lambda m, x: m(x), name="resnet50")
172
+ FeatureGrid([FeatureExtractor(lm)]).render("img.jpg", out_path="resnet50.png")
173
+ ```
174
+
175
+ For a model in its own repo, `external_adapter.load(repo_dir, builder, hook_target="blocks")`
176
+ puts the repo on `sys.path`, builds the model, and hooks its blocks.
177
+
178
+ ## How it works
179
+
180
+ 1. **Adapters** resolve a spec → a `LoadedModel` and drive extraction in one of three modes:
181
+ forward **hooks** on per-block modules (ViTs/CNNs/V-JEPA), HF **`output_hidden_states`**, or a
182
+ user **callable**.
183
+ 2. `tokens_to_grid` normalizes whatever a layer emits (`[B,N,D]` tokens with optional
184
+ CLS/register prefixes, or `[B,D,h,w]` maps) into a dense `[B,D,h,w]` grid.
185
+ 3. **Robust PCA** (median-absolute-deviation outlier filtering) projects features to RGB;
186
+ `FeatureGrid` lays out the model × layer tiles with a per-tile or shared-per-model basis.
187
+
188
+ The extraction core adapts the `FrozenBackbone` pattern; the PCA is adapted from the SpaRRTa
189
+ feature-map script.
190
+
191
+ ## Releasing
192
+
193
+ Releases publish to [PyPI](https://pypi.org/project/featlens/) automatically via
194
+ `.github/workflows/publish.yml` (PyPI **Trusted Publishing** — no API token stored in the repo).
195
+
196
+ One-time setup on PyPI: add a *trusted publisher* for the project (Account → Publishing) with
197
+ owner `turhancan97`, repository `FeatLens`, workflow `publish.yml`, environment `pypi`. PyPI
198
+ supports a *pending* publisher so the very first release can also go through Actions.
199
+
200
+ Then cut a release by pushing a tag:
201
+
202
+ ```bash
203
+ # bump the version in pyproject.toml first, then:
204
+ git tag v0.1.0 && git push origin v0.1.0
205
+ ```
206
+
207
+ The workflow builds the sdist + wheel, runs `twine check`, and uploads to PyPI.
208
+
209
+ ## License
210
+
211
+ [MIT](LICENSE).
@@ -0,0 +1,177 @@
1
+ # FeatLens
2
+
3
+ [![Tests](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml/badge.svg)](https://github.com/turhancan97/FeatLens/actions/workflows/test.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/featlens)](https://pypi.org/project/featlens/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
6
+
7
+ **See what any vision model encodes.** FeatLens renders PCA-to-RGB **feature maps** for
8
+ **any** vision model — DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT, V-JEPA, CNNs, … — loaded from
9
+ **any** source (timm, HuggingFace `transformers`, `torch.hub`, an external repo, or a model you
10
+ built yourself), and from **any layer**, as a clean **model × layer** grid.
11
+
12
+ <p align="center">
13
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" alt="DINO feature maps across layers" width="100%">
14
+ </p>
15
+
16
+ Most "DINO PCA" scripts are welded to one model. FeatLens separates **representation access**
17
+ (a small adapter layer over the model zoo) from **visualization** (robust PCA → RGB), so you can
18
+ point it at a new model in seconds and compare models/layers side by side.
19
+
20
+ ## Gallery
21
+
22
+ All produced by `examples/quickstart.py` on the three bundled images. Sizes below are the
23
+ **originals**; each image is resized to `img_size` (default 224) before the model.
24
+
25
+ **`visualize(...)` — DINO ViT-B/16 feature maps across layers 2 / 5 / 8 / 11:**
26
+
27
+ | Image (original size) | Source | Feature maps |
28
+ |---|---|---|
29
+ | `astronaut.jpg` · 512×512 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/astronaut.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_astronaut.png" width="430"> |
30
+ | `cat.jpg` · 451×300 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/cat.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_cat.png" width="430"> |
31
+ | `coffee.jpg` · 600×400 | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/images/coffee.jpg" width="110"> | <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/feat_coffee.png" width="430"> |
32
+
33
+ **`grid(...)` — model × layer, overlaid on the image** (DINO vs DINOv2 across layers 2/5/8/11):
34
+
35
+ <p align="center"><img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/grid_overlay.png" alt="model x layer grid overlay" width="100%"></p>
36
+
37
+ **`compare(...)` — models at the final layer** &nbsp;|&nbsp; **`custom_adapter` — a ResNet-50 (CNN escape hatch)**
38
+
39
+ <p align="center">
40
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/compare_models.png" alt="compare models at last layer" height="320">
41
+ &nbsp;&nbsp;
42
+ <img src="https://raw.githubusercontent.com/turhancan97/FeatLens/main/examples/resnet50.png" alt="resnet50 feature map" height="320">
43
+ </p>
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install -e ".[timm]" # from a repo checkout (from PyPI: pip install "featlens[timm]") — timm backend (DINO, CLIP, SigLIP, DeiT, ...)
49
+ # extras: [hf] transformers · [clip] open_clip · [all]
50
+ ```
51
+
52
+ Install PyTorch for your platform first (https://pytorch.org).
53
+
54
+ ## Quick start (Python)
55
+
56
+ ```python
57
+ import featlens as ll
58
+
59
+ # One model, scrub layers (shared PCA basis -> colors comparable across the row)
60
+ ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png")
61
+
62
+ # Compare models at the final layer (per-tile basis)
63
+ ll.compare(["dino_vitb16", "mae_vitb16", "clip_large_openai"], "img.jpg", layer=-1, out="cmp.png")
64
+
65
+ # Full model x layer grid, overlaid on the image
66
+ ll.grid(["dino_vitb16", "dinov2_vitb14"], "img.jpg", layers=[2, 5, 8, 11], overlay=True, out="grid.png")
67
+ ```
68
+
69
+ ## Quick start (CLI)
70
+
71
+ ```bash
72
+ featlens --models dino_vitb16 clip_large_openai --layers 2 5 8 11 \
73
+ --images examples/images/cat.jpg --mode grid --out out/grid.png
74
+ featlens --config configs/example.yaml --images examples/images/cat.jpg --out out/grid.png
75
+ ```
76
+
77
+ ## Image size & resizing
78
+
79
+ Images are resized to a square **`img_size` × `img_size`** before the model (default **224**).
80
+ `img_size` must be divisible by the model's patch size (multiples of 16 for patch-16 models,
81
+ 14 for patch-14). Larger sizes give a finer feature grid at more compute:
82
+
83
+ ```python
84
+ ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], img_size=448) # 32x32 grid
85
+ ```
86
+
87
+ For **non-square images**, choose how aspect ratio is handled with `resize_mode`:
88
+
89
+ | `resize_mode` | behavior |
90
+ |---------------|----------|
91
+ | `squash` (default) | resize straight to `img_size` × `img_size`, ignoring aspect ratio — may distort |
92
+ | `crop` | resize shortest side to `img_size`, center-crop — aspect preserved |
93
+ | `pad` | resize longest side to `img_size`, pad to square — keeps the whole image |
94
+
95
+ ```python
96
+ ll.grid([...], "wide.jpg", resize_mode="crop") # Python
97
+ ```
98
+
99
+ ```bash
100
+ featlens --models dino_vitb16 --images wide.jpg --resize-mode pad --img-size 448 --out g.png
101
+ ```
102
+
103
+ (`FeatureGrid(interpolation_size=…)` is separate — it only upscales the rendered tiles, not the
104
+ model input.)
105
+
106
+ ## Model sources
107
+
108
+ | Source | How to pass it | Needs |
109
+ |--------|----------------|-------|
110
+ | **timm** | friendly name (`dinov2_vitb14`) or raw id (`vit_base_patch16_224`) | `[timm]` |
111
+ | **HuggingFace** | `hf:facebook/dinov2-base` | `[hf]` |
112
+ | **torch.hub (V-JEPA)** | `vjepa2_vitl16` | network for weights |
113
+ | **External repo** (VGGT/SPA/…) | `external_adapter.load(repo_dir, builder, hook_target=…)` | the cloned repo |
114
+ | **Your own model** | `custom_adapter.load(model, feature_fn=…)` | — |
115
+
116
+ Friendly names (see `featlens/registry.py`) cover DINO, DINOv2/v3, CLIP, SigLIP, MAE, DeiT,
117
+ Perception Encoder and V-JEPA; any other timm id works directly.
118
+
119
+ ## Layers
120
+
121
+ `layers=[2, 5, 8, 11]` selects **transformer block indices** (0-based, **negatives allowed**,
122
+ `-1` = last). The same convention holds across backends — for HuggingFace models FeatLens maps
123
+ block `i` to `hidden_states[i+1]` (skipping the embedding output) for you.
124
+
125
+ ## Bring your own model
126
+
127
+ Anything that isn't built in works through the escape hatch — give a feature function or a hook
128
+ target. CNNs work for free (their conv map is already spatial):
129
+
130
+ ```python
131
+ import torch.nn as nn, torchvision
132
+ from featlens import FeatureExtractor, FeatureGrid
133
+ from featlens.adapters import custom_adapter
134
+
135
+ resnet = torchvision.models.resnet50(weights="DEFAULT")
136
+ trunk = nn.Sequential(*list(resnet.children())[:-2]) # -> [B, 2048, h, w]
137
+ lm = custom_adapter.load(trunk, patch_size=32, feature_fn=lambda m, x: m(x), name="resnet50")
138
+ FeatureGrid([FeatureExtractor(lm)]).render("img.jpg", out_path="resnet50.png")
139
+ ```
140
+
141
+ For a model in its own repo, `external_adapter.load(repo_dir, builder, hook_target="blocks")`
142
+ puts the repo on `sys.path`, builds the model, and hooks its blocks.
143
+
144
+ ## How it works
145
+
146
+ 1. **Adapters** resolve a spec → a `LoadedModel` and drive extraction in one of three modes:
147
+ forward **hooks** on per-block modules (ViTs/CNNs/V-JEPA), HF **`output_hidden_states`**, or a
148
+ user **callable**.
149
+ 2. `tokens_to_grid` normalizes whatever a layer emits (`[B,N,D]` tokens with optional
150
+ CLS/register prefixes, or `[B,D,h,w]` maps) into a dense `[B,D,h,w]` grid.
151
+ 3. **Robust PCA** (median-absolute-deviation outlier filtering) projects features to RGB;
152
+ `FeatureGrid` lays out the model × layer tiles with a per-tile or shared-per-model basis.
153
+
154
+ The extraction core adapts the `FrozenBackbone` pattern; the PCA is adapted from the SpaRRTa
155
+ feature-map script.
156
+
157
+ ## Releasing
158
+
159
+ Releases publish to [PyPI](https://pypi.org/project/featlens/) automatically via
160
+ `.github/workflows/publish.yml` (PyPI **Trusted Publishing** — no API token stored in the repo).
161
+
162
+ One-time setup on PyPI: add a *trusted publisher* for the project (Account → Publishing) with
163
+ owner `turhancan97`, repository `FeatLens`, workflow `publish.yml`, environment `pypi`. PyPI
164
+ supports a *pending* publisher so the very first release can also go through Actions.
165
+
166
+ Then cut a release by pushing a tag:
167
+
168
+ ```bash
169
+ # bump the version in pyproject.toml first, then:
170
+ git tag v0.1.0 && git push origin v0.1.0
171
+ ```
172
+
173
+ The workflow builds the sdist + wheel, runs `twine check`, and uploads to PyPI.
174
+
175
+ ## License
176
+
177
+ [MIT](LICENSE).
@@ -0,0 +1,48 @@
1
+ """FeatLens — model-agnostic feature-map visualization.
2
+
3
+ Load any vision model (timm / HuggingFace / torch.hub / external repo / your own) and render
4
+ PCA-to-RGB feature maps from any layer, as a model × layer grid.
5
+
6
+ Quick start::
7
+
8
+ import featlens as ll
9
+ ll.grid(["dino_vitb16", "clip_large_openai"], "img.jpg", layers=[2, 5, 8, 11], out="grid.png")
10
+ ll.visualize("dinov2_vitb14", "img.jpg", layers=[2, 5, 8, 11], out="row.png") # scrub layers
11
+ ll.compare(["dino_vitb16", "mae_vitb16"], "img.jpg", layer=-1, out="cmp.png") # compare models
12
+ """
13
+
14
+ from typing import Optional, Sequence, Union
15
+ from pathlib import Path
16
+
17
+ from .extractor import FeatureExtractor
18
+ from .grid import FeatureGrid
19
+
20
+ __version__ = "0.1.0"
21
+ __all__ = ["FeatureExtractor", "FeatureGrid", "grid", "visualize", "compare"]
22
+
23
+
24
+ _RENDER_KEYS = ("overlay", "overlay_alpha", "figscale")
25
+
26
+
27
+ def _split_kwargs(kwargs):
28
+ render_kw = {k: kwargs.pop(k) for k in list(kwargs) if k in _RENDER_KEYS}
29
+ return kwargs, render_kw
30
+
31
+
32
def grid(models, images, layers=None, out=None, **kwargs):
    """Render a full model × layer grid (per-tile PCA basis by default).

    Args:
        models: Forwarded as the first argument of ``FeatureGrid``.
        images: Forwarded as the first argument of ``FeatureGrid.render``.
        layers: Forwarded to ``FeatureGrid`` as ``layers``.
        out: Forwarded to ``FeatureGrid.render`` as ``out_path``.
        **kwargs: ``overlay`` / ``overlay_alpha`` / ``figscale`` go to ``render``; every
            other keyword goes to the ``FeatureGrid`` constructor.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    grid_options, render_options = _split_kwargs(kwargs)
    feature_grid = FeatureGrid(models, layers=layers, **grid_options)
    return feature_grid.render(images, out_path=out, **render_options)
36
+
37
+
38
def visualize(model, images, layers=None, out=None, **kwargs):
    """Render one model across layers with a shared PCA basis.

    Unless the caller passes ``basis`` explicitly, it is set to ``"shared_per_model"`` so
    that colors are comparable across the row.

    Args:
        model: The single model; wrapped in a one-element list for ``FeatureGrid``.
        images: Forwarded as the first argument of ``FeatureGrid.render``.
        layers: Forwarded to ``FeatureGrid`` as ``layers``.
        out: Forwarded to ``FeatureGrid.render`` as ``out_path``.
        **kwargs: ``overlay`` / ``overlay_alpha`` / ``figscale`` go to ``render``; every
            other keyword goes to the ``FeatureGrid`` constructor.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    if "basis" not in kwargs:
        kwargs["basis"] = "shared_per_model"
    grid_options, render_options = _split_kwargs(kwargs)
    feature_grid = FeatureGrid([model], layers=layers, **grid_options)
    return feature_grid.render(images, out_path=out, **render_options)
43
+
44
+
45
def compare(models, images, layer: int = -1, out=None, **kwargs):
    """Render many models at a single layer (per-tile PCA basis).

    Args:
        models: Forwarded as the first argument of ``FeatureGrid``.
        images: Forwarded as the first argument of ``FeatureGrid.render``.
        layer: The one layer index to show; passed to ``FeatureGrid`` as ``layers=[layer]``.
        out: Forwarded to ``FeatureGrid.render`` as ``out_path``.
        **kwargs: ``overlay`` / ``overlay_alpha`` / ``figscale`` go to ``render``; every
            other keyword goes to the ``FeatureGrid`` constructor.

    Returns:
        Whatever ``FeatureGrid.render`` returns.
    """
    grid_options, render_options = _split_kwargs(kwargs)
    feature_grid = FeatureGrid(models, layers=[layer], **grid_options)
    return feature_grid.render(images, out_path=out, **render_options)
@@ -0,0 +1,42 @@
1
+ """Adapter registry + dispatch.
2
+
3
+ ``load_spec(spec, ...)`` resolves a string spec (via ``registry.resolve_spec``) to a
4
+ ``LoadedModel`` using the right backend. The ``custom`` and ``external`` backends are not
5
+ string-dispatched — call ``custom_adapter.load(model, ...)`` /
6
+ ``external_adapter.load(repo_dir, builder, ...)`` directly (they need Python objects).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .base import LoadedModel, infer_patch_size, real_index
12
+ from . import timm_adapter, hf_adapter, torchhub_adapter, custom_adapter, external_adapter
13
+ from ..registry import resolve_spec
14
+
15
+ _STRING_BACKENDS = {
16
+ "timm": timm_adapter.load,
17
+ "hf": hf_adapter.load,
18
+ "hub": torchhub_adapter.load,
19
+ }
20
+
21
+
22
+ def load_spec(spec: str, img_size: int = 224, pretrained: bool = True, **kwargs) -> LoadedModel:
23
+ backend, ident = resolve_spec(spec)
24
+ if backend not in _STRING_BACKENDS:
25
+ raise ValueError(
26
+ f"Backend '{backend}' is not string-dispatchable. Use "
27
+ "custom_adapter.load(model, ...) or external_adapter.load(repo_dir, builder, ...)."
28
+ )
29
+ return _STRING_BACKENDS[backend](ident, img_size=img_size, pretrained=pretrained, **kwargs)
30
+
31
+
32
+ __all__ = [
33
+ "LoadedModel",
34
+ "infer_patch_size",
35
+ "real_index",
36
+ "load_spec",
37
+ "timm_adapter",
38
+ "hf_adapter",
39
+ "torchhub_adapter",
40
+ "custom_adapter",
41
+ "external_adapter",
42
+ ]
@@ -0,0 +1,68 @@
1
+ """Adapter base types.
2
+
3
+ Every backend (timm / hf / torchhub / external / custom) resolves a model spec into a
4
+ ``LoadedModel`` descriptor. The extractor then drives all of them uniformly via one of
5
+ three modes:
6
+
7
+ - ``"hook"`` : register forward hooks on per-block modules (ViTs, CNNs, V-JEPA).
8
+ - ``"hidden_states"`` : run once with ``output_hidden_states=True`` and read the tuple (HF).
9
+ - ``"callable"`` : call a user ``feature_fn(model, images)`` (the escape hatch).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass, field
15
+ from typing import Callable, Optional, Sequence
16
+
17
+ import torch.nn as nn
18
+
19
+
20
@dataclass
class LoadedModel:
    """Descriptor that a backend returns for a loaded model.

    ``mode`` selects which of the optional fields applies:

    - ``"hook"``: ``hook_module_fn(model, block_index)`` returns the submodule to hook.
    - ``"callable"``: ``feature_fn(model, images)`` returns ``[B, N, D]`` or ``[B, D, h, w]``.
    - ``"hidden_states"``: block index ``i`` maps to ``hidden_states[i + 1]`` by default
      (``hidden_states_offset`` skips the embedding output).
    """

    model: nn.Module
    num_blocks: int
    embed_dim: int
    patch_size: int
    mean: Sequence[float]
    std: Sequence[float]
    # One of "hook", "hidden_states" or "callable".
    mode: str
    # Used when mode == "hook": (model, block_index) -> submodule to hook.
    hook_module_fn: Optional[Callable[[nn.Module, int], nn.Module]] = None
    # Used when mode == "callable": feature_fn(model, images) -> [B, N, D] or [B, D, h, w].
    feature_fn: Optional[Callable] = None
    # Used when mode == "hidden_states": the default of 1 skips the embedding output.
    hidden_states_offset: int = 1
    # V-JEPA 2.1 expects a time axis.
    uses_temporal: bool = False
    name: str = "model"
    # A fresh dict per instance (default_factory), never shared between descriptors.
    extra: dict = field(default_factory=dict)
38
+
39
+
40
def infer_patch_size(model: nn.Module) -> Optional[int]:
    """Infer a square ViT patch size from a model's ``patch_embed``, if present.

    ``patch_embed.patch_size`` is consulted first; when it is missing or ``None``, the
    function falls back to ``patch_embed.proj.kernel_size``.

    Returns:
        The patch side length as an ``int``, or ``None`` when the model has no
        ``patch_embed`` or neither attribute is available.

    Raises:
        ValueError: If the size is a tuple/list that is not a pair of equal values.
    """

    def _square_side(size):
        # Collapse an int-like or a (side, side) pair into one int; reject anything else.
        if not isinstance(size, (tuple, list)):
            return int(size)
        if len(size) == 2 and size[0] == size[1]:
            return int(size[0])
        raise ValueError(f"Only square patch sizes are supported, got {size}.")

    embed = getattr(model, "patch_embed", None)
    if embed is None:
        return None

    declared = getattr(embed, "patch_size", None)
    if declared is not None:
        return _square_side(declared)

    # Fallback: read the kernel size of the patch projection module.
    kernel = getattr(getattr(embed, "proj", None), "kernel_size", None)
    if kernel is not None:
        return _square_side(kernel)
    return None
61
+
62
+
63
def real_index(idx: int, num_blocks: int) -> int:
    """Resolve a possibly-negative block index against the block count.

    Negative indices count from the end (``-1`` is the last block).

    Returns:
        The equivalent index in ``range(num_blocks)``.

    Raises:
        ValueError: If the resolved index falls outside ``range(num_blocks)``.
    """
    resolved = num_blocks + idx if idx < 0 else idx
    if 0 <= resolved < num_blocks:
        return resolved
    raise ValueError(f"Layer index {idx} out of range for a model with {num_blocks} blocks.")
@@ -0,0 +1,67 @@
1
+ """Custom backend — the escape hatch for any model the built-in backends don't cover.
2
+
3
+ Two ways to use it:
4
+
5
+ 1. **Hook target** — pass an already-built ``model`` plus ``hook_target``: either a callable
6
+ ``(model, block_idx) -> module`` or the name of a ``ModuleList`` attribute (e.g. ``"blocks"``)
7
+ whose elements are hooked.
8
+ 2. **feature_fn** — pass ``feature_fn(model, images) -> [B, N, D]`` (or ``[B, D, h, w]``) and
9
+ FeatLens skips hooks entirely. Best for exotic models / single-layer extraction.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Callable, Optional, Sequence, Union
15
+
16
+ import torch.nn as nn
17
+
18
+ from .base import LoadedModel, infer_patch_size, real_index
19
+ from ..preprocess import IMAGENET_MEAN, IMAGENET_STD
20
+
21
+
22
def load(
    model: nn.Module,
    *,
    patch_size: Optional[int] = None,
    hook_target: Optional[Union[str, Callable[[nn.Module, int], nn.Module]]] = None,
    feature_fn: Optional[Callable] = None,
    num_blocks: Optional[int] = None,
    mean: Sequence[float] = IMAGENET_MEAN,
    std: Sequence[float] = IMAGENET_STD,
    name: str = "custom",
) -> LoadedModel:
    """Wrap an already-built model in a ``LoadedModel`` descriptor.

    Exactly one of ``hook_target`` / ``feature_fn`` must be given.

    Args:
        model: The model to wrap.
        patch_size: Stride/downsample factor between input pixels and the feature grid.
            When omitted it is inferred with ``infer_patch_size``.
        hook_target: Either a callable ``(model, block_idx) -> module``, or the name of
            an indexable attribute on the model (e.g. ``"blocks"``) whose elements are
            hooked. The name may be a dotted path (e.g. ``"encoder.blocks"``) to reach a
            block list nested inside submodules.
        feature_fn: ``feature_fn(model, images)``; selects ``mode="callable"``.
        num_blocks: Block count. Used for ``feature_fn`` and callable ``hook_target``
            (falls back to 1 when omitted); ignored for a string ``hook_target``, where
            the length of the resolved container is used instead.
        mean: Stored on the descriptor as ``mean``.
        std: Stored on the descriptor as ``std``.
        name: Stored on the descriptor as ``name``.

    Returns:
        A ``LoadedModel`` with ``mode="callable"`` (``feature_fn``) or ``mode="hook"``
        (``hook_target``). ``embed_dim`` is always set to 0 by this backend.

    Raises:
        ValueError: If both or neither of ``hook_target`` / ``feature_fn`` are given, if
            ``patch_size`` cannot be inferred, or if a string ``hook_target`` does not
            resolve to an indexable, sized container.
        TypeError: If ``hook_target`` is neither a string nor a callable.
    """
    if (hook_target is None) == (feature_fn is None):
        raise ValueError("Provide exactly one of `hook_target` or `feature_fn`.")

    if patch_size is None:
        patch_size = infer_patch_size(model)
    if patch_size is None:
        raise ValueError(
            "Could not infer patch_size for the custom model; pass patch_size=... "
            "(the stride/downsample factor between input pixels and the feature grid)."
        )

    if feature_fn is not None:
        return LoadedModel(
            model=model, num_blocks=num_blocks or 1, embed_dim=0, patch_size=int(patch_size),
            mean=mean, std=std, mode="callable", feature_fn=feature_fn, name=name,
        )

    if callable(hook_target):
        hook_fn = hook_target
        nblocks = num_blocks or 0
    elif isinstance(hook_target, str):
        attr_path = hook_target.split(".")

        def _resolve(root: nn.Module):
            # Walk the (possibly dotted) attribute path; raises AttributeError when any
            # component is missing.
            node = root
            for part in attr_path:
                node = getattr(node, part)
            return node

        try:
            module_list = _resolve(model)
        except AttributeError:
            module_list = None
        # Require both indexing and len(): hook_fn indexes the container and its length
        # defines the block count.
        if (
            module_list is None
            or not hasattr(module_list, "__getitem__")
            or not hasattr(module_list, "__len__")
        ):
            raise ValueError(
                f"hook_target '{hook_target}' is not a ModuleList attribute on the model."
            )
        nblocks = len(module_list)

        def hook_fn(m: nn.Module, idx: int) -> nn.Module:
            # Re-resolve on the model that is passed in rather than caching the container.
            return _resolve(m)[real_index(idx, nblocks)]
    else:
        raise TypeError(
            "hook_target must be an attribute name (str) or a callable "
            f"(model, block_idx) -> module, got {type(hook_target).__name__}."
        )

    return LoadedModel(
        model=model, num_blocks=nblocks or 1, embed_dim=0, patch_size=int(patch_size),
        mean=mean, std=std, mode="hook", hook_module_fn=hook_fn, name=name,
    )