brainscore-vision 2.1-py3-none-any.whl → 2.1.0-py3-none-any.whl
- brainscore_vision/benchmarks/coggan2024_behavior/__init__.py +2 -1
- brainscore_vision/benchmarks/coggan2024_behavior/test.py +2 -2
- brainscore_vision/benchmarks/coggan2024_fMRI/__init__.py +4 -4
- brainscore_vision/benchmarks/coggan2024_fMRI/test.py +2 -2
- brainscore_vision/benchmarks/imagenet/imagenet2012.csv +50000 -50000
- brainscore_vision/benchmarks/imagenet_c/benchmark.py +1 -1
- brainscore_vision/benchmarks/lonnqvist2024/__init__.py +8 -0
- brainscore_vision/benchmarks/lonnqvist2024/benchmark.py +125 -0
- brainscore_vision/benchmarks/lonnqvist2024/test.py +61 -0
- brainscore_vision/benchmarks/malania2007/benchmark.py +3 -0
- brainscore_vision/benchmarks/maniquet2024/benchmark.py +1 -1
- brainscore_vision/data/lonnqvist2024/__init__.py +47 -0
- brainscore_vision/data/lonnqvist2024/data_packaging/lonnqvist_data_assembly.py +53 -0
- brainscore_vision/data/lonnqvist2024/data_packaging/lonnqvist_stimulus_set.py +61 -0
- brainscore_vision/data/lonnqvist2024/test.py +127 -0
- brainscore_vision/model_helpers/brain_transformation/__init__.py +33 -0
- brainscore_vision/models/alexnet/region_layer_map/alexnet.json +1 -0
- brainscore_vision/models/alexnet_7be5be79/setup.py +4 -4
- brainscore_vision/models/alexnet_random/__init__.py +7 -0
- brainscore_vision/models/alexnet_random/model.py +46 -0
- brainscore_vision/models/alexnet_random/setup.py +26 -0
- brainscore_vision/models/alexnet_random/test.py +1 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/region_layer_map/cvt_cvt-13-224-in1k_4.json +6 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/region_layer_map/cvt_cvt-13-224-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_13_224_in1k_4/test.py +8 -0
- brainscore_vision/models/cvt_cvt_13_384_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_13_384_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_13_384_in1k_4/region_layer_map/cvt_cvt-13-384-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_13_384_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_13_384_in1k_4/test.py +8 -0
- brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/region_layer_map/cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/test.py +8 -0
- brainscore_vision/models/cvt_cvt_21_224_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_21_224_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_21_224_in1k_4/region_layer_map/cvt_cvt-21-224-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_21_224_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_21_224_in1k_4/test.py +8 -0
- brainscore_vision/models/cvt_cvt_21_384_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_21_384_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_21_384_in1k_4/region_layer_map/cvt_cvt-21-384-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_21_384_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_21_384_in1k_4/test.py +8 -0
- brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/__init__.py +9 -0
- brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/model.py +142 -0
- brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/region_layer_map/cvt_cvt-21-384-in22k_finetuned-in1k_4_LucyV4.json +6 -0
- brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/requirements.txt +4 -0
- brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/test.py +8 -0
- brainscore_vision/models/fixres_resnext101_32x48d_wsl/__init__.py +7 -0
- brainscore_vision/models/fixres_resnext101_32x48d_wsl/model.py +57 -0
- brainscore_vision/models/fixres_resnext101_32x48d_wsl/requirements.txt +5 -0
- brainscore_vision/models/fixres_resnext101_32x48d_wsl/test.py +7 -0
- brainscore_vision/models/inception_v4_pytorch/__init__.py +7 -0
- brainscore_vision/models/inception_v4_pytorch/model.py +64 -0
- brainscore_vision/models/inception_v4_pytorch/requirements.txt +3 -0
- brainscore_vision/models/inception_v4_pytorch/test.py +8 -0
- brainscore_vision/models/mvimgnet_ms_05/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ms_05/model.py +64 -0
- brainscore_vision/models/mvimgnet_ms_05/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ms_05/test.py +1 -0
- brainscore_vision/models/mvimgnet_rf/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_rf/model.py +64 -0
- brainscore_vision/models/mvimgnet_rf/setup.py +25 -0
- brainscore_vision/models/mvimgnet_rf/test.py +1 -0
- brainscore_vision/models/mvimgnet_ss_00/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ss_00/model.py +64 -0
- brainscore_vision/models/mvimgnet_ss_00/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ss_00/test.py +1 -0
- brainscore_vision/models/mvimgnet_ss_02/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ss_02/model.py +64 -0
- brainscore_vision/models/mvimgnet_ss_02/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ss_02/test.py +1 -0
- brainscore_vision/models/mvimgnet_ss_03/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ss_03/model.py +64 -0
- brainscore_vision/models/mvimgnet_ss_03/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ss_03/test.py +1 -0
- brainscore_vision/models/mvimgnet_ss_04/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ss_04/model.py +64 -0
- brainscore_vision/models/mvimgnet_ss_04/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ss_04/test.py +1 -0
- brainscore_vision/models/mvimgnet_ss_05/__init__.py +9 -0
- brainscore_vision/models/mvimgnet_ss_05/model.py +64 -0
- brainscore_vision/models/mvimgnet_ss_05/setup.py +25 -0
- brainscore_vision/models/mvimgnet_ss_05/test.py +1 -0
- brainscore_vision/models/resnet50_tutorial/region_layer_map/resnet50_tutorial.json +1 -0
- brainscore_vision/models/sam_test_resnet/__init__.py +5 -0
- brainscore_vision/models/sam_test_resnet/model.py +26 -0
- brainscore_vision/models/sam_test_resnet/requirements.txt +2 -0
- brainscore_vision/models/sam_test_resnet/test.py +8 -0
- brainscore_vision/models/sam_test_resnet_4/__init__.py +5 -0
- brainscore_vision/models/sam_test_resnet_4/model.py +26 -0
- brainscore_vision/models/sam_test_resnet_4/requirements.txt +2 -0
- brainscore_vision/models/sam_test_resnet_4/test.py +8 -0
- brainscore_vision/models/scaling_models/__init__.py +265 -0
- brainscore_vision/models/scaling_models/model.py +148 -0
- brainscore_vision/models/scaling_models/model_configs.json +869 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_base_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_large_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_100_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_10_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_1_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_base_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_large_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_100_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_10_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_1_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_full_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b0_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b1_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b2_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet101_ecoset_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet101_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet152_ecoset_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet18_ecoset_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet18_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet34_ecoset_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet34_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet50_ecoset_full.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_100_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_10_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_1_seed-0.json +6 -0
- brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_full.json +6 -0
- brainscore_vision/models/scaling_models/requirements.txt +4 -0
- brainscore_vision/models/scaling_models/test.py +0 -0
- brainscore_vision/models/vitb14_dinov2_imagenet1k/__init__.py +5 -0
- brainscore_vision/models/vitb14_dinov2_imagenet1k/model.py +852 -0
- brainscore_vision/models/vitb14_dinov2_imagenet1k/setup.py +25 -0
- brainscore_vision/models/vitb14_dinov2_imagenet1k/test.py +0 -0
- brainscore_vision/models/voneresnet_50_non_stochastic/region_layer_map/voneresnet-50-non_stochastic.json +1 -0
- brainscore_vision/submission/actions_helpers.py +2 -2
- brainscore_vision/submission/endpoints.py +3 -4
- {brainscore_vision-2.1.dist-info → brainscore_vision-2.1.0.dist-info}/METADATA +2 -2
- {brainscore_vision-2.1.dist-info → brainscore_vision-2.1.0.dist-info}/RECORD +143 -18
- {brainscore_vision-2.1.dist-info → brainscore_vision-2.1.0.dist-info}/WHEEL +1 -1
- tests/test_model_helpers/temporal/activations/test_inferencer.py +2 -2
- {brainscore_vision-2.1.dist-info → brainscore_vision-2.1.0.dist-info}/LICENSE +0 -0
- {brainscore_vision-2.1.dist-info → brainscore_vision-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from setuptools import setup, find_packages
+
+requirements = [ "torchvision",
+    "torch",
+    "fire"
+]
+
+setup(
+    packages=find_packages(exclude=['tests']),
+    include_package_data=True,
+    install_requires=requirements,
+    license="MIT license",
+    zip_safe=False,
+    keywords='brain-score template',
+    classifiers=[
+        'Development Status :: 2 - Pre-Alpha',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Natural Language :: English',
+        'Programming Language :: Python :: 3.7',
+    ],
+    test_suite='tests',
+)
@@ -0,0 +1 @@
+# Left empty as part of 2023 models migration
@@ -0,0 +1,9 @@
+from brainscore_vision import model_registry
+from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+from .model import get_model, get_layers
+
+
+model_registry['cvt_cvt-13-224-in1k_4_LucyV4'] = \
+    lambda: ModelCommitment(identifier='cvt_cvt-13-224-in1k_4_LucyV4',
+                            activations_model=get_model('cvt_cvt-13-224-in1k_4_LucyV4'),
+                            layers=get_layers('cvt_cvt-13-224-in1k_4_LucyV4'))
@@ -0,0 +1,142 @@
+from brainscore_vision.model_helpers.check_submission import check_models
+import functools
+from transformers import AutoFeatureExtractor, CvtForImageClassification
+from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+from PIL import Image
+import numpy as np
+import torch
+
+"""
+Template module for a base model submission to brain-score
+"""
+
+
+def get_model(name):
+    assert name == 'cvt_cvt-13-224-in1k_4_LucyV4'
+    # https://huggingface.co/models?sort=downloads&search=cvt
+    image_size = 224
+    processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13')
+    model = CvtForImageClassification.from_pretrained('microsoft/cvt-13')
+    preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+    wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+    wrapper.image_size = image_size
+
+    return wrapper
+
+
+def get_layers(name):
+    assert name == 'cvt_cvt-13-224-in1k_4_LucyV4'
+    layers = []
+    layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+    layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+    layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+    layers += ['layernorm']
+    return layers
+
+
+def get_bibtex(model_identifier):
+    """
+    A method returning the bibtex reference of the requested model as a string.
+    """
+    return """@misc{wu2021cvtintroducingconvolutionsvision,
+        title={CvT: Introducing Convolutions to Vision Transformers},
+        author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+        year={2021},
+        eprint={2103.15808},
+        archivePrefix={arXiv},
+        primaryClass={cs.CV},
+        url={https://arxiv.org/abs/2103.15808},
+    }"""
+
+
+def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+    images = load_images(image_filepaths)
+    # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+    images = [image.resize((image_size, image_size)) for image in images]
+    if processor is not None:
+        images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+        if len(images[0].keys()) != 1:
+            raise NotImplementedError(f'unknown processor for getting model {processor}')
+        assert list(images[0].keys())[0] == 'pixel_values'
+        images = [image['pixel_values'] for image in images]
+        images = torch.cat(images)
+        images = images.cpu().numpy()
+    else:
+        images = preprocess_images(images, image_size=image_size, **kwargs)
+    return images
+
+
+def load_images(image_filepaths):
+    return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+def load_image(image_filepath):
+    with Image.open(image_filepath) as pil_image:
+        if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+            # work around to https://github.com/python-pillow/Pillow/issues/1144,
+            # see https://stackoverflow.com/a/30376272/2225200
+            return pil_image.copy()
+        else:  # make sure potential binary images are in RGB
+            rgb_image = Image.new("RGB", pil_image.size)
+            rgb_image.paste(pil_image)
+            return rgb_image
+
+
+def preprocess_images(images, image_size, **kwargs):
+    preprocess = torchvision_preprocess_input(image_size, **kwargs)
+    images = [preprocess(image) for image in images]
+    images = np.concatenate(images)
+    return images
+
+
+def torchvision_preprocess_input(image_size, **kwargs):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.Resize((image_size, image_size)),
+        torchvision_preprocess(**kwargs),
+    ])
+
+
+def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=normalize_mean, std=normalize_std),
+        lambda img: img.unsqueeze(0)
+    ])
+
+
+def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+    '''
+    Create a static video with the same image in all frames.
+    Args:
+        image (PIL.Image.Image): Input image.
+        num_frames (int): Number of frames in the video.
+    Returns:
+        result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+    '''
+    frames = []
+    for _ in range(num_frames):
+        frame = np.array(image)
+        if normalize_0to1:
+            frame = frame / 255.
+        if channel_dim == 1:
+            frame = frame.transpose(2, 0, 1)
+        frames.append(frame)
+    return np.stack(frames)
+
+
+if __name__ == '__main__':
+    # Use this method to ensure the correctness of the BaseModel implementations.
+    # It executes a mock run of brain-score benchmarks.
+    check_models.check_base_models(__name__)
+
+"""
+Notes on the error:
+
+- 'channel_x' key error:
+    # 'embeddings.patch_embeddings.projection',
+    https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+"""
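The load_preprocess_images helper above routes through the Hugging Face feature extractor when one is supplied: it resizes each image, collects the per-image pixel_values tensors, and concatenates them into a single NumPy batch. A small standalone sketch of that path (not part of this diff; the import path model and the file name stimulus.png are placeholders for the module above and any local RGB image) could be:

from transformers import AutoFeatureExtractor
from model import load_preprocess_images  # hypothetical import of the module shown above

# Same processor used in get_model(); it emits a dict with a single 'pixel_values' key.
processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13')
batch = load_preprocess_images(['stimulus.png'], image_size=224, processor=processor)
print(batch.shape)  # expected: (1, 3, 224, 224), i.e. one image, channels-first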
@@ -0,0 +1,9 @@
+from brainscore_vision import model_registry
+from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+from .model import get_model, get_layers
+
+
+model_registry['cvt_cvt-13-384-in1k_4_LucyV4'] = \
+    lambda: ModelCommitment(identifier='cvt_cvt-13-384-in1k_4_LucyV4',
+                            activations_model=get_model('cvt_cvt-13-384-in1k_4_LucyV4'),
+                            layers=get_layers('cvt_cvt-13-384-in1k_4_LucyV4'))
@@ -0,0 +1,142 @@
+from brainscore_vision.model_helpers.check_submission import check_models
+import functools
+from transformers import AutoFeatureExtractor, CvtForImageClassification
+from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+from PIL import Image
+import numpy as np
+import torch
+
+"""
+Template module for a base model submission to brain-score
+"""
+
+
+def get_model(name):
+    assert name == 'cvt_cvt-13-384-in1k_4_LucyV4'
+    # https://huggingface.co/models?sort=downloads&search=cvt
+    image_size = 384
+    processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13-384-1k')
+    model = CvtForImageClassification.from_pretrained('microsoft/cvt-13-384-1k')
+    preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+    wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+    wrapper.image_size = image_size
+
+    return wrapper
+
+
+def get_layers(name):
+    assert name == 'cvt_cvt-13-384-in1k_4_LucyV4'
+    layers = []
+    layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+    layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+    layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+    layers += ['layernorm']
+    return layers
+
+
+def get_bibtex(model_identifier):
+    """
+    A method returning the bibtex reference of the requested model as a string.
+    """
+    return """@misc{wu2021cvtintroducingconvolutionsvision,
+        title={CvT: Introducing Convolutions to Vision Transformers},
+        author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+        year={2021},
+        eprint={2103.15808},
+        archivePrefix={arXiv},
+        primaryClass={cs.CV},
+        url={https://arxiv.org/abs/2103.15808},
+    }"""
+
+
+def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+    images = load_images(image_filepaths)
+    # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+    images = [image.resize((image_size, image_size)) for image in images]
+    if processor is not None:
+        images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+        if len(images[0].keys()) != 1:
+            raise NotImplementedError(f'unknown processor for getting model {processor}')
+        assert list(images[0].keys())[0] == 'pixel_values'
+        images = [image['pixel_values'] for image in images]
+        images = torch.cat(images)
+        images = images.cpu().numpy()
+    else:
+        images = preprocess_images(images, image_size=image_size, **kwargs)
+    return images
+
+
+def load_images(image_filepaths):
+    return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+def load_image(image_filepath):
+    with Image.open(image_filepath) as pil_image:
+        if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+            # work around to https://github.com/python-pillow/Pillow/issues/1144,
+            # see https://stackoverflow.com/a/30376272/2225200
+            return pil_image.copy()
+        else:  # make sure potential binary images are in RGB
+            rgb_image = Image.new("RGB", pil_image.size)
+            rgb_image.paste(pil_image)
+            return rgb_image
+
+
+def preprocess_images(images, image_size, **kwargs):
+    preprocess = torchvision_preprocess_input(image_size, **kwargs)
+    images = [preprocess(image) for image in images]
+    images = np.concatenate(images)
+    return images
+
+
+def torchvision_preprocess_input(image_size, **kwargs):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.Resize((image_size, image_size)),
+        torchvision_preprocess(**kwargs),
+    ])
+
+
+def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=normalize_mean, std=normalize_std),
+        lambda img: img.unsqueeze(0)
+    ])
+
+
+def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+    '''
+    Create a static video with the same image in all frames.
+    Args:
+        image (PIL.Image.Image): Input image.
+        num_frames (int): Number of frames in the video.
+    Returns:
+        result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+    '''
+    frames = []
+    for _ in range(num_frames):
+        frame = np.array(image)
+        if normalize_0to1:
+            frame = frame / 255.
+        if channel_dim == 1:
+            frame = frame.transpose(2, 0, 1)
+        frames.append(frame)
+    return np.stack(frames)
+
+
+if __name__ == '__main__':
+    # Use this method to ensure the correctness of the BaseModel implementations.
+    # It executes a mock run of brain-score benchmarks.
+    check_models.check_base_models(__name__)
+
+"""
+Notes on the error:
+
+- 'channel_x' key error:
+    # 'embeddings.patch_embeddings.projection',
+    https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+"""
@@ -0,0 +1,9 @@
+from brainscore_vision import model_registry
+from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+from .model import get_model, get_layers
+
+
+model_registry['cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'] = \
+    lambda: ModelCommitment(identifier='cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4',
+                            activations_model=get_model('cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'),
+                            layers=get_layers('cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'))
@@ -0,0 +1,142 @@
+from brainscore_vision.model_helpers.check_submission import check_models
+import functools
+from transformers import AutoFeatureExtractor, CvtForImageClassification
+from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+from PIL import Image
+import numpy as np
+import torch
+
+"""
+Template module for a base model submission to brain-score
+"""
+
+
+def get_model(name):
+    assert name == 'cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'
+    # https://huggingface.co/models?sort=downloads&search=cvt
+    image_size = 384
+    processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13-384-22k')
+    model = CvtForImageClassification.from_pretrained('microsoft/cvt-13-384-22k')
+    preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+    wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+    wrapper.image_size = image_size
+
+    return wrapper
+
+
+def get_layers(name):
+    assert name == 'cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'
+    layers = []
+    layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+    layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+    layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+    layers += ['layernorm']
+    return layers
+
+
+def get_bibtex(model_identifier):
+    """
+    A method returning the bibtex reference of the requested model as a string.
+    """
+    return """@misc{wu2021cvtintroducingconvolutionsvision,
+        title={CvT: Introducing Convolutions to Vision Transformers},
+        author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+        year={2021},
+        eprint={2103.15808},
+        archivePrefix={arXiv},
+        primaryClass={cs.CV},
+        url={https://arxiv.org/abs/2103.15808},
+    }"""
+
+
+def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+    images = load_images(image_filepaths)
+    # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+    images = [image.resize((image_size, image_size)) for image in images]
+    if processor is not None:
+        images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+        if len(images[0].keys()) != 1:
+            raise NotImplementedError(f'unknown processor for getting model {processor}')
+        assert list(images[0].keys())[0] == 'pixel_values'
+        images = [image['pixel_values'] for image in images]
+        images = torch.cat(images)
+        images = images.cpu().numpy()
+    else:
+        images = preprocess_images(images, image_size=image_size, **kwargs)
+    return images
+
+
+def load_images(image_filepaths):
+    return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+def load_image(image_filepath):
+    with Image.open(image_filepath) as pil_image:
+        if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+            # work around to https://github.com/python-pillow/Pillow/issues/1144,
+            # see https://stackoverflow.com/a/30376272/2225200
+            return pil_image.copy()
+        else:  # make sure potential binary images are in RGB
+            rgb_image = Image.new("RGB", pil_image.size)
+            rgb_image.paste(pil_image)
+            return rgb_image
+
+
+def preprocess_images(images, image_size, **kwargs):
+    preprocess = torchvision_preprocess_input(image_size, **kwargs)
+    images = [preprocess(image) for image in images]
+    images = np.concatenate(images)
+    return images
+
+
+def torchvision_preprocess_input(image_size, **kwargs):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.Resize((image_size, image_size)),
+        torchvision_preprocess(**kwargs),
+    ])
+
+
+def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+    from torchvision import transforms
+    return transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=normalize_mean, std=normalize_std),
+        lambda img: img.unsqueeze(0)
+    ])
+
+
+def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+    '''
+    Create a static video with the same image in all frames.
+    Args:
+        image (PIL.Image.Image): Input image.
+        num_frames (int): Number of frames in the video.
+    Returns:
+        result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+    '''
+    frames = []
+    for _ in range(num_frames):
+        frame = np.array(image)
+        if normalize_0to1:
+            frame = frame / 255.
+        if channel_dim == 1:
+            frame = frame.transpose(2, 0, 1)
+        frames.append(frame)
+    return np.stack(frames)
+
+
+if __name__ == '__main__':
+    # Use this method to ensure the correctness of the BaseModel implementations.
+    # It executes a mock run of brain-score benchmarks.
+    check_models.check_base_models(__name__)
+
+"""
+Notes on the error:
+
+- 'channel_x' key error:
+    # 'embeddings.patch_embeddings.projection',
+    https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+"""
@@ -0,0 +1,9 @@
+from brainscore_vision import model_registry
+from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+from .model import get_model, get_layers
+
+
+model_registry['cvt_cvt-21-224-in1k_4_LucyV4'] = \
+    lambda: ModelCommitment(identifier='cvt_cvt-21-224-in1k_4_LucyV4',
+                            activations_model=get_model('cvt_cvt-21-224-in1k_4_LucyV4'),
+                            layers=get_layers('cvt_cvt-21-224-in1k_4_LucyV4'))