brainscore_vision-2.1-py3-none-any.whl → brainscore_vision-2.2.1-py3-none-any.whl

Files changed (143)
  1. brainscore_vision/benchmarks/coggan2024_behavior/__init__.py +2 -1
  2. brainscore_vision/benchmarks/coggan2024_behavior/test.py +2 -2
  3. brainscore_vision/benchmarks/coggan2024_fMRI/__init__.py +4 -4
  4. brainscore_vision/benchmarks/coggan2024_fMRI/test.py +2 -2
  5. brainscore_vision/benchmarks/imagenet/imagenet2012.csv +50000 -50000
  6. brainscore_vision/benchmarks/imagenet_c/benchmark.py +1 -1
  7. brainscore_vision/benchmarks/lonnqvist2024/__init__.py +8 -0
  8. brainscore_vision/benchmarks/lonnqvist2024/benchmark.py +125 -0
  9. brainscore_vision/benchmarks/lonnqvist2024/test.py +61 -0
  10. brainscore_vision/benchmarks/malania2007/benchmark.py +3 -0
  11. brainscore_vision/benchmarks/maniquet2024/benchmark.py +1 -1
  12. brainscore_vision/data/lonnqvist2024/__init__.py +47 -0
  13. brainscore_vision/data/lonnqvist2024/data_packaging/lonnqvist_data_assembly.py +53 -0
  14. brainscore_vision/data/lonnqvist2024/data_packaging/lonnqvist_stimulus_set.py +61 -0
  15. brainscore_vision/data/lonnqvist2024/test.py +127 -0
  16. brainscore_vision/model_helpers/brain_transformation/__init__.py +33 -0
  17. brainscore_vision/models/alexnet/region_layer_map/alexnet.json +1 -0
  18. brainscore_vision/models/alexnet_7be5be79/setup.py +4 -4
  19. brainscore_vision/models/alexnet_random/__init__.py +7 -0
  20. brainscore_vision/models/alexnet_random/model.py +46 -0
  21. brainscore_vision/models/alexnet_random/setup.py +26 -0
  22. brainscore_vision/models/alexnet_random/test.py +1 -0
  23. brainscore_vision/models/cvt_cvt_13_224_in1k_4/__init__.py +9 -0
  24. brainscore_vision/models/cvt_cvt_13_224_in1k_4/model.py +142 -0
  25. brainscore_vision/models/cvt_cvt_13_224_in1k_4/region_layer_map/cvt_cvt-13-224-in1k_4.json +6 -0
  26. brainscore_vision/models/cvt_cvt_13_224_in1k_4/region_layer_map/cvt_cvt-13-224-in1k_4_LucyV4.json +6 -0
  27. brainscore_vision/models/cvt_cvt_13_224_in1k_4/requirements.txt +4 -0
  28. brainscore_vision/models/cvt_cvt_13_224_in1k_4/test.py +8 -0
  29. brainscore_vision/models/cvt_cvt_13_384_in1k_4/__init__.py +9 -0
  30. brainscore_vision/models/cvt_cvt_13_384_in1k_4/model.py +142 -0
  31. brainscore_vision/models/cvt_cvt_13_384_in1k_4/region_layer_map/cvt_cvt-13-384-in1k_4_LucyV4.json +6 -0
  32. brainscore_vision/models/cvt_cvt_13_384_in1k_4/requirements.txt +4 -0
  33. brainscore_vision/models/cvt_cvt_13_384_in1k_4/test.py +8 -0
  34. brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/__init__.py +9 -0
  35. brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/model.py +142 -0
  36. brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/region_layer_map/cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4.json +6 -0
  37. brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/requirements.txt +4 -0
  38. brainscore_vision/models/cvt_cvt_13_384_in22k_finetuned_in1k_4/test.py +8 -0
  39. brainscore_vision/models/cvt_cvt_21_224_in1k_4/__init__.py +9 -0
  40. brainscore_vision/models/cvt_cvt_21_224_in1k_4/model.py +142 -0
  41. brainscore_vision/models/cvt_cvt_21_224_in1k_4/region_layer_map/cvt_cvt-21-224-in1k_4_LucyV4.json +6 -0
  42. brainscore_vision/models/cvt_cvt_21_224_in1k_4/requirements.txt +4 -0
  43. brainscore_vision/models/cvt_cvt_21_224_in1k_4/test.py +8 -0
  44. brainscore_vision/models/cvt_cvt_21_384_in1k_4/__init__.py +9 -0
  45. brainscore_vision/models/cvt_cvt_21_384_in1k_4/model.py +142 -0
  46. brainscore_vision/models/cvt_cvt_21_384_in1k_4/region_layer_map/cvt_cvt-21-384-in1k_4_LucyV4.json +6 -0
  47. brainscore_vision/models/cvt_cvt_21_384_in1k_4/requirements.txt +4 -0
  48. brainscore_vision/models/cvt_cvt_21_384_in1k_4/test.py +8 -0
  49. brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/__init__.py +9 -0
  50. brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/model.py +142 -0
  51. brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/region_layer_map/cvt_cvt-21-384-in22k_finetuned-in1k_4_LucyV4.json +6 -0
  52. brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/requirements.txt +4 -0
  53. brainscore_vision/models/cvt_cvt_21_384_in22k_finetuned_in1k_4/test.py +8 -0
  54. brainscore_vision/models/fixres_resnext101_32x48d_wsl/__init__.py +7 -0
  55. brainscore_vision/models/fixres_resnext101_32x48d_wsl/model.py +57 -0
  56. brainscore_vision/models/fixres_resnext101_32x48d_wsl/requirements.txt +5 -0
  57. brainscore_vision/models/fixres_resnext101_32x48d_wsl/test.py +7 -0
  58. brainscore_vision/models/inception_v4_pytorch/__init__.py +7 -0
  59. brainscore_vision/models/inception_v4_pytorch/model.py +64 -0
  60. brainscore_vision/models/inception_v4_pytorch/requirements.txt +3 -0
  61. brainscore_vision/models/inception_v4_pytorch/test.py +8 -0
  62. brainscore_vision/models/mvimgnet_ms_05/__init__.py +9 -0
  63. brainscore_vision/models/mvimgnet_ms_05/model.py +64 -0
  64. brainscore_vision/models/mvimgnet_ms_05/setup.py +25 -0
  65. brainscore_vision/models/mvimgnet_ms_05/test.py +1 -0
  66. brainscore_vision/models/mvimgnet_rf/__init__.py +9 -0
  67. brainscore_vision/models/mvimgnet_rf/model.py +64 -0
  68. brainscore_vision/models/mvimgnet_rf/setup.py +25 -0
  69. brainscore_vision/models/mvimgnet_rf/test.py +1 -0
  70. brainscore_vision/models/mvimgnet_ss_00/__init__.py +9 -0
  71. brainscore_vision/models/mvimgnet_ss_00/model.py +64 -0
  72. brainscore_vision/models/mvimgnet_ss_00/setup.py +25 -0
  73. brainscore_vision/models/mvimgnet_ss_00/test.py +1 -0
  74. brainscore_vision/models/mvimgnet_ss_02/__init__.py +9 -0
  75. brainscore_vision/models/mvimgnet_ss_02/model.py +64 -0
  76. brainscore_vision/models/mvimgnet_ss_02/setup.py +25 -0
  77. brainscore_vision/models/mvimgnet_ss_02/test.py +1 -0
  78. brainscore_vision/models/mvimgnet_ss_03/__init__.py +9 -0
  79. brainscore_vision/models/mvimgnet_ss_03/model.py +64 -0
  80. brainscore_vision/models/mvimgnet_ss_03/setup.py +25 -0
  81. brainscore_vision/models/mvimgnet_ss_03/test.py +1 -0
  82. brainscore_vision/models/mvimgnet_ss_04/__init__.py +9 -0
  83. brainscore_vision/models/mvimgnet_ss_04/model.py +64 -0
  84. brainscore_vision/models/mvimgnet_ss_04/setup.py +25 -0
  85. brainscore_vision/models/mvimgnet_ss_04/test.py +1 -0
  86. brainscore_vision/models/mvimgnet_ss_05/__init__.py +9 -0
  87. brainscore_vision/models/mvimgnet_ss_05/model.py +64 -0
  88. brainscore_vision/models/mvimgnet_ss_05/setup.py +25 -0
  89. brainscore_vision/models/mvimgnet_ss_05/test.py +1 -0
  90. brainscore_vision/models/resnet50_tutorial/region_layer_map/resnet50_tutorial.json +1 -0
  91. brainscore_vision/models/sam_test_resnet/__init__.py +5 -0
  92. brainscore_vision/models/sam_test_resnet/model.py +26 -0
  93. brainscore_vision/models/sam_test_resnet/requirements.txt +2 -0
  94. brainscore_vision/models/sam_test_resnet/test.py +8 -0
  95. brainscore_vision/models/sam_test_resnet_4/__init__.py +5 -0
  96. brainscore_vision/models/sam_test_resnet_4/model.py +26 -0
  97. brainscore_vision/models/sam_test_resnet_4/requirements.txt +2 -0
  98. brainscore_vision/models/sam_test_resnet_4/test.py +8 -0
  99. brainscore_vision/models/scaling_models/__init__.py +265 -0
  100. brainscore_vision/models/scaling_models/model.py +148 -0
  101. brainscore_vision/models/scaling_models/model_configs.json +869 -0
  102. brainscore_vision/models/scaling_models/region_layer_map/convnext_base_imagenet_full_seed-0.json +6 -0
  103. brainscore_vision/models/scaling_models/region_layer_map/convnext_large_imagenet_full_seed-0.json +6 -0
  104. brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_100_seed-0.json +6 -0
  105. brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_10_seed-0.json +6 -0
  106. brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_1_seed-0.json +6 -0
  107. brainscore_vision/models/scaling_models/region_layer_map/convnext_small_imagenet_full_seed-0.json +6 -0
  108. brainscore_vision/models/scaling_models/region_layer_map/deit_base_imagenet_full_seed-0.json +6 -0
  109. brainscore_vision/models/scaling_models/region_layer_map/deit_large_imagenet_full_seed-0.json +6 -0
  110. brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_100_seed-0.json +6 -0
  111. brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_10_seed-0.json +6 -0
  112. brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_1_seed-0.json +6 -0
  113. brainscore_vision/models/scaling_models/region_layer_map/deit_small_imagenet_full_seed-0.json +6 -0
  114. brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b0_imagenet_full.json +6 -0
  115. brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b1_imagenet_full.json +6 -0
  116. brainscore_vision/models/scaling_models/region_layer_map/efficientnet_b2_imagenet_full.json +6 -0
  117. brainscore_vision/models/scaling_models/region_layer_map/resnet101_ecoset_full.json +6 -0
  118. brainscore_vision/models/scaling_models/region_layer_map/resnet101_imagenet_full.json +6 -0
  119. brainscore_vision/models/scaling_models/region_layer_map/resnet152_ecoset_full.json +6 -0
  120. brainscore_vision/models/scaling_models/region_layer_map/resnet18_ecoset_full.json +6 -0
  121. brainscore_vision/models/scaling_models/region_layer_map/resnet18_imagenet_full.json +6 -0
  122. brainscore_vision/models/scaling_models/region_layer_map/resnet34_ecoset_full.json +6 -0
  123. brainscore_vision/models/scaling_models/region_layer_map/resnet34_imagenet_full.json +6 -0
  124. brainscore_vision/models/scaling_models/region_layer_map/resnet50_ecoset_full.json +6 -0
  125. brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_100_seed-0.json +6 -0
  126. brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_10_seed-0.json +6 -0
  127. brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_1_seed-0.json +6 -0
  128. brainscore_vision/models/scaling_models/region_layer_map/resnet50_imagenet_full.json +6 -0
  129. brainscore_vision/models/scaling_models/requirements.txt +4 -0
  130. brainscore_vision/models/scaling_models/test.py +0 -0
  131. brainscore_vision/models/vitb14_dinov2_imagenet1k/__init__.py +5 -0
  132. brainscore_vision/models/vitb14_dinov2_imagenet1k/model.py +852 -0
  133. brainscore_vision/models/vitb14_dinov2_imagenet1k/setup.py +25 -0
  134. brainscore_vision/models/vitb14_dinov2_imagenet1k/test.py +0 -0
  135. brainscore_vision/models/voneresnet_50_non_stochastic/region_layer_map/voneresnet-50-non_stochastic.json +1 -0
  136. brainscore_vision/submission/actions_helpers.py +2 -2
  137. brainscore_vision/submission/endpoints.py +3 -4
  138. {brainscore_vision-2.1.dist-info → brainscore_vision-2.2.1.dist-info}/METADATA +2 -2
  139. {brainscore_vision-2.1.dist-info → brainscore_vision-2.2.1.dist-info}/RECORD +143 -18
  140. {brainscore_vision-2.1.dist-info → brainscore_vision-2.2.1.dist-info}/WHEEL +1 -1
  141. tests/test_model_helpers/temporal/activations/test_inferencer.py +2 -2
  142. {brainscore_vision-2.1.dist-info → brainscore_vision-2.2.1.dist-info}/LICENSE +0 -0
  143. {brainscore_vision-2.1.dist-info → brainscore_vision-2.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,26 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ from setuptools import setup, find_packages
+
+ requirements = [ "torchvision",
+     "torch",
+     "fire"
+ ]
+
+ setup(
+     packages=find_packages(exclude=['tests']),
+     include_package_data=True,
+     install_requires=requirements,
+     license="MIT license",
+     zip_safe=False,
+     keywords='brain-score template',
+     classifiers=[
+         'Development Status :: 2 - Pre-Alpha',
+         'Intended Audience :: Developers',
+         'License :: OSI Approved :: MIT License',
+         'Natural Language :: English',
+         'Programming Language :: Python :: 3.7',
+     ],
+     test_suite='tests',
+ )
@@ -0,0 +1 @@
+ # Left empty as part of 2023 models migration
@@ -0,0 +1,9 @@
+ from brainscore_vision import model_registry
+ from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+ from .model import get_model, get_layers
+
+
+ model_registry['cvt_cvt-13-224-in1k_4_LucyV4'] = \
+     lambda: ModelCommitment(identifier='cvt_cvt-13-224-in1k_4_LucyV4',
+                             activations_model=get_model('cvt_cvt-13-224-in1k_4_LucyV4'),
+                             layers=get_layers('cvt_cvt-13-224-in1k_4_LucyV4'))
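Each new cvt_cvt-* plugin registers a lazily constructed ModelCommitment under its identifier, as in the hunk above. A minimal usage sketch of such a registry entry, assuming brainscore_vision is installed with this plugin (the load_model call mirrors the plugin's own test further down; nothing here is part of the diff):

import brainscore_vision

# Loading by identifier triggers the lambda registered above, which builds
# the activations model and its layer commitment on first use.
model = brainscore_vision.load_model('cvt_cvt-13-224-in1k_4_LucyV4')
assert model.identifier == 'cvt_cvt-13-224-in1k_4_LucyV4'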
@@ -0,0 +1,142 @@
+ from brainscore_vision.model_helpers.check_submission import check_models
+ import functools
+ from transformers import AutoFeatureExtractor, CvtForImageClassification
+ from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+ from PIL import Image
+ import numpy as np
+ import torch
+
+ """
+ Template module for a base model submission to brain-score
+ """
+
+
+ def get_model(name):
+     assert name == 'cvt_cvt-13-224-in1k_4_LucyV4'
+     # https://huggingface.co/models?sort=downloads&search=cvt
+     image_size = 224
+     processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13')
+     model = CvtForImageClassification.from_pretrained('microsoft/cvt-13')
+     preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+     wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+     wrapper.image_size = image_size
+
+     return wrapper
+
+
+ def get_layers(name):
+     assert name == 'cvt_cvt-13-224-in1k_4_LucyV4'
+     layers = []
+     layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+     layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+     layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+     layers += ['layernorm']
+     return layers
+
+
+ def get_bibtex(model_identifier):
+     """
+     A method returning the bibtex reference of the requested model as a string.
+     """
+     return """@misc{wu2021cvtintroducingconvolutionsvision,
+         title={CvT: Introducing Convolutions to Vision Transformers},
+         author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+         year={2021},
+         eprint={2103.15808},
+         archivePrefix={arXiv},
+         primaryClass={cs.CV},
+         url={https://arxiv.org/abs/2103.15808},
+     }"""
+
+
+ def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+     images = load_images(image_filepaths)
+     # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+     images = [image.resize((image_size, image_size)) for image in images]
+     if processor is not None:
+         images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+         if len(images[0].keys()) != 1:
+             raise NotImplementedError(f'unknown processor for getting model {processor}')
+         assert list(images[0].keys())[0] == 'pixel_values'
+         images = [image['pixel_values'] for image in images]
+         images = torch.cat(images)
+         images = images.cpu().numpy()
+     else:
+         images = preprocess_images(images, image_size=image_size, **kwargs)
+     return images
+
+
+ def load_images(image_filepaths):
+     return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+ def load_image(image_filepath):
+     with Image.open(image_filepath) as pil_image:
+         if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                 and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+             # work around to https://github.com/python-pillow/Pillow/issues/1144,
+             # see https://stackoverflow.com/a/30376272/2225200
+             return pil_image.copy()
+         else:  # make sure potential binary images are in RGB
+             rgb_image = Image.new("RGB", pil_image.size)
+             rgb_image.paste(pil_image)
+             return rgb_image
+
+
+ def preprocess_images(images, image_size, **kwargs):
+     preprocess = torchvision_preprocess_input(image_size, **kwargs)
+     images = [preprocess(image) for image in images]
+     images = np.concatenate(images)
+     return images
+
+
+ def torchvision_preprocess_input(image_size, **kwargs):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.Resize((image_size, image_size)),
+         torchvision_preprocess(**kwargs),
+     ])
+
+
+ def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize(mean=normalize_mean, std=normalize_std),
+         lambda img: img.unsqueeze(0)
+     ])
+
+
+ def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+     '''
+     Create a static video with the same image in all frames.
+     Args:
+         image (PIL.Image.Image): Input image.
+         num_frames (int): Number of frames in the video.
+     Returns:
+         result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+     '''
+     frames = []
+     for _ in range(num_frames):
+         frame = np.array(image)
+         if normalize_0to1:
+             frame = frame / 255.
+         if channel_dim == 1:
+             frame = frame.transpose(2, 0, 1)
+         frames.append(frame)
+     return np.stack(frames)
+
+
+ if __name__ == '__main__':
+     # Use this method to ensure the correctness of the BaseModel implementations.
+     # It executes a mock run of brain-score benchmarks.
+     check_models.check_base_models(__name__)
+
+ """
+ Notes on the error:
+
+ - 'channel_x' key error:
+     # 'embeddings.patch_embeddings.projection',
+     https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+ """
@@ -0,0 +1,6 @@
+ {
+     "V1": "cvt.encoder.stages.1.layers.0",
+     "V2": "cvt.encoder.stages.1.layers.1",
+     "V4": "cvt.encoder.stages.2.layers.0",
+     "IT": "cvt.encoder.stages.2.layers.1"
+ }
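The region_layer_map file above records one candidate layer per visual region (V1, V2, V4, IT) for this model, so the commitment does not have to be re-derived at scoring time. A small sketch of inspecting the mapping, with the path taken from the 'Files changed' list (adjust to your checkout; illustrative only):

import json
from pathlib import Path

mapping_path = Path('brainscore_vision/models/cvt_cvt_13_224_in1k_4/region_layer_map/cvt_cvt-13-224-in1k_4.json')
region_layer_map = json.loads(mapping_path.read_text())
print(region_layer_map['IT'])  # -> 'cvt.encoder.stages.2.layers.1'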
@@ -0,0 +1,6 @@
+ {
+     "V1": "cvt.encoder.stages.1.layers.0",
+     "V2": "cvt.encoder.stages.1.layers.1",
+     "V4": "cvt.encoder.stages.2.layers.0",
+     "IT": "cvt.encoder.stages.2.layers.1"
+ }
@@ -0,0 +1,4 @@
+ numpy
+ torch
+ transformers==4.30.2
+ pillow
@@ -0,0 +1,8 @@
+ import pytest
+ import brainscore_vision
+
+
+ @pytest.mark.travis_slow
+ def test_has_identifier():
+     model = brainscore_vision.load_model('cvt_cvt-13-224-in1k_4_LucyV4')
+     assert model.identifier == 'cvt_cvt-13-224-in1k_4_LucyV4'
@@ -0,0 +1,9 @@
+ from brainscore_vision import model_registry
+ from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+ from .model import get_model, get_layers
+
+
+ model_registry['cvt_cvt-13-384-in1k_4_LucyV4'] = \
+     lambda: ModelCommitment(identifier='cvt_cvt-13-384-in1k_4_LucyV4',
+                             activations_model=get_model('cvt_cvt-13-384-in1k_4_LucyV4'),
+                             layers=get_layers('cvt_cvt-13-384-in1k_4_LucyV4'))
@@ -0,0 +1,142 @@
+ from brainscore_vision.model_helpers.check_submission import check_models
+ import functools
+ from transformers import AutoFeatureExtractor, CvtForImageClassification
+ from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+ from PIL import Image
+ import numpy as np
+ import torch
+
+ """
+ Template module for a base model submission to brain-score
+ """
+
+
+ def get_model(name):
+     assert name == 'cvt_cvt-13-384-in1k_4_LucyV4'
+     # https://huggingface.co/models?sort=downloads&search=cvt
+     image_size = 384
+     processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13-384-1k')
+     model = CvtForImageClassification.from_pretrained('microsoft/cvt-13-384-1k')
+     preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+     wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+     wrapper.image_size = image_size
+
+     return wrapper
+
+
+ def get_layers(name):
+     assert name == 'cvt_cvt-13-384-in1k_4_LucyV4'
+     layers = []
+     layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+     layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+     layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+     layers += ['layernorm']
+     return layers
+
+
+ def get_bibtex(model_identifier):
+     """
+     A method returning the bibtex reference of the requested model as a string.
+     """
+     return """@misc{wu2021cvtintroducingconvolutionsvision,
+         title={CvT: Introducing Convolutions to Vision Transformers},
+         author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+         year={2021},
+         eprint={2103.15808},
+         archivePrefix={arXiv},
+         primaryClass={cs.CV},
+         url={https://arxiv.org/abs/2103.15808},
+     }"""
+
+
+ def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+     images = load_images(image_filepaths)
+     # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+     images = [image.resize((image_size, image_size)) for image in images]
+     if processor is not None:
+         images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+         if len(images[0].keys()) != 1:
+             raise NotImplementedError(f'unknown processor for getting model {processor}')
+         assert list(images[0].keys())[0] == 'pixel_values'
+         images = [image['pixel_values'] for image in images]
+         images = torch.cat(images)
+         images = images.cpu().numpy()
+     else:
+         images = preprocess_images(images, image_size=image_size, **kwargs)
+     return images
+
+
+ def load_images(image_filepaths):
+     return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+ def load_image(image_filepath):
+     with Image.open(image_filepath) as pil_image:
+         if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                 and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+             # work around to https://github.com/python-pillow/Pillow/issues/1144,
+             # see https://stackoverflow.com/a/30376272/2225200
+             return pil_image.copy()
+         else:  # make sure potential binary images are in RGB
+             rgb_image = Image.new("RGB", pil_image.size)
+             rgb_image.paste(pil_image)
+             return rgb_image
+
+
+ def preprocess_images(images, image_size, **kwargs):
+     preprocess = torchvision_preprocess_input(image_size, **kwargs)
+     images = [preprocess(image) for image in images]
+     images = np.concatenate(images)
+     return images
+
+
+ def torchvision_preprocess_input(image_size, **kwargs):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.Resize((image_size, image_size)),
+         torchvision_preprocess(**kwargs),
+     ])
+
+
+ def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize(mean=normalize_mean, std=normalize_std),
+         lambda img: img.unsqueeze(0)
+     ])
+
+
+ def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+     '''
+     Create a static video with the same image in all frames.
+     Args:
+         image (PIL.Image.Image): Input image.
+         num_frames (int): Number of frames in the video.
+     Returns:
+         result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+     '''
+     frames = []
+     for _ in range(num_frames):
+         frame = np.array(image)
+         if normalize_0to1:
+             frame = frame / 255.
+         if channel_dim == 1:
+             frame = frame.transpose(2, 0, 1)
+         frames.append(frame)
+     return np.stack(frames)
+
+
+ if __name__ == '__main__':
+     # Use this method to ensure the correctness of the BaseModel implementations.
+     # It executes a mock run of brain-score benchmarks.
+     check_models.check_base_models(__name__)
+
+ """
+ Notes on the error:
+
+ - 'channel_x' key error:
+     # 'embeddings.patch_embeddings.projection',
+     https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+ """
@@ -0,0 +1,6 @@
+ {
+     "V1": "cvt.encoder.stages.2.layers.0",
+     "V2": "cvt.encoder.stages.2.layers.0",
+     "V4": "cvt.encoder.stages.2.layers.0",
+     "IT": "cvt.encoder.stages.2.layers.1"
+ }
@@ -0,0 +1,4 @@
+ numpy
+ torch
+ transformers==4.30.2
+ pillow
@@ -0,0 +1,8 @@
+ import pytest
+ import brainscore_vision
+
+
+ @pytest.mark.travis_slow
+ def test_has_identifier():
+     model = brainscore_vision.load_model('cvt_cvt-13-384-in1k_4_LucyV4')
+     assert model.identifier == 'cvt_cvt-13-384-in1k_4_LucyV4'
@@ -0,0 +1,9 @@
+ from brainscore_vision import model_registry
+ from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+ from .model import get_model, get_layers
+
+
+ model_registry['cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'] = \
+     lambda: ModelCommitment(identifier='cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4',
+                             activations_model=get_model('cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'),
+                             layers=get_layers('cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'))
@@ -0,0 +1,142 @@
+ from brainscore_vision.model_helpers.check_submission import check_models
+ import functools
+ from transformers import AutoFeatureExtractor, CvtForImageClassification
+ from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper
+ from PIL import Image
+ import numpy as np
+ import torch
+
+ """
+ Template module for a base model submission to brain-score
+ """
+
+
+ def get_model(name):
+     assert name == 'cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'
+     # https://huggingface.co/models?sort=downloads&search=cvt
+     image_size = 384
+     processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-13-384-22k')
+     model = CvtForImageClassification.from_pretrained('microsoft/cvt-13-384-22k')
+     preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size)
+     wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing)
+     wrapper.image_size = image_size
+
+     return wrapper
+
+
+ def get_layers(name):
+     assert name == 'cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'
+     layers = []
+     layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(1)]
+     layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)]
+     layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(10)]
+     layers += ['layernorm']
+     return layers
+
+
+ def get_bibtex(model_identifier):
+     """
+     A method returning the bibtex reference of the requested model as a string.
+     """
+     return """@misc{wu2021cvtintroducingconvolutionsvision,
+         title={CvT: Introducing Convolutions to Vision Transformers},
+         author={Haiping Wu and Bin Xiao and Noel Codella and Mengchen Liu and Xiyang Dai and Lu Yuan and Lei Zhang},
+         year={2021},
+         eprint={2103.15808},
+         archivePrefix={arXiv},
+         primaryClass={cs.CV},
+         url={https://arxiv.org/abs/2103.15808},
+     }"""
+
+
+ def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs):
+     images = load_images(image_filepaths)
+     # images = [<PIL.Image.Image image mode=RGB size=400x400 at 0x7F8654B2AC10>, ...]
+     images = [image.resize((image_size, image_size)) for image in images]
+     if processor is not None:
+         images = [processor(images=image, return_tensors="pt", **kwargs) for image in images]
+         if len(images[0].keys()) != 1:
+             raise NotImplementedError(f'unknown processor for getting model {processor}')
+         assert list(images[0].keys())[0] == 'pixel_values'
+         images = [image['pixel_values'] for image in images]
+         images = torch.cat(images)
+         images = images.cpu().numpy()
+     else:
+         images = preprocess_images(images, image_size=image_size, **kwargs)
+     return images
+
+
+ def load_images(image_filepaths):
+     return [load_image(image_filepath) for image_filepath in image_filepaths]
+
+
+ def load_image(image_filepath):
+     with Image.open(image_filepath) as pil_image:
+         if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \
+                 and 'P' not in pil_image.mode.upper():  # not binary and not alpha and not palletized
+             # work around to https://github.com/python-pillow/Pillow/issues/1144,
+             # see https://stackoverflow.com/a/30376272/2225200
+             return pil_image.copy()
+         else:  # make sure potential binary images are in RGB
+             rgb_image = Image.new("RGB", pil_image.size)
+             rgb_image.paste(pil_image)
+             return rgb_image
+
+
+ def preprocess_images(images, image_size, **kwargs):
+     preprocess = torchvision_preprocess_input(image_size, **kwargs)
+     images = [preprocess(image) for image in images]
+     images = np.concatenate(images)
+     return images
+
+
+ def torchvision_preprocess_input(image_size, **kwargs):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.Resize((image_size, image_size)),
+         torchvision_preprocess(**kwargs),
+     ])
+
+
+ def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)):
+     from torchvision import transforms
+     return transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize(mean=normalize_mean, std=normalize_std),
+         lambda img: img.unsqueeze(0)
+     ])
+
+
+ def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3):
+     '''
+     Create a static video with the same image in all frames.
+     Args:
+         image (PIL.Image.Image): Input image.
+         num_frames (int): Number of frames in the video.
+     Returns:
+         result (np.ndarray): np array of frames of shape (num_frames, height, width, 3).
+     '''
+     frames = []
+     for _ in range(num_frames):
+         frame = np.array(image)
+         if normalize_0to1:
+             frame = frame / 255.
+         if channel_dim == 1:
+             frame = frame.transpose(2, 0, 1)
+         frames.append(frame)
+     return np.stack(frames)
+
+
+ if __name__ == '__main__':
+     # Use this method to ensure the correctness of the BaseModel implementations.
+     # It executes a mock run of brain-score benchmarks.
+     check_models.check_base_models(__name__)
+
+ """
+ Notes on the error:
+
+ - 'channel_x' key error:
+     # 'embeddings.patch_embeddings.projection',
+     https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code
+
+ """
@@ -0,0 +1,6 @@
+ {
+     "V1": "cvt.encoder.stages.1.layers.0",
+     "V2": "cvt.encoder.stages.1.layers.1",
+     "V4": "cvt.encoder.stages.1.layers.1",
+     "IT": "cvt.encoder.stages.2.layers.1"
+ }
@@ -0,0 +1,4 @@
+ numpy
+ torch
+ transformers==4.30.2
+ pillow
@@ -0,0 +1,8 @@
+ import pytest
+ import brainscore_vision
+
+
+ @pytest.mark.travis_slow
+ def test_has_identifier():
+     model = brainscore_vision.load_model('cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4')
+     assert model.identifier == 'cvt_cvt-13-384-in22k_finetuned-in1k_4_LucyV4'
@@ -0,0 +1,9 @@
+ from brainscore_vision import model_registry
+ from brainscore_vision.model_helpers.brain_transformation import ModelCommitment
+ from .model import get_model, get_layers
+
+
+ model_registry['cvt_cvt-21-224-in1k_4_LucyV4'] = \
+     lambda: ModelCommitment(identifier='cvt_cvt-21-224-in1k_4_LucyV4',
+                             activations_model=get_model('cvt_cvt-21-224-in1k_4_LucyV4'),
+                             layers=get_layers('cvt_cvt-21-224-in1k_4_LucyV4'))