evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of evalscope might be problematic.

Files changed (181)
  1. evalscope/arguments.py +2 -1
  2. evalscope/benchmarks/__init__.py +2 -2
  3. evalscope/benchmarks/aigc/__init__.py +0 -0
  4. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  5. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  6. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  7. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  8. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  9. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  10. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  11. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  12. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  13. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  14. evalscope/benchmarks/arc/arc_adapter.py +1 -1
  15. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  16. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  17. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  18. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  19. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  20. evalscope/benchmarks/data_adapter.py +16 -9
  21. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  22. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  23. evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
  24. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  25. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
  26. evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
  27. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  28. evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
  29. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  30. evalscope/benchmarks/utils.py +7 -16
  31. evalscope/cli/start_app.py +1 -1
  32. evalscope/collections/evaluator.py +16 -4
  33. evalscope/config.py +7 -3
  34. evalscope/constants.py +11 -0
  35. evalscope/evaluator/evaluator.py +9 -3
  36. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  37. evalscope/metrics/__init__.py +49 -4
  38. evalscope/metrics/llm_judge.py +1 -1
  39. evalscope/metrics/named_metrics.py +13 -0
  40. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  41. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  42. evalscope/metrics/t2v_metrics/constants.py +12 -0
  43. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  44. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  45. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  46. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  47. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  48. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  49. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  50. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  51. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  52. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  53. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  54. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  55. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  56. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  57. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  58. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  59. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  60. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  61. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  62. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  63. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  64. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  65. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  66. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  67. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  68. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  69. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  70. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  71. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  72. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  73. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  74. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  75. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  76. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  77. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  139. evalscope/metrics/t2v_metrics/score.py +78 -0
  140. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  141. evalscope/models/__init__.py +50 -14
  142. evalscope/models/adapters/__init__.py +17 -0
  143. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  144. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  145. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  146. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  147. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  148. evalscope/models/adapters/t2i_adapter.py +76 -0
  149. evalscope/models/custom/__init__.py +2 -1
  150. evalscope/models/custom/dummy_model.py +11 -13
  151. evalscope/models/local_model.py +82 -33
  152. evalscope/models/model.py +2 -42
  153. evalscope/models/register.py +26 -0
  154. evalscope/perf/benchmark.py +4 -3
  155. evalscope/perf/main.py +4 -2
  156. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  157. evalscope/perf/utils/benchmark_util.py +2 -2
  158. evalscope/perf/utils/db_util.py +16 -8
  159. evalscope/report/__init__.py +1 -0
  160. evalscope/report/app.py +117 -67
  161. evalscope/report/app_arguments.py +11 -0
  162. evalscope/report/generator.py +1 -1
  163. evalscope/run.py +3 -3
  164. evalscope/third_party/thinkbench/eval.py +19 -7
  165. evalscope/utils/chat_service.py +2 -2
  166. evalscope/utils/import_utils.py +66 -0
  167. evalscope/utils/utils.py +12 -4
  168. evalscope/version.py +2 -2
  169. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
  170. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
  171. tests/aigc/__init__.py +1 -0
  172. tests/aigc/test_t2i.py +87 -0
  173. tests/cli/test_run.py +20 -7
  174. tests/perf/test_perf.py +6 -3
  175. evalscope/metrics/code_metric.py +0 -98
  176. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  177. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  178. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
  179. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
  180. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
  181. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0
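The headline addition in this release is text-to-image (AIGC) evaluation: new t2i benchmark adapters, a t2i model adapter, and the vendored t2v_metrics scoring package. For orientation, a hypothetical sketch of driving one of the new benchmarks follows; run_task and TaskConfig come from the evalscope/run.py and evalscope/config.py modules listed above, but the dataset name 'tifa160' is only inferred from tifa_adapter.py, and the model id is a placeholder, neither is confirmed by this diff.

# Hypothetical sketch: the benchmark name and model id are assumptions,
# not taken from this diff.
from evalscope.config import TaskConfig
from evalscope.run import run_task

task = TaskConfig(
    model='stabilityai/stable-diffusion-xl-base-1.0',  # placeholder diffusion model id
    datasets=['tifa160'],  # assumed registry name for the new TIFA t2i adapter
)
run_task(task)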
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py
@@ -0,0 +1,233 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import re
+ from omegaconf import OmegaConf
+ from torchvision import transforms
+ from torchvision.transforms.functional import InterpolationMode
+
+ from ..common.registry import registry
+ from .base_processor import BaseProcessor
+ from .randaugment import RandomAugment
+
+
+ class BlipImageBaseProcessor(BaseProcessor):
+
+     def __init__(self, mean=None, std=None):
+         if mean is None:
+             mean = (0.48145466, 0.4578275, 0.40821073)
+         if std is None:
+             std = (0.26862954, 0.26130258, 0.27577711)
+
+         self.normalize = transforms.Normalize(mean, std)
+
+
+ @registry.register_processor('blip_caption')
+ class BlipCaptionProcessor(BaseProcessor):
+
+     def __init__(self, prompt='', max_words=50):
+         self.prompt = prompt
+         self.max_words = max_words
+
+     def __call__(self, caption):
+         caption = self.prompt + self.pre_caption(caption)
+
+         return caption
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         if cfg is None:
+             cfg = OmegaConf.create()
+
+         prompt = cfg.get('prompt', '')
+         max_words = cfg.get('max_words', 50)
+
+         return cls(prompt=prompt, max_words=max_words)
+
+     def pre_caption(self, caption):
+         caption = re.sub(
+             r"([.!\"()*#:;~])",
+             ' ',
+             caption.lower(),
+         )
+         caption = re.sub(
+             r'\s{2,}',
+             ' ',
+             caption,
+         )
+         caption = caption.rstrip('\n')
+         caption = caption.strip(' ')
+
+         # truncate caption
+         caption_words = caption.split(' ')
+         if len(caption_words) > self.max_words:
+             caption = ' '.join(caption_words[:self.max_words])
+
+         return caption
+
+
+ @registry.register_processor('blip_question')
+ class BlipQuestionProcessor(BaseProcessor):
+
+     def __init__(self, max_words=50):
+         self.max_words = max_words
+
+     def __call__(self, question):
+         return self.pre_question(question)
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         if cfg is None:
+             cfg = OmegaConf.create()
+
+         max_words = cfg.get('max_words', 50)
+
+         return cls(max_words=max_words)
+
+     def pre_question(self, question):
+         question = re.sub(
+             r"([.!\"()*#:;~])",
+             '',
+             question.lower(),
+         )
+         question = question.rstrip(' ')
+
+         # truncate question
+         question_words = question.split(' ')
+         if len(question_words) > self.max_words:
+             question = ' '.join(question_words[:self.max_words])
+
+         return question
+
+
+ @registry.register_processor('blip_image_train')
+ class BlipImageTrainProcessor(BlipImageBaseProcessor):
+
+     def __init__(self, image_size=384, mean=None, std=None, min_scale=0.5, max_scale=1.0):
+         super().__init__(mean=mean, std=std)
+
+         self.transform = transforms.Compose([
+             transforms.RandomResizedCrop(
+                 image_size,
+                 scale=(min_scale, max_scale),
+                 interpolation=InterpolationMode.BICUBIC,
+             ),
+             transforms.RandomHorizontalFlip(),
+             RandomAugment(
+                 2,
+                 5,
+                 isPIL=True,
+                 augs=[
+                     'Identity',
+                     'AutoContrast',
+                     'Brightness',
+                     'Sharpness',
+                     'Equalize',
+                     'ShearX',
+                     'ShearY',
+                     'TranslateX',
+                     'TranslateY',
+                     'Rotate',
+                 ],
+             ),
+             transforms.ToTensor(),
+             self.normalize,
+         ])
+
+     def __call__(self, item):
+         return self.transform(item)
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         if cfg is None:
+             cfg = OmegaConf.create()
+
+         image_size = cfg.get('image_size', 384)
+
+         mean = cfg.get('mean', None)
+         std = cfg.get('std', None)
+
+         min_scale = cfg.get('min_scale', 0.5)
+         max_scale = cfg.get('max_scale', 1.0)
+
+         return cls(
+             image_size=image_size,
+             mean=mean,
+             std=std,
+             min_scale=min_scale,
+             max_scale=max_scale,
+         )
+
+
+ @registry.register_processor('blip_image_eval')
+ class BlipImageEvalProcessor(BlipImageBaseProcessor):
+
+     def __init__(self, image_size=384, mean=None, std=None):
+         super().__init__(mean=mean, std=std)
+
+         self.transform = transforms.Compose([
+             transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
+             transforms.ToTensor(),
+             self.normalize,
+         ])
+
+     def __call__(self, item):
+         return self.transform(item)
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         if cfg is None:
+             cfg = OmegaConf.create()
+
+         image_size = cfg.get('image_size', 384)
+
+         mean = cfg.get('mean', None)
+         std = cfg.get('std', None)
+
+         return cls(image_size=image_size, mean=mean, std=std)
+
+
+ @registry.register_processor('blip2_image_train')
+ class Blip2ImageTrainProcessor(BlipImageBaseProcessor):
+
+     def __init__(self, image_size=364, mean=None, std=None, min_scale=0.5, max_scale=1.0):
+         super().__init__(mean=mean, std=std)
+
+         self.transform = transforms.Compose([
+             transforms.RandomResizedCrop(
+                 image_size,
+                 scale=(min_scale, max_scale),
+                 interpolation=InterpolationMode.BICUBIC,
+             ),
+             transforms.RandomHorizontalFlip(),
+             transforms.ToTensor(),
+             self.normalize,
+         ])
+
+     def __call__(self, item):
+         return self.transform(item)
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         if cfg is None:
+             cfg = OmegaConf.create()
+
+         image_size = cfg.get('image_size', 364)
+
+         mean = cfg.get('mean', None)
+         std = cfg.get('std', None)
+
+         min_scale = cfg.get('min_scale', 0.5)
+         max_scale = cfg.get('max_scale', 1.0)
+
+         return cls(
+             image_size=image_size,
+             mean=mean,
+             std=std,
+             min_scale=min_scale,
+             max_scale=max_scale,
+         )
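For orientation, a minimal usage sketch of the processors added above (file 135 in the list), assuming the vendored import path shown there; 'example.jpg' is a placeholder:

# Minimal sketch, not from this diff: exercising the vendored BLIP processors.
from PIL import Image
from evalscope.metrics.t2v_metrics.models.vqascore_models.lavis.processors.blip_processors import (
    BlipCaptionProcessor,
    BlipImageEvalProcessor,
)

image_proc = BlipImageEvalProcessor(image_size=384)     # resize + normalize, no augmentation
text_proc = BlipCaptionProcessor(prompt='a photo of ')  # lowercase, strip punctuation, truncate

img = Image.open('example.jpg').convert('RGB')          # placeholder file
pixel_values = image_proc(img)                          # torch.Tensor of shape (3, 384, 384)
caption = text_proc('A DOG; running!')                  # 'a photo of a dog running'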
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py
@@ -0,0 +1,392 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ try:
+     import cv2
+ except ImportError:
+     pass
+ import numpy as np
+ import torch
+
+
+ ## aug functions
+ def identity_func(img):
+     return img
+
+
+ def autocontrast_func(img, cutoff=0):
+     """
+     same output as PIL.ImageOps.autocontrast
+     """
+     n_bins = 256
+
+     def tune_channel(ch):
+         n = ch.size
+         cut = cutoff * n // 100
+         if cut == 0:
+             high, low = ch.max(), ch.min()
+         else:
+             hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
+             low = np.argwhere(np.cumsum(hist) > cut)
+             low = 0 if low.shape[0] == 0 else low[0]
+             high = np.argwhere(np.cumsum(hist[::-1]) > cut)
+             high = n_bins - 1 if high.shape[0] == 0 else n_bins - 1 - high[0]
+         if high <= low:
+             table = np.arange(n_bins)
+         else:
+             scale = (n_bins - 1) / (high - low)
+             offset = -low * scale
+             table = np.arange(n_bins) * scale + offset
+             table[table < 0] = 0
+             table[table > n_bins - 1] = n_bins - 1
+         table = table.clip(0, 255).astype(np.uint8)
+         return table[ch]
+
+     channels = [tune_channel(ch) for ch in cv2.split(img)]
+     out = cv2.merge(channels)
+     return out
+
+
+ def equalize_func(img):
+     """
+     same output as PIL.ImageOps.equalize
+     PIL's implementation is different from cv2.equalize
+     """
+     n_bins = 256
+
+     def tune_channel(ch):
+         hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
+         non_zero_hist = hist[hist != 0].reshape(-1)
+         step = np.sum(non_zero_hist[:-1]) // (n_bins - 1)
+         if step == 0:
+             return ch
+         n = np.empty_like(hist)
+         n[0] = step // 2
+         n[1:] = hist[:-1]
+         table = (np.cumsum(n) // step).clip(0, 255).astype(np.uint8)
+         return table[ch]
+
+     channels = [tune_channel(ch) for ch in cv2.split(img)]
+     out = cv2.merge(channels)
+     return out
+
+
+ def rotate_func(img, degree, fill=(0, 0, 0)):
+     """
+     like PIL, rotate by degree, not radians
+     """
+     H, W = img.shape[0], img.shape[1]
+     center = W / 2, H / 2
+     M = cv2.getRotationMatrix2D(center, degree, 1)
+     out = cv2.warpAffine(img, M, (W, H), borderValue=fill)
+     return out
+
+
+ def solarize_func(img, thresh=128):
+     """
+     same output as PIL.ImageOps.posterize
+     """
+     table = np.array([el if el < thresh else 255 - el for el in range(256)])
+     table = table.clip(0, 255).astype(np.uint8)
+     out = table[img]
+     return out
+
+
+ def color_func(img, factor):
+     """
+     same output as PIL.ImageEnhance.Color
+     """
+     ## implementation according to PIL definition, quite slow
+     # degenerate = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[:, :, np.newaxis]
+     # out = blend(degenerate, img, factor)
+     # M = (
+     #     np.eye(3) * factor
+     #     + np.float32([0.114, 0.587, 0.299]).reshape(3, 1) * (1. - factor)
+     # )[np.newaxis, np.newaxis, :]
+     M = np.float32([[0.886, -0.114, -0.114], [-0.587, 0.413, -0.587], [-0.299, -0.299, 0.701]]) * factor + np.float32(
+         [[0.114], [0.587], [0.299]])
+     out = np.matmul(img, M).clip(0, 255).astype(np.uint8)
+     return out
+
+
+ def contrast_func(img, factor):
+     """
+     same output as PIL.ImageEnhance.Contrast
+     """
+     mean = np.sum(np.mean(img, axis=(0, 1)) * np.array([0.114, 0.587, 0.299]))
+     table = (np.array([(el - mean) * factor + mean for el in range(256)]).clip(0, 255).astype(np.uint8))
+     out = table[img]
+     return out
+
+
+ def brightness_func(img, factor):
+     """
+     same output as PIL.ImageEnhance.Contrast
+     """
+     table = (np.arange(256, dtype=np.float32) * factor).clip(0, 255).astype(np.uint8)
+     out = table[img]
+     return out
+
+
+ def sharpness_func(img, factor):
+     """
+     The differences the this result and PIL are all on the 4 boundaries, the center
+     areas are same
+     """
+     kernel = np.ones((3, 3), dtype=np.float32)
+     kernel[1][1] = 5
+     kernel /= 13
+     degenerate = cv2.filter2D(img, -1, kernel)
+     if factor == 0.0:
+         out = degenerate
+     elif factor == 1.0:
+         out = img
+     else:
+         out = img.astype(np.float32)
+         degenerate = degenerate.astype(np.float32)[1:-1, 1:-1, :]
+         out[1:-1, 1:-1, :] = degenerate + factor * (out[1:-1, 1:-1, :] - degenerate)
+         out = out.astype(np.uint8)
+     return out
+
+
+ def shear_x_func(img, factor, fill=(0, 0, 0)):
+     H, W = img.shape[0], img.shape[1]
+     M = np.float32([[1, factor, 0], [0, 1, 0]])
+     out = cv2.warpAffine(img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR).astype(np.uint8)
+     return out
+
+
+ def translate_x_func(img, offset, fill=(0, 0, 0)):
+     """
+     same output as PIL.Image.transform
+     """
+     H, W = img.shape[0], img.shape[1]
+     M = np.float32([[1, 0, -offset], [0, 1, 0]])
+     out = cv2.warpAffine(img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR).astype(np.uint8)
+     return out
+
+
+ def translate_y_func(img, offset, fill=(0, 0, 0)):
+     """
+     same output as PIL.Image.transform
+     """
+     H, W = img.shape[0], img.shape[1]
+     M = np.float32([[1, 0, 0], [0, 1, -offset]])
+     out = cv2.warpAffine(img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR).astype(np.uint8)
+     return out
+
+
+ def posterize_func(img, bits):
+     """
+     same output as PIL.ImageOps.posterize
+     """
+     out = np.bitwise_and(img, np.uint8(255 << (8 - bits)))
+     return out
+
+
+ def shear_y_func(img, factor, fill=(0, 0, 0)):
+     H, W = img.shape[0], img.shape[1]
+     M = np.float32([[1, 0, 0], [factor, 1, 0]])
+     out = cv2.warpAffine(img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR).astype(np.uint8)
+     return out
+
+
+ def cutout_func(img, pad_size, replace=(0, 0, 0)):
+     replace = np.array(replace, dtype=np.uint8)
+     H, W = img.shape[0], img.shape[1]
+     rh, rw = np.random.random(2)
+     pad_size = pad_size // 2
+     ch, cw = int(rh * H), int(rw * W)
+     x1, x2 = max(ch - pad_size, 0), min(ch + pad_size, H)
+     y1, y2 = max(cw - pad_size, 0), min(cw + pad_size, W)
+     out = img.copy()
+     out[x1:x2, y1:y2, :] = replace
+     return out
+
+
+ ### level to args
+ def enhance_level_to_args(MAX_LEVEL):
+
+     def level_to_args(level):
+         return ((level / MAX_LEVEL) * 1.8 + 0.1, )
+
+     return level_to_args
+
+
+ def shear_level_to_args(MAX_LEVEL, replace_value):
+
+     def level_to_args(level):
+         level = (level / MAX_LEVEL) * 0.3
+         if np.random.random() > 0.5:
+             level = -level
+         return (level, replace_value)
+
+     return level_to_args
+
+
+ def translate_level_to_args(translate_const, MAX_LEVEL, replace_value):
+
+     def level_to_args(level):
+         level = (level / MAX_LEVEL) * float(translate_const)
+         if np.random.random() > 0.5:
+             level = -level
+         return (level, replace_value)
+
+     return level_to_args
+
+
+ def cutout_level_to_args(cutout_const, MAX_LEVEL, replace_value):
+
+     def level_to_args(level):
+         level = int((level / MAX_LEVEL) * cutout_const)
+         return (level, replace_value)
+
+     return level_to_args
+
+
+ def solarize_level_to_args(MAX_LEVEL):
+
+     def level_to_args(level):
+         level = int((level / MAX_LEVEL) * 256)
+         return (level, )
+
+     return level_to_args
+
+
+ def none_level_to_args(level):
+     return ()
+
+
+ def posterize_level_to_args(MAX_LEVEL):
+
+     def level_to_args(level):
+         level = int((level / MAX_LEVEL) * 4)
+         return (level, )
+
+     return level_to_args
+
+
+ def rotate_level_to_args(MAX_LEVEL, replace_value):
+
+     def level_to_args(level):
+         level = (level / MAX_LEVEL) * 30
+         if np.random.random() < 0.5:
+             level = -level
+         return (level, replace_value)
+
+     return level_to_args
+
+
+ func_dict = {
+     'Identity': identity_func,
+     'AutoContrast': autocontrast_func,
+     'Equalize': equalize_func,
+     'Rotate': rotate_func,
+     'Solarize': solarize_func,
+     'Color': color_func,
+     'Contrast': contrast_func,
+     'Brightness': brightness_func,
+     'Sharpness': sharpness_func,
+     'ShearX': shear_x_func,
+     'TranslateX': translate_x_func,
+     'TranslateY': translate_y_func,
+     'Posterize': posterize_func,
+     'ShearY': shear_y_func,
+ }
+
+ translate_const = 10
+ MAX_LEVEL = 10
+ replace_value = (128, 128, 128)
+ arg_dict = {
+     'Identity': none_level_to_args,
+     'AutoContrast': none_level_to_args,
+     'Equalize': none_level_to_args,
+     'Rotate': rotate_level_to_args(MAX_LEVEL, replace_value),
+     'Solarize': solarize_level_to_args(MAX_LEVEL),
+     'Color': enhance_level_to_args(MAX_LEVEL),
+     'Contrast': enhance_level_to_args(MAX_LEVEL),
+     'Brightness': enhance_level_to_args(MAX_LEVEL),
+     'Sharpness': enhance_level_to_args(MAX_LEVEL),
+     'ShearX': shear_level_to_args(MAX_LEVEL, replace_value),
+     'TranslateX': translate_level_to_args(translate_const, MAX_LEVEL, replace_value),
+     'TranslateY': translate_level_to_args(translate_const, MAX_LEVEL, replace_value),
+     'Posterize': posterize_level_to_args(MAX_LEVEL),
+     'ShearY': shear_level_to_args(MAX_LEVEL, replace_value),
+ }
+
+
+ class RandomAugment(object):
+
+     def __init__(self, N=2, M=10, isPIL=False, augs=[]):
+         self.N = N
+         self.M = M
+         self.isPIL = isPIL
+         if augs:
+             self.augs = augs
+         else:
+             self.augs = list(arg_dict.keys())
+
+     def get_random_ops(self):
+         sampled_ops = np.random.choice(self.augs, self.N)
+         return [(op, 0.5, self.M) for op in sampled_ops]
+
+     def __call__(self, img):
+         if self.isPIL:
+             img = np.array(img)
+         ops = self.get_random_ops()
+         for name, prob, level in ops:
+             if np.random.random() > prob:
+                 continue
+             args = arg_dict[name](level)
+             img = func_dict[name](img, *args)
+         return img
+
+
+ class VideoRandomAugment(object):
+
+     def __init__(self, N=2, M=10, p=0.0, tensor_in_tensor_out=True, augs=[]):
+         self.N = N
+         self.M = M
+         self.p = p
+         self.tensor_in_tensor_out = tensor_in_tensor_out
+         if augs:
+             self.augs = augs
+         else:
+             self.augs = list(arg_dict.keys())
+
+     def get_random_ops(self):
+         sampled_ops = np.random.choice(self.augs, self.N, replace=False)
+         return [(op, self.M) for op in sampled_ops]
+
+     def __call__(self, frames):
+         assert (frames.shape[-1] == 3), 'Expecting last dimension for 3-channels RGB (b, h, w, c).'
+
+         if self.tensor_in_tensor_out:
+             frames = frames.numpy().astype(np.uint8)
+
+         num_frames = frames.shape[0]
+
+         ops = num_frames * [self.get_random_ops()]
+         apply_or_not = num_frames * [np.random.random(size=self.N) > self.p]
+
+         frames = torch.stack(list(map(self._aug, frames, ops, apply_or_not)), dim=0).float()
+
+         return frames
+
+     def _aug(self, img, ops, apply_or_not):
+         for i, (name, level) in enumerate(ops):
+             if not apply_or_not[i]:
+                 continue
+             args = arg_dict[name](level)
+             img = func_dict[name](img, *args)
+         return torch.from_numpy(img)
+
+
+ if __name__ == '__main__':
+     a = RandomAugment()
+     img = np.random.randn(32, 32, 3)
+     a(img)
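Finally, a small sketch exercising RandomAugment from randaugment.py above (file 136 in the list); it assumes opencv-python is installed, since most ops dispatch to cv2, and the array shape is arbitrary:

# Minimal sketch, not from this diff: applying RandomAugment to a PIL image.
import numpy as np
from PIL import Image
from evalscope.metrics.t2v_metrics.models.vqascore_models.lavis.processors.randaugment import RandomAugment

# two ops sampled per call at magnitude 5, each applied with probability 0.5
aug = RandomAugment(2, 5, isPIL=True, augs=['Identity', 'Brightness', 'Rotate'])
img = Image.fromarray(np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8))
out = aug(img)  # numpy uint8 array of shape (64, 64, 3)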