evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their registry. It is provided for informational purposes only.

Potentially problematic release.

This version of evalscope might be problematic.

Files changed (181)
  1. evalscope/arguments.py +2 -1
  2. evalscope/benchmarks/__init__.py +2 -2
  3. evalscope/benchmarks/aigc/__init__.py +0 -0
  4. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  5. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  6. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  7. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  8. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  9. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  10. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  11. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  12. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  13. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  14. evalscope/benchmarks/arc/arc_adapter.py +1 -1
  15. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  16. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  17. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  18. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  19. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  20. evalscope/benchmarks/data_adapter.py +16 -9
  21. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  22. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  23. evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
  24. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  25. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
  26. evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
  27. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  28. evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
  29. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  30. evalscope/benchmarks/utils.py +7 -16
  31. evalscope/cli/start_app.py +1 -1
  32. evalscope/collections/evaluator.py +16 -4
  33. evalscope/config.py +7 -3
  34. evalscope/constants.py +11 -0
  35. evalscope/evaluator/evaluator.py +9 -3
  36. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  37. evalscope/metrics/__init__.py +49 -4
  38. evalscope/metrics/llm_judge.py +1 -1
  39. evalscope/metrics/named_metrics.py +13 -0
  40. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  41. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  42. evalscope/metrics/t2v_metrics/constants.py +12 -0
  43. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  44. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  45. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  46. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  47. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  48. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  49. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  50. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  51. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  52. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  53. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  54. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  55. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  56. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  57. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  58. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  59. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  60. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  61. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  62. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  63. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  64. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  65. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  66. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  67. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  68. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  69. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  70. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  71. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  72. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  73. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  74. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  75. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  76. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  77. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  139. evalscope/metrics/t2v_metrics/score.py +78 -0
  140. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  141. evalscope/models/__init__.py +50 -14
  142. evalscope/models/adapters/__init__.py +17 -0
  143. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  144. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  145. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  146. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  147. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  148. evalscope/models/adapters/t2i_adapter.py +76 -0
  149. evalscope/models/custom/__init__.py +2 -1
  150. evalscope/models/custom/dummy_model.py +11 -13
  151. evalscope/models/local_model.py +82 -33
  152. evalscope/models/model.py +2 -42
  153. evalscope/models/register.py +26 -0
  154. evalscope/perf/benchmark.py +4 -3
  155. evalscope/perf/main.py +4 -2
  156. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  157. evalscope/perf/utils/benchmark_util.py +2 -2
  158. evalscope/perf/utils/db_util.py +16 -8
  159. evalscope/report/__init__.py +1 -0
  160. evalscope/report/app.py +117 -67
  161. evalscope/report/app_arguments.py +11 -0
  162. evalscope/report/generator.py +1 -1
  163. evalscope/run.py +3 -3
  164. evalscope/third_party/thinkbench/eval.py +19 -7
  165. evalscope/utils/chat_service.py +2 -2
  166. evalscope/utils/import_utils.py +66 -0
  167. evalscope/utils/utils.py +12 -4
  168. evalscope/version.py +2 -2
  169. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
  170. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
  171. tests/aigc/__init__.py +1 -0
  172. tests/aigc/test_t2i.py +87 -0
  173. tests/cli/test_run.py +20 -7
  174. tests/perf/test_perf.py +6 -3
  175. evalscope/metrics/code_metric.py +0 -98
  176. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  177. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  178. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
  179. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
  180. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
  181. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py
@@ -0,0 +1,318 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ # coding=utf-8
+
+ __author__ = 'aagrawal'
+
+ import re
+ # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link:
+ # (https://github.com/tylin/coco-caption/blob/master/pycocoevalcap/eval.py).
+ import sys
+
+
+ class VQAEval:
+
+     def __init__(self, vqa=None, vqaRes=None, n=2):
+         self.n = n
+         self.accuracy = {}
+         self.evalQA = {}
+         self.evalQuesType = {}
+         self.evalAnsType = {}
+         self.vqa = vqa
+         self.vqaRes = vqaRes
+         if vqa is not None:
+             self.params = {'question_id': vqa.getQuesIds()}
+         self.contractions = {
+             'aint': "ain't",
+             'arent': "aren't",
+             'cant': "can't",
+             'couldve': "could've",
+             'couldnt': "couldn't",
+             "couldn'tve": "couldn't've",
+             "couldnt've": "couldn't've",
+             'didnt': "didn't",
+             'doesnt': "doesn't",
+             'dont': "don't",
+             'hadnt': "hadn't",
+             "hadnt've": "hadn't've",
+             "hadn'tve": "hadn't've",
+             'hasnt': "hasn't",
+             'havent': "haven't",
+             'hed': "he'd",
+             "hed've": "he'd've",
+             "he'dve": "he'd've",
+             'hes': "he's",
+             'howd': "how'd",
+             'howll': "how'll",
+             'hows': "how's",
+             "Id've": "I'd've",
+             "I'dve": "I'd've",
+             'Im': "I'm",
+             'Ive': "I've",
+             'isnt': "isn't",
+             'itd': "it'd",
+             "itd've": "it'd've",
+             "it'dve": "it'd've",
+             'itll': "it'll",
+             "let's": "let's",
+             'maam': "ma'am",
+             'mightnt': "mightn't",
+             "mightnt've": "mightn't've",
+             "mightn'tve": "mightn't've",
+             'mightve': "might've",
+             'mustnt': "mustn't",
+             'mustve': "must've",
+             'neednt': "needn't",
+             'notve': "not've",
+             'oclock': "o'clock",
+             'oughtnt': "oughtn't",
+             "ow's'at": "'ow's'at",
+             "'ows'at": "'ow's'at",
+             "'ow'sat": "'ow's'at",
+             'shant': "shan't",
+             "shed've": "she'd've",
+             "she'dve": "she'd've",
+             "she's": "she's",
+             'shouldve': "should've",
+             'shouldnt': "shouldn't",
+             "shouldnt've": "shouldn't've",
+             "shouldn'tve": "shouldn't've",
+             "somebody'd": 'somebodyd',
+             "somebodyd've": "somebody'd've",
+             "somebody'dve": "somebody'd've",
+             'somebodyll': "somebody'll",
+             'somebodys': "somebody's",
+             'someoned': "someone'd",
+             "someoned've": "someone'd've",
+             "someone'dve": "someone'd've",
+             'someonell': "someone'll",
+             'someones': "someone's",
+             'somethingd': "something'd",
+             "somethingd've": "something'd've",
+             "something'dve": "something'd've",
+             'somethingll': "something'll",
+             'thats': "that's",
+             'thered': "there'd",
+             "thered've": "there'd've",
+             "there'dve": "there'd've",
+             'therere': "there're",
+             'theres': "there's",
+             'theyd': "they'd",
+             "theyd've": "they'd've",
+             "they'dve": "they'd've",
+             'theyll': "they'll",
+             'theyre': "they're",
+             'theyve': "they've",
+             'twas': "'twas",
+             'wasnt': "wasn't",
+             "wed've": "we'd've",
+             "we'dve": "we'd've",
+             'weve': "we've",
+             'werent': "weren't",
+             'whatll': "what'll",
+             'whatre': "what're",
+             'whats': "what's",
+             'whatve': "what've",
+             'whens': "when's",
+             'whered': "where'd",
+             'wheres': "where's",
+             'whereve': "where've",
+             'whod': "who'd",
+             "whod've": "who'd've",
+             "who'dve": "who'd've",
+             'wholl': "who'll",
+             'whos': "who's",
+             'whove': "who've",
+             'whyll': "why'll",
+             'whyre': "why're",
+             'whys': "why's",
+             'wont': "won't",
+             'wouldve': "would've",
+             'wouldnt': "wouldn't",
+             "wouldnt've": "wouldn't've",
+             "wouldn'tve": "wouldn't've",
+             'yall': "y'all",
+             "yall'll": "y'all'll",
+             "y'allll": "y'all'll",
+             "yall'd've": "y'all'd've",
+             "y'alld've": "y'all'd've",
+             "y'all'dve": "y'all'd've",
+             'youd': "you'd",
+             "youd've": "you'd've",
+             "you'dve": "you'd've",
+             'youll': "you'll",
+             'youre': "you're",
+             'youve': "you've",
+         }
+         self.manualMap = {
+             'none': '0',
+             'zero': '0',
+             'one': '1',
+             'two': '2',
+             'three': '3',
+             'four': '4',
+             'five': '5',
+             'six': '6',
+             'seven': '7',
+             'eight': '8',
+             'nine': '9',
+             'ten': '10',
+         }
+         self.articles = ['a', 'an', 'the']
+
+         self.periodStrip = re.compile('(?!<=\d)(\.)(?!\d)')
+         self.commaStrip = re.compile('(\d)(,)(\d)')
+         self.punct = [
+             ';',
+             r'/',
+             '[',
+             ']',
+             '"',
+             '{',
+             '}',
+             '(',
+             ')',
+             '=',
+             '+',
+             '\\',
+             '_',
+             '-',
+             '>',
+             '<',
+             '@',
+             '`',
+             ',',
+             '?',
+             '!',
+         ]
+
+     def evaluate(self, quesIds=None):
+         if quesIds == None:
+             quesIds = [quesId for quesId in self.params['question_id']]
+         gts = {}
+         res = {}
+         for quesId in quesIds:
+             gts[quesId] = self.vqa.qa[quesId]
+             res[quesId] = self.vqaRes.qa[quesId]
+
+         # =================================================
+         # Compute accuracy
+         # =================================================
+         accQA = []
+         accQuesType = {}
+         accAnsType = {}
+         print('computing accuracy')
+         step = 0
+         for quesId in quesIds:
+             resAns = res[quesId]['answer']
+             resAns = resAns.replace('\n', ' ')
+             resAns = resAns.replace('\t', ' ')
+             resAns = resAns.strip()
+             resAns = self.processPunctuation(resAns)
+             resAns = self.processDigitArticle(resAns)
+             gtAcc = []
+             gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']]
+             if len(set(gtAnswers)) > 1:
+                 for ansDic in gts[quesId]['answers']:
+                     ansDic['answer'] = self.processPunctuation(ansDic['answer'])
+             for gtAnsDatum in gts[quesId]['answers']:
+                 otherGTAns = [item for item in gts[quesId]['answers'] if item != gtAnsDatum]
+                 matchingAns = [item for item in otherGTAns if item['answer'] == resAns]
+                 acc = min(1, float(len(matchingAns)) / 3)
+                 gtAcc.append(acc)
+             quesType = gts[quesId]['question_type']
+             ansType = gts[quesId]['answer_type']
+             avgGTAcc = float(sum(gtAcc)) / len(gtAcc)
+             accQA.append(avgGTAcc)
+             if quesType not in accQuesType:
+                 accQuesType[quesType] = []
+             accQuesType[quesType].append(avgGTAcc)
+             if ansType not in accAnsType:
+                 accAnsType[ansType] = []
+             accAnsType[ansType].append(avgGTAcc)
+             self.setEvalQA(quesId, avgGTAcc)
+             self.setEvalQuesType(quesId, quesType, avgGTAcc)
+             self.setEvalAnsType(quesId, ansType, avgGTAcc)
+             if step % 100 == 0:
+                 self.updateProgress(step / float(len(quesIds)))
+             step = step + 1
+
+         self.setAccuracy(accQA, accQuesType, accAnsType)
+         print('Done computing accuracy')
+
+     def processPunctuation(self, inText):
+         outText = inText
+         for p in self.punct:
+             if (p + ' ' in inText or ' ' + p in inText) or (re.search(self.commaStrip, inText) != None):
+                 outText = outText.replace(p, '')
+             else:
+                 outText = outText.replace(p, ' ')
+         outText = self.periodStrip.sub('', outText, re.UNICODE)
+         return outText
+
+     def processDigitArticle(self, inText):
+         outText = []
+         tempText = inText.lower().split()
+         for word in tempText:
+             word = self.manualMap.setdefault(word, word)
+             if word not in self.articles:
+                 outText.append(word)
+             else:
+                 pass
+         for wordId, word in enumerate(outText):
+             if word in self.contractions:
+                 outText[wordId] = self.contractions[word]
+         outText = ' '.join(outText)
+         return outText
+
+     def setAccuracy(self, accQA, accQuesType, accAnsType):
+         self.accuracy['overall'] = round(100 * float(sum(accQA)) / len(accQA), self.n)
+         self.accuracy['perQuestionType'] = {
+             quesType: round(
+                 100 * float(sum(accQuesType[quesType])) / len(accQuesType[quesType]),
+                 self.n,
+             )
+             for quesType in accQuesType
+         }
+         self.accuracy['perAnswerType'] = {
+             ansType: round(100 * float(sum(accAnsType[ansType])) / len(accAnsType[ansType]), self.n)
+             for ansType in accAnsType
+         }
+
+     def setEvalQA(self, quesId, acc):
+         self.evalQA[quesId] = round(100 * acc, self.n)
+
+     def setEvalQuesType(self, quesId, quesType, acc):
+         if quesType not in self.evalQuesType:
+             self.evalQuesType[quesType] = {}
+         self.evalQuesType[quesType][quesId] = round(100 * acc, self.n)
+
+     def setEvalAnsType(self, quesId, ansType, acc):
+         if ansType not in self.evalAnsType:
+             self.evalAnsType[ansType] = {}
+         self.evalAnsType[ansType][quesId] = round(100 * acc, self.n)
+
+     def updateProgress(self, progress):
+         barLength = 20
+         status = ''
+         if isinstance(progress, int):
+             progress = float(progress)
+         if not isinstance(progress, float):
+             progress = 0
+             status = 'error: progress var must be float\r\n'
+         if progress < 0:
+             progress = 0
+             status = 'Halt...\r\n'
+         if progress >= 1:
+             progress = 1
+             status = 'Done...\r\n'
+         block = int(round(barLength * progress))
+         text = '\rFinshed Percent: [{0}] {1}% {2}'.format('#' * block + '-' * (barLength - block), int(progress * 100),
+                                                           status)
+         sys.stdout.write(text)
+         sys.stdout.flush()
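
For reference, VQAEval above implements the standard VQA soft-accuracy rule: a predicted answer earns min(1, matches / 3) credit against the ten human answers, averaged over the ten leave-one-out subsets. A minimal sketch against the class as added above (the answers below are made up for illustration, not evalscope test data):

# The normalization helpers work standalone; vqa/vqaRes default to None.
ev = VQAEval()
ans = ev.processDigitArticle(ev.processPunctuation('Two!'))  # -> '2'

# Soft accuracy: min(1, matches / 3), averaged leave-one-out over the
# ten ground-truth answers (hypothetical annotations below).
gt = ['red'] * 2 + ['blue'] * 7 + ['green']
pred = 'red'
accs = []
for i in range(len(gt)):
    others = gt[:i] + gt[i + 1:]      # drop one annotator at a time
    accs.append(min(1.0, sum(a == pred for a in others) / 3))
print(sum(accs) / len(accs))          # -> 0.6 (partial credit)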

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml
@@ -0,0 +1,10 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ env:
+   # For default users
+   # cache_root: "cache"
+   # For internal use with persistent storage
+   cache_root: "/export/home/.cache/lavis"
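
The cache_root setting controls where the vendored LAVIS code stores downloaded checkpoints. Upstream LAVIS reads these files with OmegaConf; assuming the vendored lavis/common/config.py (added in this release) keeps that behavior, inspecting or overriding the cache directory looks roughly like this sketch:

# Sketch only: assumes the vendored config loader mirrors upstream LAVIS,
# which parses these YAML files via OmegaConf.
from omegaconf import OmegaConf

cfg = OmegaConf.load('evalscope/metrics/t2v_metrics/models/vqascore_models/'
                     'lavis/configs/default.yaml')
print(cfg.env.cache_root)              # -> /export/home/.cache/lavis
cfg.env.cache_root = '~/.cache/lavis'  # point the cache somewhere writable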

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml
@@ -0,0 +1,42 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: caption_coco_flant5xl
+   load_finetuned: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_flant5xl.pth"
+   finetuned: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_caption_flant5xl.pth"
+
+   # vit encoder
+   image_size: 364
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp32"
+   freeze_vit: False
+
+   # Q-Former
+   num_query_token: 32
+
+   # T5
+   t5_model: "google/flan-t5-xl"
+
+   # generation configs
+   prompt: "a photo of"
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 364
+     eval:
+       name: "blip_image_eval"
+       image_size: 364
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml
@@ -0,0 +1,42 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: caption_coco_opt2.7b
+   load_finetuned: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_opt2.7b.pth"
+   finetuned: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_caption_opt2.7b.pth"
+
+   # vit encoder
+   image_size: 364
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp32"
+   freeze_vit: False
+
+   # Q-Former
+   num_query_token: 32
+
+   # OPT
+   opt_model: "facebook/opt-2.7b"
+
+   # generation configs
+   prompt: "a photo of"
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 364
+     eval:
+       name: "blip_image_eval"
+       image_size: 364
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml
@@ -0,0 +1,42 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: caption_coco_opt6.7b
+   load_finetuned: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_opt6.7b.pth"
+   finetuned: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_caption_opt6.7b.pth"
+
+   # vit encoder
+   image_size: 364
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp32"
+   freeze_vit: False
+
+   # Q-Former
+   num_query_token: 32
+
+   # OPT
+   opt_model: "facebook/opt-6.7b"
+
+   # generation configs
+   prompt: "a photo of"
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 364
+     eval:
+       name: "blip_image_eval"
+       image_size: 364
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml
@@ -0,0 +1,36 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: coco
+   load_finetuned: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained.pth"
+   finetuned: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_finetune_coco.pth"
+
+   # vit encoder
+   image_size: 364
+   drop_path_rate: 0
+   use_grad_checkpoint: True
+   vit_precision: "fp32"
+   freeze_vit: False
+
+   # Q-Former
+   num_query_token: 32
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 364
+     eval:
+       name: "blip_image_eval"
+       image_size: 364
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml
@@ -0,0 +1,43 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: flant5xl
+   load_finetuned: False
+   load_pretrained: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/InstructBLIP/instruct_blip_flanxl_trimmed.pth"
+   finetuned: ""
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+
+   # Q-Former
+   num_query_token: 32
+
+   # T5
+   t5_model: "google/flan-t5-xl"
+
+   # generation configs
+   prompt: ""
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 224
+     eval:
+       name: "blip_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml
@@ -0,0 +1,43 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: flant5xxl
+   load_finetuned: False
+   load_pretrained: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/InstructBLIP/instruct_blip_flanxxl_trimmed.pth"
+   finetuned: ""
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+
+   # Q-Former
+   num_query_token: 32
+
+   # T5
+   t5_model: "google/flan-t5-xxl"
+
+   # generation configs
+   prompt: ""
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip_image_train"
+       image_size: 224
+     eval:
+       name: "blip_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml
@@ -0,0 +1,43 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: instruct_vicuna13b
+   load_finetuned: False
+   load_pretrained: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/InstructBLIP/instruct_blip_vicuna13b_trimmed.pth"
+   finetuned: ""
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+
+   # Q-Former
+   num_query_token: 32
+
+   # path to Vicuna checkpoint
+   llm_model: "lmsys/vicuna-13b-v1.1"
+
+   # generation configs
+   prompt: ""
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip2_image_train"
+       image_size: 224
+     eval:
+       name: "blip_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml
@@ -0,0 +1,43 @@
+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+ model:
+   arch: instruct_vicuna7b
+   load_finetuned: False
+   load_pretrained: True
+
+   pretrained: "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/InstructBLIP/instruct_blip_vicuna7b_trimmed.pth"
+   finetuned: ""
+
+   # vit encoder
+   image_size: 224
+   drop_path_rate: 0
+   use_grad_checkpoint: False
+   vit_precision: "fp16"
+   freeze_vit: True
+
+   # Q-Former
+   num_query_token: 32
+
+   # path to Vicuna checkpoint
+   llm_model: "lmsys/vicuna-7b-v1.1"
+
+   # generation configs
+   prompt: ""
+
+
+ preprocess:
+   vis_processor:
+     train:
+       name: "blip2_image_train"
+       image_size: 224
+     eval:
+       name: "blip_image_eval"
+       image_size: 224
+   text_processor:
+     train:
+       name: "blip_caption"
+     eval:
+       name: "blip_caption"
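
All of the BLIP-2 model configs above share the same shape: a model block naming the architecture and checkpoint URLs, plus a preprocess block naming the train/eval processors. Upstream LAVIS picks the checkpoint with the load_finetuned / load_pretrained flags; assuming the vendored copy follows the same pattern, the selection logic amounts to this sketch (the path is a hypothetical local copy of one of the files above):

# Sketch under that assumption; plain PyYAML suffices since these are
# ordinary YAML files.
import yaml

with open('blip2_instruct_vicuna7b.yaml') as f:
    model_cfg = yaml.safe_load(f)['model']

ckpt = None
if model_cfg.get('load_finetuned', False):
    ckpt = model_cfg['finetuned']    # task-specific weights
elif model_cfg.get('load_pretrained', False):
    ckpt = model_cfg['pretrained']   # stage-2 pretrained weights
print(ckpt)  # for this config: the InstructBLIP vicuna7b trimmed .pth URL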