evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.

Note: this release of evalscope has been flagged as potentially problematic.

Files changed (181)
  1. evalscope/arguments.py +2 -1
  2. evalscope/benchmarks/__init__.py +2 -2
  3. evalscope/benchmarks/aigc/__init__.py +0 -0
  4. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  5. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  6. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  7. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  8. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  9. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  10. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  11. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  12. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  13. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  14. evalscope/benchmarks/arc/arc_adapter.py +1 -1
  15. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  16. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  17. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  18. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  19. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  20. evalscope/benchmarks/data_adapter.py +16 -9
  21. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  22. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  23. evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
  24. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  25. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
  26. evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
  27. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  28. evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
  29. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  30. evalscope/benchmarks/utils.py +7 -16
  31. evalscope/cli/start_app.py +1 -1
  32. evalscope/collections/evaluator.py +16 -4
  33. evalscope/config.py +7 -3
  34. evalscope/constants.py +11 -0
  35. evalscope/evaluator/evaluator.py +9 -3
  36. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  37. evalscope/metrics/__init__.py +49 -4
  38. evalscope/metrics/llm_judge.py +1 -1
  39. evalscope/metrics/named_metrics.py +13 -0
  40. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  41. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  42. evalscope/metrics/t2v_metrics/constants.py +12 -0
  43. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  44. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  45. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  46. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  47. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  48. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  49. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  50. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  51. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  52. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  53. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  54. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  55. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  56. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  57. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  58. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  59. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  60. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  61. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  62. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  63. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  64. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  65. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  66. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  67. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  68. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  69. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  70. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  71. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  72. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  73. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  74. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  75. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  76. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  77. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  139. evalscope/metrics/t2v_metrics/score.py +78 -0
  140. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  141. evalscope/models/__init__.py +50 -14
  142. evalscope/models/adapters/__init__.py +17 -0
  143. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  144. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  145. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  146. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  147. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  148. evalscope/models/adapters/t2i_adapter.py +76 -0
  149. evalscope/models/custom/__init__.py +2 -1
  150. evalscope/models/custom/dummy_model.py +11 -13
  151. evalscope/models/local_model.py +82 -33
  152. evalscope/models/model.py +2 -42
  153. evalscope/models/register.py +26 -0
  154. evalscope/perf/benchmark.py +4 -3
  155. evalscope/perf/main.py +4 -2
  156. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  157. evalscope/perf/utils/benchmark_util.py +2 -2
  158. evalscope/perf/utils/db_util.py +16 -8
  159. evalscope/report/__init__.py +1 -0
  160. evalscope/report/app.py +117 -67
  161. evalscope/report/app_arguments.py +11 -0
  162. evalscope/report/generator.py +1 -1
  163. evalscope/run.py +3 -3
  164. evalscope/third_party/thinkbench/eval.py +19 -7
  165. evalscope/utils/chat_service.py +2 -2
  166. evalscope/utils/import_utils.py +66 -0
  167. evalscope/utils/utils.py +12 -4
  168. evalscope/version.py +2 -2
  169. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
  170. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
  171. tests/aigc/__init__.py +1 -0
  172. tests/aigc/test_t2i.py +87 -0
  173. tests/cli/test_run.py +20 -7
  174. tests/perf/test_perf.py +6 -3
  175. evalscope/metrics/code_metric.py +0 -98
  176. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  177. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  178. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
  179. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
  180. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
  181. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py (added)
@@ -0,0 +1,465 @@
+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+
+import json
+import logging
+from omegaconf import OmegaConf
+from typing import Dict
+
+from ..common.registry import registry
+
+
+class Config:
+
+    def __init__(self, args):
+        self.config = {}
+
+        self.args = args
+
+        # Register the config and configuration for setup
+        registry.register('configuration', self)
+
+        user_config = self._build_opt_list(self.args.options)
+
+        config = OmegaConf.load(self.args.cfg_path)
+
+        runner_config = self.build_runner_config(config)
+        model_config = self.build_model_config(config, **user_config)
+        dataset_config = self.build_dataset_config(config)
+
+        # Validate the user-provided runner configuration
+        # model and dataset configuration are supposed to be validated by the respective classes
+        # [TODO] validate the model/dataset configuration
+        # self._validate_runner_config(runner_config)
+
+        # Override the default configuration with user options.
+        self.config = OmegaConf.merge(runner_config, model_config, dataset_config, user_config)
+
+    def _validate_runner_config(self, runner_config):
+        """
+        This method validates the configuration, such that
+        1) all the user specified options are valid;
+        2) no type mismatches between the user specified options and the config.
+        """
+        runner_config_validator = create_runner_config_validator()
+        runner_config_validator.validate(runner_config)
+
+    def _build_opt_list(self, opts):
+        opts_dot_list = self._convert_to_dot_list(opts)
+        return OmegaConf.from_dotlist(opts_dot_list)
+
+    @staticmethod
+    def build_model_config(config, **kwargs):
+        model = config.get('model', None)
+        assert model is not None, 'Missing model configuration file.'
+
+        model_cls = registry.get_model_class(model.arch)
+        assert model_cls is not None, f"Model '{model.arch}' has not been registered."
+
+        model_type = kwargs.get('model.model_type', None)
+        if not model_type:
+            model_type = model.get('model_type', None)
+        # else use the model type selected by user.
+
+        assert model_type is not None, 'Missing model_type.'
+
+        model_config_path = model_cls.default_config_path(model_type=model_type)
+
+        model_config = OmegaConf.create()
+        # hiararchy override, customized config > default config
+        model_config = OmegaConf.merge(
+            model_config,
+            OmegaConf.load(model_config_path),
+            {'model': config['model']},
+        )
+
+        return model_config
+
+    @staticmethod
+    def build_runner_config(config):
+        return {'run': config.run}
+
+    @staticmethod
+    def build_dataset_config(config):
+        datasets = config.get('datasets', None)
+        if datasets is None:
+            raise KeyError("Expecting 'datasets' as the root key for dataset configuration.")
+
+        dataset_config = OmegaConf.create()
+
+        for dataset_name in datasets:
+            builder_cls = registry.get_builder_class(dataset_name)
+
+            dataset_config_type = datasets[dataset_name].get('type', 'default')
+            dataset_config_path = builder_cls.default_config_path(type=dataset_config_type)
+
+            # hiararchy override, customized config > default config
+            dataset_config = OmegaConf.merge(
+                dataset_config,
+                OmegaConf.load(dataset_config_path),
+                {'datasets': {
+                    dataset_name: config['datasets'][dataset_name]
+                }},
+            )
+
+        return dataset_config
+
+    def _convert_to_dot_list(self, opts):
+        if opts is None:
+            opts = []
+
+        if len(opts) == 0:
+            return opts
+
+        has_equal = opts[0].find('=') != -1
+
+        if has_equal:
+            return opts
+
+        return [(opt + '=' + value) for opt, value in zip(opts[0::2], opts[1::2])]
+
+    def get_config(self):
+        return self.config
+
+    @property
+    def run_cfg(self):
+        return self.config.run
+
+    @property
+    def datasets_cfg(self):
+        return self.config.datasets
+
+    @property
+    def model_cfg(self):
+        return self.config.model
+
+    def pretty_print(self):
+        logging.info('\n===== Running Parameters =====')
+        logging.info(self._convert_node_to_json(self.config.run))
+
+        logging.info('\n====== Dataset Attributes ======')
+        datasets = self.config.datasets
+
+        for dataset in datasets:
+            if dataset in self.config.datasets:
+                logging.info(f'\n======== {dataset} =======')
+                dataset_config = self.config.datasets[dataset]
+                logging.info(self._convert_node_to_json(dataset_config))
+            else:
+                logging.warning(f"No dataset named '{dataset}' in config. Skipping")
+
+        logging.info(f'\n====== Model Attributes ======')
+        logging.info(self._convert_node_to_json(self.config.model))
+
+    def _convert_node_to_json(self, node):
+        container = OmegaConf.to_container(node, resolve=True)
+        return json.dumps(container, indent=4, sort_keys=True)
+
+    def to_dict(self):
+        return OmegaConf.to_container(self.config)
+
+
+def node_to_dict(node):
+    return OmegaConf.to_container(node)
+
+
+class ConfigValidator:
+    """
+    This is a preliminary implementation to centralize and validate the configuration.
+    May be altered in the future.
+
+    A helper class to validate configurations from yaml file.
+
+    This serves the following purposes:
+    1. Ensure all the options in the yaml are defined, raise error if not.
+    2. when type mismatches are found, the validator will raise an error.
+    3. a central place to store and display helpful messages for supported configurations.
+
+    """
+
+    class _Argument:
+
+        def __init__(self, name, choices=None, type=None, help=None):
+            self.name = name
+            self.val = None
+            self.choices = choices
+            self.type = type
+            self.help = help
+
+        def __str__(self):
+            s = f'{self.name}={self.val}'
+            if self.type is not None:
+                s += f', ({self.type})'
+            if self.choices is not None:
+                s += f', choices: {self.choices}'
+            if self.help is not None:
+                s += f', ({self.help})'
+            return s
+
+    def __init__(self, description):
+        self.description = description
+
+        self.arguments = dict()
+
+        self.parsed_args = None
+
+    def __getitem__(self, key):
+        assert self.parsed_args is not None, 'No arguments parsed yet.'
+
+        return self.parsed_args[key]
+
+    def __str__(self) -> str:
+        return self.format_help()
+
+    def add_argument(self, *args, **kwargs):
+        """
+        Assume the first argument is the name of the argument.
+        """
+        self.arguments[args[0]] = self._Argument(*args, **kwargs)
+
+    def validate(self, config=None):
+        """
+        Convert yaml config (dict-like) to list, required by argparse.
+        """
+        for k, v in config.items():
+            assert (
+                k in self.arguments
+            ), f"""{k} is not a valid argument. Support arguments are {self.format_arguments()}."""
+
+            if self.arguments[k].type is not None:
+                try:
+                    self.arguments[k].val = self.arguments[k].type(v)
+                except ValueError:
+                    raise ValueError(f'{k} is not a valid {self.arguments[k].type}.')
+
+            if self.arguments[k].choices is not None:
+                assert (v in self.arguments[k].choices), f"""{k} must be one of {self.arguments[k].choices}."""
+
+        return config
+
+    def format_arguments(self):
+        return str([f'{k}' for k in sorted(self.arguments.keys())])
+
+    def format_help(self):
+        # description + key-value pair string for each argument
+        help_msg = str(self.description)
+        return help_msg + ', available arguments: ' + self.format_arguments()
+
+    def print_help(self):
+        # display help message
+        print(self.format_help())
+
+
+def create_runner_config_validator():
+    validator = ConfigValidator(description='Runner configurations')
+
+    validator.add_argument(
+        'runner',
+        type=str,
+        choices=['runner_base', 'runner_iter'],
+        help="""Runner to use. The "runner_base" uses epoch-based training while iter-based
+            runner runs based on iters. Default: runner_base""",
+    )
+    # add argumetns for training dataset ratios
+    validator.add_argument(
+        'train_dataset_ratios',
+        type=Dict[str, float],
+        help="""Ratios of training dataset. This is used in iteration-based runner.
+            Do not support for epoch-based runner because how to define an epoch becomes tricky.
+            Default: None""",
+    )
+    validator.add_argument(
+        'max_iters',
+        type=float,
+        help='Maximum number of iterations to run.',
+    )
+    validator.add_argument(
+        'max_epoch',
+        type=int,
+        help='Maximum number of epochs to run.',
+    )
+    # add arguments for iters_per_inner_epoch
+    validator.add_argument(
+        'iters_per_inner_epoch',
+        type=float,
+        help='Number of iterations per inner epoch. This is required when runner is runner_iter.',
+    )
+    lr_scheds_choices = registry.list_lr_schedulers()
+    validator.add_argument(
+        'lr_sched',
+        type=str,
+        choices=lr_scheds_choices,
+        help='Learning rate scheduler to use, from {}'.format(lr_scheds_choices),
+    )
+    task_choices = registry.list_tasks()
+    validator.add_argument(
+        'task',
+        type=str,
+        choices=task_choices,
+        help='Task to use, from {}'.format(task_choices),
+    )
+    # add arguments for init_lr
+    validator.add_argument(
+        'init_lr',
+        type=float,
+        help='Initial learning rate. This will be the learning rate after warmup and before decay.',
+    )
+    # add arguments for min_lr
+    validator.add_argument(
+        'min_lr',
+        type=float,
+        help='Minimum learning rate (after decay).',
+    )
+    # add arguments for warmup_lr
+    validator.add_argument(
+        'warmup_lr',
+        type=float,
+        help='Starting learning rate for warmup.',
+    )
+    # add arguments for learning rate decay rate
+    validator.add_argument(
+        'lr_decay_rate',
+        type=float,
+        help='Learning rate decay rate. Required if using a decaying learning rate scheduler.',
+    )
+    # add arguments for weight decay
+    validator.add_argument(
+        'weight_decay',
+        type=float,
+        help='Weight decay rate.',
+    )
+    # add arguments for training batch size
+    validator.add_argument(
+        'batch_size_train',
+        type=int,
+        help='Training batch size.',
+    )
+    # add arguments for evaluation batch size
+    validator.add_argument(
+        'batch_size_eval',
+        type=int,
+        help='Evaluation batch size, including validation and testing.',
+    )
+    # add arguments for number of workers for data loading
+    validator.add_argument(
+        'num_workers',
+        help='Number of workers for data loading.',
+    )
+    # add arguments for warm up steps
+    validator.add_argument(
+        'warmup_steps',
+        type=int,
+        help='Number of warmup steps. Required if a warmup schedule is used.',
+    )
+    # add arguments for random seed
+    validator.add_argument(
+        'seed',
+        type=int,
+        help='Random seed.',
+    )
+    # add arguments for output directory
+    validator.add_argument(
+        'output_dir',
+        type=str,
+        help='Output directory to save checkpoints and logs.',
+    )
+    # add arguments for whether only use evaluation
+    validator.add_argument(
+        'evaluate',
+        help='Whether to only evaluate the model. If true, training will not be performed.',
+    )
+    # add arguments for splits used for training, e.g. ["train", "val"]
+    validator.add_argument(
+        'train_splits',
+        type=list,
+        help='Splits to use for training.',
+    )
+    # add arguments for splits used for validation, e.g. ["val"]
+    validator.add_argument(
+        'valid_splits',
+        type=list,
+        help='Splits to use for validation. If not provided, will skip the validation.',
+    )
+    # add arguments for splits used for testing, e.g. ["test"]
+    validator.add_argument(
+        'test_splits',
+        type=list,
+        help='Splits to use for testing. If not provided, will skip the testing.',
+    )
+    # add arguments for accumulating gradient for iterations
+    validator.add_argument(
+        'accum_grad_iters',
+        type=int,
+        help='Number of iterations to accumulate gradient for.',
+    )
+
+    # ====== distributed training ======
+    validator.add_argument(
+        'device',
+        type=str,
+        choices=['cpu', 'cuda'],
+        help="Device to use. Support 'cuda' or 'cpu' as for now.",
+    )
+    validator.add_argument(
+        'world_size',
+        type=int,
+        help='Number of processes participating in the job.',
+    )
+    validator.add_argument('dist_url', type=str)
+    validator.add_argument('distributed', type=bool)
+    # add arguments to opt using distributed sampler during evaluation or not
+    validator.add_argument(
+        'use_dist_eval_sampler',
+        type=bool,
+        help='Whether to use distributed sampler during evaluation or not.',
+    )
+
+    # ====== task specific ======
+    # generation task specific arguments
+    # add arguments for maximal length of text output
+    validator.add_argument(
+        'max_len',
+        type=int,
+        help='Maximal length of text output.',
+    )
+    # add arguments for minimal length of text output
+    validator.add_argument(
+        'min_len',
+        type=int,
+        help='Minimal length of text output.',
+    )
+    # add arguments number of beams
+    validator.add_argument(
+        'num_beams',
+        type=int,
+        help='Number of beams used for beam search.',
+    )
+
+    # vqa task specific arguments
+    # add arguments for number of answer candidates
+    validator.add_argument(
+        'num_ans_candidates',
+        type=int,
+        help="""For ALBEF and BLIP, these models first rank answers according to likelihood to select answer candidates.""",
+    )
+    # add arguments for inference method
+    validator.add_argument(
+        'inference_method',
+        type=str,
+        choices=['genearte', 'rank'],
+        help="""Inference method to use for question answering. If rank, requires a answer list.""",
+    )
+
+    # ====== model specific ======
+    validator.add_argument(
+        'k_test',
+        type=int,
+        help='Number of top k most similar samples from ITC/VTC selection to be tested.',
+    )
+
+    return validator
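
For orientation, here is a minimal sketch of how this vendored LAVIS Config class is driven. It is not part of the diff: the YAML path and the override pair below are hypothetical, and the class only assumes an argparse-style namespace with cfg_path and options attributes.

    from argparse import Namespace

    # Hypothetical YAML with the root keys the class expects: model, datasets, run.
    args = Namespace(
        cfg_path='cfg.yaml',
        # Flat key/value pairs; _convert_to_dot_list turns them into 'run.batch_size_train=16'.
        options=['run.batch_size_train', '16'],
    )
    config = Config(args)      # merges runner, model, and dataset configs via OmegaConf
    config.pretty_print()      # logs run, dataset, and model attributes
    run_cfg = config.run_cfg   # OmegaConf node for the 'run' section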
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py (added)
@@ -0,0 +1,141 @@
+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+
+import datetime
+import functools
+import os
+import torch
+import torch.distributed as dist
+
+
+def setup_for_distributed(is_master):
+    """
+    This function disables printing when not in master process
+    """
+    import builtins as __builtin__
+
+    builtin_print = __builtin__.print
+
+    def print(*args, **kwargs):
+        force = kwargs.pop('force', False)
+        if is_master or force:
+            builtin_print(*args, **kwargs)
+
+    __builtin__.print = print
+
+
+def is_dist_avail_and_initialized():
+    if not dist.is_available():
+        return False
+    if not dist.is_initialized():
+        return False
+    return True
+
+
+def get_world_size():
+    if not is_dist_avail_and_initialized():
+        return 1
+    return dist.get_world_size()
+
+
+def get_rank():
+    if not is_dist_avail_and_initialized():
+        return 0
+    return dist.get_rank()
+
+
+def is_main_process():
+    return get_rank() == 0
+
+
+def init_distributed_mode(args):
+    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
+        args.rank = int(os.environ['RANK'])
+        args.world_size = int(os.environ['WORLD_SIZE'])
+        args.gpu = int(os.environ['LOCAL_RANK'])
+    elif 'SLURM_PROCID' in os.environ:
+        args.rank = int(os.environ['SLURM_PROCID'])
+        args.gpu = args.rank % torch.cuda.device_count()
+    else:
+        print('Not using distributed mode')
+        args.distributed = False
+        return
+
+    args.distributed = True
+
+    torch.cuda.set_device(args.gpu)
+    args.dist_backend = 'nccl'
+    print(
+        '| distributed init (rank {}, world {}): {}'.format(args.rank, args.world_size, args.dist_url),
+        flush=True,
+    )
+    torch.distributed.init_process_group(
+        backend=args.dist_backend,
+        init_method=args.dist_url,
+        world_size=args.world_size,
+        rank=args.rank,
+        timeout=datetime.timedelta(days=365),  # allow auto-downloading and de-compressing
+    )
+    torch.distributed.barrier()
+    setup_for_distributed(args.rank == 0)
+
+
+def get_dist_info():
+    if torch.__version__ < '1.0':
+        initialized = dist._initialized
+    else:
+        initialized = dist.is_initialized()
+    if initialized:
+        rank = dist.get_rank()
+        world_size = dist.get_world_size()
+    else:  # non-distributed training
+        rank = 0
+        world_size = 1
+    return rank, world_size
+
+
+def main_process(func):
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        rank, _ = get_dist_info()
+        if rank == 0:
+            return func(*args, **kwargs)
+
+    return wrapper
+
+
+def download_cached_file(url, check_hash=True, progress=False):
+    """
+    Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
+    If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
+    """
+    import timm.models as timm_hub
+
+    def download_file(model_id, file_name, cache_dir=None):
+        # download file from modelscope
+        from modelscope import snapshot_download
+        local_path = snapshot_download(model_id=model_id, cache_dir=cache_dir, allow_patterns=file_name)
+        file_path = os.path.join(local_path, file_name)
+
+        return file_path
+
+    parts = torch.hub.urlparse(url)
+    filename = os.path.basename(parts.path)
+
+    if is_main_process():
+        try:
+            cached_file = download_file('AI-ModelScope/BLIP2-Pretrain', filename)
+        except Exception as e:
+            print(f'Error downloading file: {e}')
+            timm_hub.download_cached_file(url, check_hash, progress)
+            cached_file = os.path.join(timm_hub.get_cache_dir(), filename)
+
+    if is_dist_avail_and_initialized():
+        dist.barrier()
+
+    return cached_file
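
A brief usage sketch of the distributed helpers above, assuming a torchrun-style launch. This is illustrative only: the Namespace fields mirror what init_distributed_mode() reads and writes, and 'env://' is a placeholder init method.

    from argparse import Namespace

    args = Namespace(dist_url='env://', distributed=True)
    init_distributed_mode(args)  # falls back to single-process mode when RANK/WORLD_SIZE are unset

    @main_process
    def save_checkpoint(path):
        # The decorator runs the body only on rank 0; other ranks return None.
        print(f'rank {get_rank()} of {get_world_size()} saving to {path}')

    save_checkpoint('ckpt.pth')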
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py (added)
@@ -0,0 +1,22 @@
+import numpy as np
+from matplotlib import pyplot as plt
+from scipy.ndimage import filters
+from skimage import transform as skimage_transform
+
+
+def getAttMap(img, attMap, blur=True, overlap=True):
+    attMap -= attMap.min()
+    if attMap.max() > 0:
+        attMap /= attMap.max()
+    attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode='constant')
+    if blur:
+        attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
+        attMap -= attMap.min()
+        attMap /= attMap.max()
+    cmap = plt.get_cmap('jet')
+    attMapV = cmap(attMap)
+    attMapV = np.delete(attMapV, 3, 2)
+    if overlap:
+        attMap = (1 * (1 - attMap**0.7).reshape(attMap.shape + (1, )) * img +
+                  (attMap**0.7).reshape(attMap.shape + (1, )) * attMapV)
+    return attMap
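
And a sketch of getAttMap() with synthetic inputs; a real caller would pass a normalized RGB image in [0, 1] and a low-resolution attention map, so the shapes here are illustrative.

    import numpy as np

    img = np.random.rand(224, 224, 3)  # normalized RGB image, (H, W, 3)
    att = np.random.rand(24, 24)       # e.g. per-patch attention from a ViT
    blended = getAttMap(img, att)      # resized, blurred, jet-colormapped overlay
    assert blended.shape == img.shape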