evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of evalscope might be problematic. Click here for more details.
- evalscope/arguments.py +2 -1
- evalscope/benchmarks/__init__.py +2 -2
- evalscope/benchmarks/aigc/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/base.py +56 -0
- evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
- evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
- evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
- evalscope/benchmarks/aime/aime24_adapter.py +1 -1
- evalscope/benchmarks/aime/aime25_adapter.py +4 -4
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
- evalscope/benchmarks/arc/arc_adapter.py +1 -1
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
- evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
- evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
- evalscope/benchmarks/data_adapter.py +16 -9
- evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
- evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
- evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
- evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
- evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
- evalscope/benchmarks/utils.py +7 -16
- evalscope/cli/start_app.py +1 -1
- evalscope/collections/evaluator.py +16 -4
- evalscope/config.py +7 -3
- evalscope/constants.py +11 -0
- evalscope/evaluator/evaluator.py +9 -3
- evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
- evalscope/metrics/__init__.py +49 -4
- evalscope/metrics/llm_judge.py +1 -1
- evalscope/metrics/named_metrics.py +13 -0
- evalscope/metrics/t2v_metrics/__init__.py +66 -0
- evalscope/metrics/t2v_metrics/clipscore.py +14 -0
- evalscope/metrics/t2v_metrics/constants.py +12 -0
- evalscope/metrics/t2v_metrics/itmscore.py +14 -0
- evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
- evalscope/metrics/t2v_metrics/models/model.py +45 -0
- evalscope/metrics/t2v_metrics/models/utils.py +25 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
- evalscope/metrics/t2v_metrics/score.py +78 -0
- evalscope/metrics/t2v_metrics/vqascore.py +14 -0
- evalscope/models/__init__.py +50 -14
- evalscope/models/adapters/__init__.py +17 -0
- evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
- evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
- evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
- evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
- evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
- evalscope/models/adapters/t2i_adapter.py +76 -0
- evalscope/models/custom/__init__.py +2 -1
- evalscope/models/custom/dummy_model.py +11 -13
- evalscope/models/local_model.py +82 -33
- evalscope/models/model.py +2 -42
- evalscope/models/register.py +26 -0
- evalscope/perf/benchmark.py +4 -3
- evalscope/perf/main.py +4 -2
- evalscope/perf/plugin/datasets/flickr8k.py +2 -1
- evalscope/perf/utils/benchmark_util.py +2 -2
- evalscope/perf/utils/db_util.py +16 -8
- evalscope/report/__init__.py +1 -0
- evalscope/report/app.py +117 -67
- evalscope/report/app_arguments.py +11 -0
- evalscope/report/generator.py +1 -1
- evalscope/run.py +3 -3
- evalscope/third_party/thinkbench/eval.py +19 -7
- evalscope/utils/chat_service.py +2 -2
- evalscope/utils/import_utils.py +66 -0
- evalscope/utils/utils.py +12 -4
- evalscope/version.py +2 -2
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
- tests/aigc/__init__.py +1 -0
- tests/aigc/test_t2i.py +87 -0
- tests/cli/test_run.py +20 -7
- tests/perf/test_perf.py +6 -3
- evalscope/metrics/code_metric.py +0 -98
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
- evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2022, salesforce.com, inc.
|
|
3
|
+
All rights reserved.
|
|
4
|
+
SPDX-License-Identifier: BSD-3-Clause
|
|
5
|
+
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from omegaconf import OmegaConf
|
|
11
|
+
from typing import Dict
|
|
12
|
+
|
|
13
|
+
from ..common.registry import registry
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Config:
    """Build and hold the merged runner / model / dataset configuration.

    The instance loads the YAML file at ``args.cfg_path``, combines it with
    the per-model and per-dataset default configuration files, and finally
    applies the command-line overrides given in ``args.options``.
    """

    def __init__(self, args):
        """Load, merge and store the configuration described by ``args``.

        Args:
            args: object exposing ``cfg_path`` (path of the YAML file passed
                to ``OmegaConf.load``) and ``options`` (override list, see
                ``_convert_to_dot_list``; may be ``None``).
        """
        self.config = {}

        self.args = args

        # Register the config and configuration for setup
        registry.register('configuration', self)

        user_config = self._build_opt_list(self.args.options)

        config = OmegaConf.load(self.args.cfg_path)

        runner_config = self.build_runner_config(config)
        model_config = self.build_model_config(config, **user_config)
        dataset_config = self.build_dataset_config(config)

        # Validate the user-provided runner configuration
        # model and dataset configuration are supposed to be validated by the respective classes
        # [TODO] validate the model/dataset configuration
        # self._validate_runner_config(runner_config)

        # Override the default configuration with user options.
        self.config = OmegaConf.merge(runner_config, model_config, dataset_config, user_config)

    def _validate_runner_config(self, runner_config):
        """
        This method validates the configuration, such that
            1) all the user specified options are valid;
            2) no type mismatches between the user specified options and the config.
        """
        runner_config_validator = create_runner_config_validator()
        runner_config_validator.validate(runner_config)

    def _build_opt_list(self, opts):
        """Turn the raw override list into an OmegaConf config object."""
        opts_dot_list = self._convert_to_dot_list(opts)
        return OmegaConf.from_dotlist(opts_dot_list)

    @staticmethod
    def _resolve_model_type(model, overrides):
        """Pick the model type, preferring a user override over the config file.

        Overrides produced by ``OmegaConf.from_dotlist`` are nested
        (``{'model': {'model_type': ...}}``), so the nested form is checked in
        addition to the flat ``'model.model_type'`` key.

        Args:
            model: mapping-like ``model`` section of the loaded config.
            overrides: mapping of user overrides (the ``**kwargs`` of
                ``build_model_config``).

        Returns:
            The selected model type, or ``None`` when none is given anywhere.
        """
        model_type = overrides.get('model.model_type', None)
        if not model_type:
            user_model = overrides.get('model', None)
            # A scalar override such as ``model=foo`` has no ``get``; skip it.
            if user_model is not None and hasattr(user_model, 'get'):
                model_type = user_model.get('model_type', None)
        if not model_type:
            model_type = model.get('model_type', None)
        return model_type

    @staticmethod
    def build_model_config(config, **kwargs):
        """Merge the model's default config file with the ``model`` section.

        Args:
            config: loaded configuration containing a ``model`` section with
                an ``arch`` entry.
            **kwargs: user overrides; only the model type is read from them.

        Returns:
            The merged model configuration (customized values win over the
            defaults loaded from ``model_cls.default_config_path``).
        """
        model = config.get('model', None)
        assert model is not None, 'Missing model configuration file.'

        model_cls = registry.get_model_class(model.arch)
        assert model_cls is not None, f"Model '{model.arch}' has not been registered."

        # Use the model type selected by the user when there is one,
        # otherwise the one from the config file.
        model_type = Config._resolve_model_type(model, kwargs)

        assert model_type is not None, 'Missing model_type.'

        model_config_path = model_cls.default_config_path(model_type=model_type)

        # hierarchy override, customized config > default config
        return OmegaConf.merge(
            OmegaConf.create(),
            OmegaConf.load(model_config_path),
            {'model': config['model']},
        )

    @staticmethod
    def build_runner_config(config):
        """Return the ``run`` section wrapped under the ``'run'`` key."""
        return {'run': config.run}

    @staticmethod
    def build_dataset_config(config):
        """Merge every dataset's default config file with its user section.

        Raises:
            KeyError: if ``config`` has no ``datasets`` section.
        """
        datasets = config.get('datasets', None)
        if datasets is None:
            raise KeyError("Expecting 'datasets' as the root key for dataset configuration.")

        dataset_config = OmegaConf.create()

        for dataset_name in datasets:
            builder_cls = registry.get_builder_class(dataset_name)

            dataset_config_type = datasets[dataset_name].get('type', 'default')
            dataset_config_path = builder_cls.default_config_path(type=dataset_config_type)

            # hierarchy override, customized config > default config
            dataset_config = OmegaConf.merge(
                dataset_config,
                OmegaConf.load(dataset_config_path),
                {'datasets': {
                    dataset_name: config['datasets'][dataset_name]
                }},
            )

        return dataset_config

    def _convert_to_dot_list(self, opts):
        """Normalize overrides to ``key=value`` strings.

        Accepts either ``['a=1', 'b=2']`` (returned unchanged) or the
        alternating form ``['a', '1', 'b', '2']``. The form is decided from
        the first element only. In the alternating form a trailing key
        without a value is dropped, because the pairs are built with ``zip``.
        """
        if opts is None:
            opts = []

        # Nothing to convert, or already in ``key=value`` form.
        if not opts or '=' in opts[0]:
            return opts

        return [(opt + '=' + value) for opt, value in zip(opts[0::2], opts[1::2])]

    def get_config(self):
        """Return the merged configuration."""
        return self.config

    @property
    def run_cfg(self):
        """The ``run`` section of the merged configuration."""
        return self.config.run

    @property
    def datasets_cfg(self):
        """The ``datasets`` section of the merged configuration."""
        return self.config.datasets

    @property
    def model_cfg(self):
        """The ``model`` section of the merged configuration."""
        return self.config.model

    def pretty_print(self):
        """Log the run, dataset and model sections as indented JSON."""
        logging.info('\n=====  Running Parameters    =====')
        logging.info(self._convert_node_to_json(self.config.run))

        logging.info('\n======  Dataset Attributes  ======')
        datasets = self.config.datasets

        for dataset in datasets:
            if dataset in self.config.datasets:
                logging.info(f'\n======== {dataset} =======')
                dataset_config = self.config.datasets[dataset]
                logging.info(self._convert_node_to_json(dataset_config))
            else:
                logging.warning(f"No dataset named '{dataset}' in config. Skipping")

        logging.info('\n======  Model Attributes  ======')
        logging.info(self._convert_node_to_json(self.config.model))

    def _convert_node_to_json(self, node):
        """Serialize a config node to a sorted, indented JSON string."""
        container = OmegaConf.to_container(node, resolve=True)
        return json.dumps(container, indent=4, sort_keys=True)

    def to_dict(self):
        """Return the merged configuration as plain Python containers."""
        return OmegaConf.to_container(self.config)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def node_to_dict(node):
    """Convert an OmegaConf node into plain Python containers."""
    as_container = OmegaConf.to_container(node)
    return as_container
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ConfigValidator:
    """
    This is a preliminary implementation to centralize and validate the configuration.
    May be altered in the future.

    A helper class to validate configurations from yaml file.

    This serves the following purposes:
        1. Ensure all the options in the yaml are defined, raise error if not.
        2. when type mismatches are found, the validator will raise an error.
        3. a central place to store and display helpful messages for supported configurations.

    """

    class _Argument:
        """Declaration of one supported option and its last validated value."""

        def __init__(self, name, choices=None, type=None, help=None):
            self.name = name
            self.val = None
            self.choices = choices
            self.type = type
            self.help = help

        def __str__(self):
            s = f'{self.name}={self.val}'
            if self.type is not None:
                s += f', ({self.type})'
            if self.choices is not None:
                s += f', choices: {self.choices}'
            if self.help is not None:
                s += f', ({self.help})'
            return s

    def __init__(self, description):
        """Create an empty validator labelled with ``description``."""
        self.description = description

        self.arguments = dict()

        self.parsed_args = None

    def __getitem__(self, key):
        assert self.parsed_args is not None, 'No arguments parsed yet.'

        return self.parsed_args[key]

    def __str__(self) -> str:
        return self.format_help()

    def add_argument(self, *args, **kwargs):
        """
        Assume the first argument is the name of the argument.
        """
        self.arguments[args[0]] = self._Argument(*args, **kwargs)

    def validate(self, config=None):
        """Check a dict-like config against the declared arguments.

        Every key must have been declared with ``add_argument``. When the
        declaration has a ``type``, the value is converted with it and stored
        on the argument's ``val``; when it has ``choices``, the raw value must
        be one of them.

        Args:
            config: mapping of option name to value. ``None`` means there is
                nothing to validate.

        Returns:
            ``config`` itself, unchanged.

        Raises:
            AssertionError: for an undeclared key or a value outside
                ``choices``.
            ValueError: when a value cannot be converted to the declared type.
        """
        if config is None:
            return config

        for k, v in config.items():
            # Raised explicitly (not via ``assert``) so the check is not
            # stripped under ``python -O``; the exception type is unchanged.
            if k not in self.arguments:
                raise AssertionError(
                    f"""{k} is not a valid argument. Support arguments are {self.format_arguments()}.""")

            argument = self.arguments[k]

            if argument.type is not None:
                # typing aliases such as Dict[str, float] cannot be called;
                # convert with their runtime origin (dict) instead.
                caster = getattr(argument.type, '__origin__', None) or argument.type
                try:
                    argument.val = caster(v)
                except (TypeError, ValueError) as err:
                    raise ValueError(f'{k} is not a valid {argument.type}.') from err

            if argument.choices is not None and v not in argument.choices:
                raise AssertionError(f"""{k} must be one of {argument.choices}.""")

        return config

    def format_arguments(self):
        """Return the sorted argument names rendered as a list string."""
        return str([f'{k}' for k in sorted(self.arguments)])

    def format_help(self):
        # description + key-value pair string for each argument
        help_msg = str(self.description)
        return help_msg + ', available arguments: ' + self.format_arguments()

    def print_help(self):
        # display help message
        print(self.format_help())
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def create_runner_config_validator():
    """Build the ``ConfigValidator`` describing every supported runner option.

    Returns:
        A ``ConfigValidator`` with one declared argument per runner option.
        The choices for ``lr_sched`` and ``task`` are read from the registry
        at call time.
    """
    validator = ConfigValidator(description='Runner configurations')

    validator.add_argument(
        'runner',
        type=str,
        choices=['runner_base', 'runner_iter'],
        help="""Runner to use. The "runner_base" uses epoch-based training while iter-based
            runner runs based on iters. Default: runner_base""",
    )
    # add arguments for training dataset ratios
    validator.add_argument(
        'train_dataset_ratios',
        # Plain ``dict``: the validator calls ``type(value)``, and a typing
        # alias such as Dict[str, float] cannot be called.
        type=dict,
        help="""Ratios of training dataset. This is used in iteration-based runner.
        Do not support for epoch-based runner because how to define an epoch becomes tricky.
        Default: None""",
    )
    validator.add_argument(
        'max_iters',
        type=float,
        help='Maximum number of iterations to run.',
    )
    validator.add_argument(
        'max_epoch',
        type=int,
        help='Maximum number of epochs to run.',
    )
    # add arguments for iters_per_inner_epoch
    validator.add_argument(
        'iters_per_inner_epoch',
        type=float,
        help='Number of iterations per inner epoch. This is required when runner is runner_iter.',
    )
    lr_scheds_choices = registry.list_lr_schedulers()
    validator.add_argument(
        'lr_sched',
        type=str,
        choices=lr_scheds_choices,
        help='Learning rate scheduler to use, from {}'.format(lr_scheds_choices),
    )
    task_choices = registry.list_tasks()
    validator.add_argument(
        'task',
        type=str,
        choices=task_choices,
        help='Task to use, from {}'.format(task_choices),
    )
    # add arguments for init_lr
    validator.add_argument(
        'init_lr',
        type=float,
        help='Initial learning rate. This will be the learning rate after warmup and before decay.',
    )
    # add arguments for min_lr
    validator.add_argument(
        'min_lr',
        type=float,
        help='Minimum learning rate (after decay).',
    )
    # add arguments for warmup_lr
    validator.add_argument(
        'warmup_lr',
        type=float,
        help='Starting learning rate for warmup.',
    )
    # add arguments for learning rate decay rate
    validator.add_argument(
        'lr_decay_rate',
        type=float,
        help='Learning rate decay rate. Required if using a decaying learning rate scheduler.',
    )
    # add arguments for weight decay
    validator.add_argument(
        'weight_decay',
        type=float,
        help='Weight decay rate.',
    )
    # add arguments for training batch size
    validator.add_argument(
        'batch_size_train',
        type=int,
        help='Training batch size.',
    )
    # add arguments for evaluation batch size
    validator.add_argument(
        'batch_size_eval',
        type=int,
        help='Evaluation batch size, including validation and testing.',
    )
    # add arguments for number of workers for data loading
    validator.add_argument(
        'num_workers',
        help='Number of workers for data loading.',
    )
    # add arguments for warm up steps
    validator.add_argument(
        'warmup_steps',
        type=int,
        help='Number of warmup steps. Required if a warmup schedule is used.',
    )
    # add arguments for random seed
    validator.add_argument(
        'seed',
        type=int,
        help='Random seed.',
    )
    # add arguments for output directory
    validator.add_argument(
        'output_dir',
        type=str,
        help='Output directory to save checkpoints and logs.',
    )
    # add arguments for whether only use evaluation
    validator.add_argument(
        'evaluate',
        help='Whether to only evaluate the model. If true, training will not be performed.',
    )
    # add arguments for splits used for training, e.g. ["train", "val"]
    validator.add_argument(
        'train_splits',
        type=list,
        help='Splits to use for training.',
    )
    # add arguments for splits used for validation, e.g. ["val"]
    validator.add_argument(
        'valid_splits',
        type=list,
        help='Splits to use for validation. If not provided, will skip the validation.',
    )
    # add arguments for splits used for testing, e.g. ["test"]
    validator.add_argument(
        'test_splits',
        type=list,
        help='Splits to use for testing. If not provided, will skip the testing.',
    )
    # add arguments for accumulating gradient for iterations
    validator.add_argument(
        'accum_grad_iters',
        type=int,
        help='Number of iterations to accumulate gradient for.',
    )

    # ====== distributed training ======
    validator.add_argument(
        'device',
        type=str,
        choices=['cpu', 'cuda'],
        help="Device to use. Support 'cuda' or 'cpu' as for now.",
    )
    validator.add_argument(
        'world_size',
        type=int,
        help='Number of processes participating in the job.',
    )
    validator.add_argument('dist_url', type=str)
    validator.add_argument('distributed', type=bool)
    # add arguments to opt using distributed sampler during evaluation or not
    validator.add_argument(
        'use_dist_eval_sampler',
        type=bool,
        help='Whether to use distributed sampler during evaluation or not.',
    )

    # ====== task specific ======
    # generation task specific arguments
    # add arguments for maximal length of text output
    validator.add_argument(
        'max_len',
        type=int,
        help='Maximal length of text output.',
    )
    # add arguments for minimal length of text output
    validator.add_argument(
        'min_len',
        type=int,
        help='Minimal length of text output.',
    )
    # add arguments number of beams
    validator.add_argument(
        'num_beams',
        type=int,
        help='Number of beams used for beam search.',
    )

    # vqa task specific arguments
    # add arguments for number of answer candidates
    validator.add_argument(
        'num_ans_candidates',
        type=int,
        help=
        """For ALBEF and BLIP, these models first rank answers according to likelihood to select answer candidates.""",
    )
    # add arguments for inference method
    validator.add_argument(
        'inference_method',
        type=str,
        # Was misspelled 'genearte', which rejected the valid value 'generate'.
        choices=['generate', 'rank'],
        help="""Inference method to use for question answering. If rank, requires a answer list.""",
    )

    # ====== model specific ======
    validator.add_argument(
        'k_test',
        type=int,
        help='Number of top k most similar samples from ITC/VTC selection to be tested.',
    )

    return validator
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2022, salesforce.com, inc.
|
|
3
|
+
All rights reserved.
|
|
4
|
+
SPDX-License-Identifier: BSD-3-Clause
|
|
5
|
+
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import datetime
|
|
9
|
+
import functools
|
|
10
|
+
import os
|
|
11
|
+
import torch
|
|
12
|
+
import torch.distributed as dist
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def setup_for_distributed(is_master):
    """
    Install a rank-aware replacement for the built-in ``print``.

    After this call, ``print`` only emits output when ``is_master`` is truthy or
    when the caller passes ``force=True``. The ``force`` keyword is removed
    before the call is delegated to the ``print`` that was installed beforehand.
    """
    import builtins

    previous_print = builtins.print

    # The wrapper deliberately keeps the name ``print`` so the object installed
    # into ``builtins`` still reports that name.
    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        previous_print(*args, **kwargs)

    builtins.print = print
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def is_dist_avail_and_initialized():
    """
    Return True only when torch.distributed is available and initialized.
    """
    # Short-circuit keeps is_initialized() from being called when the
    # distributed package is not available.
    return bool(dist.is_available() and dist.is_initialized())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_world_size():
    """
    Return the distributed world size, or 1 when distributed is not in use.
    """
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_rank():
    """
    Return this process's distributed rank, or 0 when distributed is not in use.
    """
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def is_main_process():
    """
    Return True when the current process has rank 0.
    """
    current_rank = get_rank()
    return current_rank == 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def init_distributed_mode(args):
    """
    Populate ``args`` from the environment and initialise the process group.

    Environment handling:
      * ``RANK`` and ``WORLD_SIZE`` present: ``args.rank``, ``args.world_size``
        and ``args.gpu`` (from ``LOCAL_RANK``) are read from the environment.
      * ``SLURM_PROCID`` present: ``args.rank`` comes from it and ``args.gpu`` is
        the rank modulo the visible CUDA device count. ``args.world_size`` is
        not set on this path, so it must already exist on ``args``.
      * otherwise: ``args.distributed`` is set to False and the function returns.

    On the distributed paths ``args.dist_url`` must already be set; it is used
    as the ``init_method``. The function sets ``args.distributed`` and
    ``args.dist_backend`` ('nccl'), selects the CUDA device, initialises the
    process group, synchronises all ranks, and finally silences ``print`` on
    non-zero ranks.
    """
    env = os.environ

    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env['RANK'])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'

    banner = f'| distributed init (rank {args.rank}, world {args.world_size}): {args.dist_url}'
    print(banner, flush=True)

    group_options = dict(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        # very long timeout: allow auto-downloading and de-compressing
        timeout=datetime.timedelta(days=365),
    )
    dist.init_process_group(**group_options)
    dist.barrier()

    is_rank_zero = args.rank == 0
    setup_for_distributed(is_rank_zero)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_dist_info():
    """
    Return ``(rank, world_size)`` for the current process.

    Returns:
        tuple: ``(dist.get_rank(), dist.get_world_size())`` when a process
        group is initialised; ``(0, 1)`` when torch.distributed is unavailable
        in this build or no process group has been initialised.
    """
    # When is_available() is False, torch.distributed exposes no other APIs,
    # so is_initialized() must not be touched before this check.
    if not dist.is_available():
        return 0, 1

    # Legacy branch kept for backward compatibility with pre-1.0 torch.
    if torch.__version__ < '1.0':
        initialized = dist._initialized
    else:
        initialized = dist.is_initialized()

    if not initialized:
        # non-distributed run
        return 0, 1

    return dist.get_rank(), dist.get_world_size()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def main_process(func):
    """
    Decorator that runs ``func`` only on the rank-0 process.

    On any other rank the wrapped call does nothing and returns None.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        rank, _ = get_dist_info()
        if rank != 0:
            return None
        return func(*args, **kwargs)

    return wrapper
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def download_cached_file(url, check_hash=True, progress=False):
    """
    Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
    If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.

    The main process first tries to fetch the file (named after the last path
    component of ``url``) from the ModelScope repo 'AI-ModelScope/BLIP2-Pretrain';
    if that raises, it falls back to timm's URL downloader.

    Args:
        url: Source URL; its basename is used as the file name.
        check_hash: Forwarded to ``timm_hub.download_cached_file`` on fallback.
        progress: Forwarded to ``timm_hub.download_cached_file`` on fallback.

    Returns:
        The local path of the cached file, as resolved by the main process.
    """
    import timm.models as timm_hub

    def download_file(model_id, file_name, cache_dir=None):
        # download file from modelscope
        from modelscope import snapshot_download
        local_path = snapshot_download(model_id=model_id, cache_dir=cache_dir, allow_patterns=file_name)
        return os.path.join(local_path, file_name)

    parts = torch.hub.urlparse(url)
    filename = os.path.basename(parts.path)

    # Only rank 0 resolves the path; other ranks receive it below. Previously
    # the name was left unbound on non-main ranks and the final return raised.
    cached_file = None

    if is_main_process():
        try:
            cached_file = download_file('AI-ModelScope/BLIP2-Pretrain', filename)
        except Exception as e:
            # Deliberate best-effort: any ModelScope failure falls back to the URL.
            print(f'Error downloading file: {e}')
            timm_hub.download_cached_file(url, check_hash, progress)
            cached_file = os.path.join(timm_hub.get_cache_dir(), filename)

    if is_dist_avail_and_initialized():
        # Wait until rank 0 has finished downloading.
        dist.barrier()
        # Share the path rank 0 ended up with (ModelScope cache or timm cache)
        # so every rank returns the same location.
        # NOTE(review): assumes all ranks can read that path (shared filesystem
        # or single node) -- confirm for multi-node setups.
        path_holder = [cached_file]
        dist.broadcast_object_list(path_holder, src=0)
        cached_file = path_holder[0]

    return cached_file
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from matplotlib import pyplot as plt
|
|
3
|
+
from scipy.ndimage import filters
|
|
4
|
+
from skimage import transform as skimage_transform
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def getAttMap(img, attMap, blur=True, overlap=True):
    """
    Normalise an attention map, resize it to the image size and optionally overlay it.

    Args:
        img: Image array. ``img.shape[:2]`` gives the target size, and the array
            itself is blended with the colour map when ``overlap`` is True.
            (Assumes an H x W x 3 float image in [0, 1] -- TODO confirm.)
        attMap: Attention map array. It is not modified; a normalised copy is used.
        blur: When True, apply a Gaussian filter with sigma
            ``0.02 * max(img.shape[:2])`` and re-normalise.
        overlap: When True, blend the 'jet' colour-mapped attention with ``img``
            using ``attMap ** 0.7`` as the per-pixel weight.

    Returns:
        The blended image when ``overlap`` is True, otherwise the resized
        (and optionally blurred) normalised attention map.
    """
    # Out-of-place arithmetic so the caller's array is left untouched.
    attMap = attMap - attMap.min()
    peak = attMap.max()
    if peak > 0:
        attMap = attMap / peak

    attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode='constant')

    if blur:
        # NOTE(review): scipy.ndimage.filters is a deprecated namespace; the
        # file-level import should eventually use scipy.ndimage directly.
        attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
        attMap -= attMap.min()
        peak = attMap.max()
        # Same guard as the first normalisation: a flat map would otherwise
        # divide by zero and fill the result with NaN.
        if peak > 0:
            attMap /= peak

    if overlap:
        # The colour map is only needed for the overlay.
        cmap = plt.get_cmap('jet')
        attMapV = cmap(attMap)
        # Drop the alpha channel (index 3 along the last axis).
        attMapV = np.delete(attMapV, 3, 2)
        weight = (attMap**0.7).reshape(attMap.shape + (1, ))
        attMap = (1 - weight) * img + weight * attMapV

    return attMap
|