evalscope 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of evalscope might be problematic. Click here for more details.
- evalscope/arguments.py +2 -1
- evalscope/benchmarks/__init__.py +2 -2
- evalscope/benchmarks/aigc/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/base.py +56 -0
- evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
- evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
- evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
- evalscope/benchmarks/aime/aime24_adapter.py +1 -1
- evalscope/benchmarks/aime/aime25_adapter.py +4 -4
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
- evalscope/benchmarks/arc/arc_adapter.py +1 -1
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
- evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
- evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
- evalscope/benchmarks/data_adapter.py +16 -9
- evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
- evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
- evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
- evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
- evalscope/benchmarks/utils.py +7 -16
- evalscope/cli/start_app.py +1 -1
- evalscope/collections/evaluator.py +16 -4
- evalscope/config.py +7 -3
- evalscope/constants.py +11 -0
- evalscope/evaluator/evaluator.py +2 -2
- evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
- evalscope/metrics/__init__.py +49 -4
- evalscope/metrics/llm_judge.py +1 -1
- evalscope/metrics/named_metrics.py +13 -0
- evalscope/metrics/t2v_metrics/__init__.py +66 -0
- evalscope/metrics/t2v_metrics/clipscore.py +14 -0
- evalscope/metrics/t2v_metrics/constants.py +12 -0
- evalscope/metrics/t2v_metrics/itmscore.py +14 -0
- evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
- evalscope/metrics/t2v_metrics/models/model.py +45 -0
- evalscope/metrics/t2v_metrics/models/utils.py +25 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
- evalscope/metrics/t2v_metrics/score.py +78 -0
- evalscope/metrics/t2v_metrics/vqascore.py +14 -0
- evalscope/models/__init__.py +50 -14
- evalscope/models/adapters/__init__.py +17 -0
- evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
- evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
- evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
- evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
- evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
- evalscope/models/adapters/t2i_adapter.py +76 -0
- evalscope/models/custom/__init__.py +2 -1
- evalscope/models/custom/dummy_model.py +11 -13
- evalscope/models/local_model.py +82 -33
- evalscope/models/model.py +2 -42
- evalscope/models/register.py +26 -0
- evalscope/perf/plugin/datasets/flickr8k.py +2 -1
- evalscope/perf/utils/benchmark_util.py +2 -2
- evalscope/perf/utils/db_util.py +8 -2
- evalscope/report/__init__.py +1 -0
- evalscope/report/app.py +117 -67
- evalscope/report/app_arguments.py +11 -0
- evalscope/report/generator.py +1 -1
- evalscope/run.py +3 -3
- evalscope/third_party/thinkbench/eval.py +19 -7
- evalscope/utils/chat_service.py +2 -2
- evalscope/utils/import_utils.py +66 -0
- evalscope/utils/utils.py +12 -4
- evalscope/version.py +2 -2
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/METADATA +18 -1
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/RECORD +175 -63
- tests/aigc/__init__.py +1 -0
- tests/aigc/test_t2i.py +87 -0
- tests/cli/test_run.py +11 -5
- tests/perf/test_perf.py +2 -1
- evalscope/metrics/code_metric.py +0 -98
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
- evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/LICENSE +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/WHEEL +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/entry_points.txt +0 -0
- {evalscope-0.14.0.dist-info → evalscope-0.15.0.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
evalscope/__init__.py,sha256=XZYDn3ShhM_48je5qQgwymtSdpTt8zYEnNfanYnpBdA,181
|
|
2
|
-
evalscope/arguments.py,sha256=
|
|
3
|
-
evalscope/config.py,sha256=
|
|
4
|
-
evalscope/constants.py,sha256=
|
|
5
|
-
evalscope/run.py,sha256=
|
|
2
|
+
evalscope/arguments.py,sha256=jywTxu_HWhgf0_OlnaOyRSzUHenr5Zio2vmcCgcfbxg,5453
|
|
3
|
+
evalscope/config.py,sha256=O3kjjVFRGSrlLD5EI4t99Z-m6oFtQVmEudvE62x92wY,9648
|
|
4
|
+
evalscope/constants.py,sha256=PHnsGndB4N5-jvmawPxMK5b9geE2Es5cUe8ZKYSuKgM,4016
|
|
5
|
+
evalscope/run.py,sha256=_DKbxgQGwhweBnQrI7lQhu5eoz4LYPVeNanzD4lHuJA,6476
|
|
6
6
|
evalscope/run_arena.py,sha256=WXPCT0L-b_KvLBQ9KnrVW6y8icdDcqVhaXjTZMpS8k8,8572
|
|
7
7
|
evalscope/summarizer.py,sha256=61kU5ZoSh1dd8HMJPqP3ZvJwcY9szwWFCZdu2lfATJA,5920
|
|
8
|
-
evalscope/version.py,sha256=
|
|
8
|
+
evalscope/version.py,sha256=X2BkdAHDhsMo9BTAegfd5uYheDVI8rh_UG5YqMwwXUE,119
|
|
9
9
|
evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
|
|
11
11
|
evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
|
|
@@ -54,20 +54,28 @@ evalscope/backend/rag_eval/utils/llm.py,sha256=acaD5QHPJUstJGpW1sNJ-3ZPT5J_Z8beO
|
|
|
54
54
|
evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
|
|
55
55
|
evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
|
|
56
56
|
evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=sUYvQxCtPl6CrcwhQpY8lJjW5skqWc-fvHUSnXd_MvQ,6054
|
|
57
|
-
evalscope/benchmarks/__init__.py,sha256=
|
|
57
|
+
evalscope/benchmarks/__init__.py,sha256=5AXNhhmbaBFEe3u7y5TtIrviYzFI-hC8oKqxFILs1pE,937
|
|
58
58
|
evalscope/benchmarks/benchmark.py,sha256=a_7Ctz36McuTyBSTYi56jis9pvOdWhg7JVSPFrbxqR4,2535
|
|
59
|
-
evalscope/benchmarks/data_adapter.py,sha256=
|
|
60
|
-
evalscope/benchmarks/utils.py,sha256=
|
|
59
|
+
evalscope/benchmarks/data_adapter.py,sha256=mWdxtHbordS577NqZUQZmIjlewjGDlStqc-iDvqpAyU,18061
|
|
60
|
+
evalscope/benchmarks/utils.py,sha256=yXQyszzrILNiBuUrbB1BtgotQSaNA8w6X935AL1dNAw,1074
|
|
61
|
+
evalscope/benchmarks/aigc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
evalscope/benchmarks/aigc/t2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
+
evalscope/benchmarks/aigc/t2i/base.py,sha256=4GFAvceT1Gpt5teDLRCZi62RwvPazuhG3zwft3gN3X4,2102
|
|
64
|
+
evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=WV9w3z8TxWNzVzn9A_g0xqeHh76ydnHL5xLwyg63VmU,2992
|
|
65
|
+
evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=baDGFRpVcSKpc1CdzNAMBtjeCZDUpyEc5l1KyrPNoEU,1892
|
|
66
|
+
evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=But2hcQU3X3v58poF8Qg2agrxTAP6gnjZYJs8Tr0g_4,2047
|
|
67
|
+
evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=U0RKN3apyD3YyZfIvqgO8TNuDO-zctlftHsSfBRyQxU,1825
|
|
68
|
+
evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=vOOiOe26H2dk9VN2WbB_Oi3lzavMIaYDBq6sqeSIiAU,1093
|
|
61
69
|
evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
-
evalscope/benchmarks/aime/aime24_adapter.py,sha256=
|
|
63
|
-
evalscope/benchmarks/aime/aime25_adapter.py,sha256=
|
|
70
|
+
evalscope/benchmarks/aime/aime24_adapter.py,sha256=GrIxCHpUwgUy8tXGTB7iQOt8k7wG8MJB0CWbwBmIy-8,1703
|
|
71
|
+
evalscope/benchmarks/aime/aime25_adapter.py,sha256=yxo5roCb8ryX9ROUU2FdZ-WBTUPZ14MrBzEL0zPOh-U,1718
|
|
64
72
|
evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
|
-
evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=
|
|
73
|
+
evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=em1YM2PxnJ8Of7Li3eqrw8PtwfeXSinfVIr-CIKVb60,4026
|
|
66
74
|
evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
67
75
|
evalscope/benchmarks/arc/ai2_arc.py,sha256=WtL4Z_ulcCU2KfptWTjTm75T2I2rVGd9aDBBB76P14w,5697
|
|
68
|
-
evalscope/benchmarks/arc/arc_adapter.py,sha256=
|
|
76
|
+
evalscope/benchmarks/arc/arc_adapter.py,sha256=0h-eT4BBmUJQrakKMPUNE1nSRwK6LHB-cflWpWzY978,6364
|
|
69
77
|
evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=
|
|
78
|
+
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=S3FQ_UD3GC8M7FU-PPeuJm5YVrG5qhnVE5T1jRpPuxo,6131
|
|
71
79
|
evalscope/benchmarks/arena_hard/utils.py,sha256=NstI1VR5fTaT-bfXRj0cLqm0DtH8EY4EQHR-K9HJubI,5089
|
|
72
80
|
evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
73
81
|
evalscope/benchmarks/bbh/bbh_adapter.py,sha256=fROpzenrjpEBWtnvM_RL_m0uXPOhXTtYAglJEZbzUdY,8330
|
|
@@ -99,21 +107,21 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
|
|
|
99
107
|
evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
|
|
100
108
|
evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
|
|
101
109
|
evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
102
|
-
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=
|
|
110
|
+
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=1ITBXI0f01Dt1p7sb2RGswIeg9685Bkk2S2xmA1vat8,11295
|
|
103
111
|
evalscope/benchmarks/ceval/ceval_exam.py,sha256=ngOvb6Fymt7iPWIb2fzrUVpqmUT2VBoqh7X_IH8Bcsc,4824
|
|
104
112
|
evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
|
-
evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=
|
|
113
|
+
evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=zY8dfvrTeCHAQ3d7AM02CexZw5CVKH51ZOhtT7Q1Gko,8031
|
|
106
114
|
evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
107
115
|
evalscope/benchmarks/cmmlu/cmmlu.py,sha256=Y59NIGUFzJEztJbkehZsG4Cz0J_v9Cyju6xazHMYIcA,5022
|
|
108
|
-
evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=
|
|
116
|
+
evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=r9zael_Y2Jso0ashevYpF8e5SHOBh8iMcPIJU5WT3pQ,10367
|
|
109
117
|
evalscope/benchmarks/cmmlu/samples.jsonl,sha256=FXbyPQSDorKBGSD0lnOzioZmFjG07lIL87FRDRaMPSY,1722
|
|
110
118
|
evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
111
119
|
evalscope/benchmarks/competition_math/competition_math.py,sha256=Cehyokift7oDKjc8TdmfblZ6mMc39wQWtqqbUi34QLc,2629
|
|
112
|
-
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=
|
|
120
|
+
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=wgejW-_QswtT8_3JKAQ_H6svH8IotDJDBEH7X4nP4bY,6760
|
|
113
121
|
evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
-
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=
|
|
122
|
+
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=QgLgIrjD3q53T-lu1UWTV6T4h1cKGoCQDh0O4QxFezw,2569
|
|
115
123
|
evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
|
-
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=
|
|
124
|
+
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=fqbt61owPP7t2H4B2zbYVZTs0VBGuXNvWGvkukwhRYc,5039
|
|
117
125
|
evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
118
126
|
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=8d5znAcQmFSmvyKV-JuMQzbY5k6xDNQQdrWZ7zgPTK4,4603
|
|
119
127
|
evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -137,20 +145,20 @@ evalscope/benchmarks/ifeval/utils.py,sha256=TKrM1m2qDCUauahogItDdICf4mDk0OjasSxg
|
|
|
137
145
|
evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
146
|
evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=16whmFkJt9fLbei9d-kmjnWB_5y5vsiX9tK5kSuxDw8,2449
|
|
139
147
|
evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
|
-
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=
|
|
148
|
+
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=iqmVUMZmyRhzOOXXQ-NN9P1nGvvbzTjOSEp6djbN_rw,6503
|
|
141
149
|
evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
|
|
142
|
-
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=
|
|
150
|
+
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=AkvlQ-3oS8Tr3xZgx3omMt5w8jia6yH07D5Bq27Q5wc,3490
|
|
143
151
|
evalscope/benchmarks/live_code_bench/load_utils.py,sha256=5i9wtdPLYR8ckjx5MaYQVC2LFYvjKzR6Fa6UZmeOTRc,2445
|
|
144
152
|
evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
|
|
145
153
|
evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
|
|
146
|
-
evalscope/benchmarks/live_code_bench/testing_util.py,sha256=
|
|
154
|
+
evalscope/benchmarks/live_code_bench/testing_util.py,sha256=abjlwp6HDayf88mMI_daOKm06nEOeNBaMkmGWqk2DJo,17286
|
|
147
155
|
evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
148
156
|
evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=RVbsiglxmEW37-tDYgr4Drywh26I94DRGhwv7uP2aYk,2829
|
|
149
157
|
evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
150
|
-
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=
|
|
158
|
+
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=opT73il3CbM1zZhuqRHZu_4O4WEZCZPvZe06I4U8YGM,1911
|
|
151
159
|
evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
152
160
|
evalscope/benchmarks/mmlu/mmlu.py,sha256=sA8AC0bN7iURrSazqkY31s_reNVbDZSUCB-NCTQsVeI,5042
|
|
153
|
-
evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=
|
|
161
|
+
evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=__BrO2f7_AZ87a00HCRGPm5ZK8B4JTZKzRBRQY3yf3Q,11635
|
|
154
162
|
evalscope/benchmarks/mmlu/samples.jsonl,sha256=f5Y2vwbEvNtpE7vrl9BHoJzsdceI4vUAo1frexYyX2o,1345
|
|
155
163
|
evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
164
|
evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=hPqxDqDhqin3TxfimfhIxfEc_8UfzTDGAfX7iDrWy28,4248
|
|
@@ -166,7 +174,7 @@ evalscope/benchmarks/race/race.py,sha256=TtFC3opqEA6q8AQIAFQRGx07FjD9z7iW8wmtxeO
|
|
|
166
174
|
evalscope/benchmarks/race/race_adapter.py,sha256=RD0B-i5dzeNKuhqnWbremgf4tk9jmOO4_eLAiITB1F0,6381
|
|
167
175
|
evalscope/benchmarks/race/samples.jsonl,sha256=bhSktBgU6axYQCClRtQ7nN8D1x815AU8xMAIG1oflG0,1243
|
|
168
176
|
evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
|
-
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=
|
|
177
|
+
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=to4kSKc29BmtG4q9R2PeM-sdHiL8toSyoVi1D9WMRKk,8949
|
|
170
178
|
evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
179
|
evalscope/benchmarks/super_gpqa/five_shot_prompt.txt,sha256=vD3RMeQustxY_oWA8IobntjywT8ZUO7Jaub--rElDT4,4718
|
|
172
180
|
evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=BqNLL8BYnK6tRuIdV6ijL4Uym2SejH_h1BV06XNjSE4,9331
|
|
@@ -182,42 +190,142 @@ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=ueUU860kg5_xf_MtU
|
|
|
182
190
|
evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
183
191
|
evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
|
|
184
192
|
evalscope/cli/cli.py,sha256=w_dtXljur9s5lmTn6LbbFL_viTPQB1WAEzhYcId09Og,729
|
|
185
|
-
evalscope/cli/start_app.py,sha256=
|
|
193
|
+
evalscope/cli/start_app.py,sha256=PoAnmYLw_UdWpA7qrUkSIx8hRoIGRy9yXrbH8bYOSL4,804
|
|
186
194
|
evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
|
|
187
195
|
evalscope/cli/start_perf.py,sha256=5hLi5jWgM9BJPXLd8d9D1zqrcj_5c0KvkfB1DgD4_RU,831
|
|
188
196
|
evalscope/cli/start_server.py,sha256=DQRIfbsHaOAsVcLGF6iRyJnxmd5Sf_tgytpJNfiWCeE,3662
|
|
189
197
|
evalscope/collections/__init__.py,sha256=hd68Qf-ryeDsz5Pu-Dh83M5V5RE5mhLsG-vc55n5n0o,228
|
|
190
|
-
evalscope/collections/evaluator.py,sha256=
|
|
198
|
+
evalscope/collections/evaluator.py,sha256=Ll-qLet04aEp1WxoCKAuvZVWEZuy1lS_D-vZIN3zSQQ,13425
|
|
191
199
|
evalscope/collections/sampler.py,sha256=2NwvhJVdi-mrDeK7RWwEGOoE7DdxtpyASRUZU_D6hWw,4855
|
|
192
200
|
evalscope/collections/schema.py,sha256=mjJfNmy_athJ1TmnuJRkrKRlefzefuQXZuTtjn8SHKo,4073
|
|
193
201
|
evalscope/evaluator/__init__.py,sha256=S6MU1O_iiNAaKxNIhO9MEmdW-BSNf_YH2l6NQ9lxVNo,103
|
|
194
|
-
evalscope/evaluator/evaluator.py,sha256=
|
|
202
|
+
evalscope/evaluator/evaluator.py,sha256=M1JrsoZZ5OvcZfzgLrNSMtbbz5gvvCd0GwJArJQV0lk,19797
|
|
195
203
|
evalscope/evaluator/rating_eval.py,sha256=uo0uj9z_TDsxdYlT8WIfNZhFLAfRkW9zn_wlu-F72O0,5575
|
|
196
204
|
evalscope/evaluator/reviewer/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
197
|
-
evalscope/evaluator/reviewer/auto_reviewer.py,sha256=
|
|
198
|
-
evalscope/metrics/__init__.py,sha256=
|
|
199
|
-
evalscope/metrics/
|
|
200
|
-
evalscope/metrics/llm_judge.py,sha256=Di0Q1c6VHLl0nQ_TVOZOOQlMApDIU83HuDPTOV8XrTA,4023
|
|
205
|
+
evalscope/evaluator/reviewer/auto_reviewer.py,sha256=5WRYuXFTDgVmolrOdiTysk-mXrpw6Qg87-iuY-VD1W4,16618
|
|
206
|
+
evalscope/metrics/__init__.py,sha256=y1sdj5FBKYW1q5kLC6QREzoITHwstJRUdji6p0X5aAE,1363
|
|
207
|
+
evalscope/metrics/llm_judge.py,sha256=MjyTC-xiSThk8Rd4IdUbsCXeeikoOORv6wt8H7SW8s4,4008
|
|
201
208
|
evalscope/metrics/math_parser.py,sha256=uTDudn305G3b8-GboWTrDE6OfrEwAW-areHnoGXZ6Is,17302
|
|
202
209
|
evalscope/metrics/metrics.py,sha256=_YI7RhxlFu_JOgeE3LF9UKu6mJruvyu4FgqVf78Bjb8,13813
|
|
203
|
-
evalscope/metrics/named_metrics.py,sha256=
|
|
210
|
+
evalscope/metrics/named_metrics.py,sha256=PrzU_1mGTeRFxVJFT1aXxIOiS7MnNoWyZsb8uCRVDeE,2278
|
|
204
211
|
evalscope/metrics/rouge_metric.py,sha256=zhIUqenSuxnORR9tamLQBGjFwP91Zei2UiLtcOyseVM,4639
|
|
205
212
|
evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
|
|
206
213
|
evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=Kq6AObenmLVQ5tN3NgN042a6mgRFQmRO21-ohd9mSa8,11972
|
|
207
|
-
evalscope/metrics/
|
|
208
|
-
evalscope/metrics/
|
|
209
|
-
evalscope/
|
|
210
|
-
evalscope/
|
|
211
|
-
evalscope/
|
|
212
|
-
evalscope/
|
|
213
|
-
evalscope/models/
|
|
214
|
-
evalscope/models/
|
|
215
|
-
evalscope/models/
|
|
216
|
-
evalscope/models/
|
|
217
|
-
evalscope/models/
|
|
218
|
-
evalscope/models/
|
|
214
|
+
evalscope/metrics/t2v_metrics/__init__.py,sha256=GBxgKTPVy_qhW_F3M4Oi6QMWhdAi4PqGX5w3t6Tueho,1783
|
|
215
|
+
evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
|
|
216
|
+
evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
|
|
217
|
+
evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
|
|
218
|
+
evalscope/metrics/t2v_metrics/score.py,sha256=6tIKZoQprlQOBoV-2E-3InIi2Jl29a9W2BFPjKnV1nw,3044
|
|
219
|
+
evalscope/metrics/t2v_metrics/vqascore.py,sha256=UmcSSdQN8mzs3b11sD5Z31WIyQVQUpgXKWQ1XYoX1c8,469
|
|
220
|
+
evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
|
+
evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
|
|
222
|
+
evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
|
|
223
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
|
|
224
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=QhksCBA12Ekm67H4TiROkC84dcbHB4zL5oO7BU4fwnI,8099
|
|
225
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=h5z0HcnobkGw7vEeIwiVauwYC5GRyKczdevZi60a1aw,3328
|
|
226
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=atX4JAxR4xAmBZ0WIVf_K8g3tNvqeuXNIIUX50bzo4Q,3806
|
|
227
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=dp6ZyWKU9sJ2MjsyQJvTi_tBoEs6l2-KYmjz8cN_SL0,2394
|
|
228
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
229
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
|
|
230
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=7Jl2noVHFZUN5rXd9XDBr2ILChP56JPOM1mbJSq5G8c,5047
|
|
231
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=I6KFiy1DPCtev7TLOFxjUjkC13Mt2eBBha_XaN31nlI,7940
|
|
232
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
|
|
233
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=9IGXCC8d6a6WTIICcC_KgZf0KdDcJ3L_HOSXILJmMIA,3447
|
|
234
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=3uduuEmoliezG5BwymoNbRm3FXvKh9gtkN74_LX9yIs,3674
|
|
235
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=OtnnYsW0G2vGoUHfyB0F-m5r10A5-N6k9agFRXav-Uw,3199
|
|
236
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=IPQcC4-cYeJjHGRysh26E3iNGHz1UJA-oxxEpSIXpX0,6021
|
|
237
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
238
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=ZCN_PJ3jz-a9I0oGbeuOcGuMOJT5iVb-yh5Dzq49VwE,2700
|
|
239
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
|
|
240
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=krWGPe--eLHwK9M2tqWkmu7iKlbAM_qanP46NUkmkhM,9896
|
|
241
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=R5VS77aDRX4HVcwJ7xOAnf_uP8jhix4PXbxWim1BOdg,5903
|
|
242
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=p22TqpCDUFV39I9anYjl5zehNXOCtPQ15fHnEeDrF3A,4712
|
|
243
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=oEsZOQCZl6NS0SaNn9mM4S3NSg6lT5Lm_HH9Ju7i8O0,587
|
|
244
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
245
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
|
|
246
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=mMDRPYpSUsnbc3ijicy4IPD6J2z44iAIgKUdhkf5Nkw,14037
|
|
247
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
|
|
248
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=Em8li9Mqoc3Xv3sDz4lAlKU4h9vZpUkzycGGyM6a-sM,2807
|
|
249
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
|
|
250
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
|
|
251
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=R38FVQMznUzTAfq6DLp-Y34XYYWkqmy1-aMfNcGKthk,14998
|
|
252
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
|
|
253
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=0spv43GzXh_5l9BUZN8tuPZgGKkkU8InWjvFuUuRbLw,801
|
|
254
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=LPZP_XK612apDxYkvGsH8B8E2Z2Q2CaT6JY0T1dghEk,5866
|
|
255
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=1eMz5jxA06uoK-sZyD7SNnBy87gbwplt8526koTRBLo,3330
|
|
256
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=he84q4G6OdycZDQHVkHUxENuIQBBH8WJ-9GBiMpYC-A,9715
|
|
257
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
|
|
258
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
|
|
259
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=F2OldVATVzyvaSxO2l-tBexhOkmoVb7n23S89JfFIEc,8313
|
|
260
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=QKYKGyXpQbDWJfz6EDZCB5meB5HGj59ygmoPm00Q1dQ,10955
|
|
261
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
|
|
262
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
|
|
263
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
|
|
264
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json,sha256=zVHOJiAdTS92rHzg62Q0oTZZsZalondGMqDJJfbolAU,491
|
|
265
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml,sha256=Ls2ZfsKV3gDzg6F2zBHPhFbK-3na7ozNGWFmMq_8hTg,1074
|
|
266
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml,sha256=yXP6HQVyEYc1X2C_SawNIye4eoaQPxl8JazV7CXUPDc,1073
|
|
267
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml,sha256=OpdOvll7UX6nURi84rDvWiFZrLsNNHtoFRWdugVPvdA,1073
|
|
268
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml,sha256=hPHF831LSjBGbo1fg9fqhbeSAGOVW-iiZbWHVQVs8wU,957
|
|
269
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml,sha256=xMLIGVhkgwBsP9IbKFoZNW_lbVwwjz44ArlSRPS1Q98,980
|
|
270
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml,sha256=kyfbbtLOG8cY9CUFm-_g56djMMwfZhOgXSFV2gRMomQ,983
|
|
271
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml,sha256=_e45RN28lhhyFRg7JsDczNMU_nJbwirn3eVB3mgkmrI,1022
|
|
272
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml,sha256=6ycWPKz5alaQCxpuPuqX1e_whroRULgb8gICOWLDBO4,1019
|
|
273
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml,sha256=svzg_Ao0g0-tAnBKT0Jj4PDRvv1ikSxS1Dq5YkzrUTU,860
|
|
274
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml,sha256=ZZE6AWe8iiLTXYiJk60P0J4cRLwehLYzRn1ohZxgstI,955
|
|
275
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml,sha256=T7L97c2yFLZ5N3_4NFqvRxShvr7relE2GNREuukufCU,955
|
|
276
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml,sha256=U0s_BMVb21E2aGnLGBstzdR3WSTP_gk4Hubnnt50lcg,952
|
|
277
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml,sha256=AIgEAeTUqFiNVq-uIjbE_zh7jDPLFwchZsw0fCvWqU0,982
|
|
278
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml,sha256=CQZQICT2ogqwKklzWVUnfWidOY-Deflh_WD-vq08sys,958
|
|
279
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml,sha256=UGuL20xRPZPy3sZeqMgIzovdd1BOTESwTS2gfwsdGFk,955
|
|
280
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml,sha256=eKuyCU7yFIU1VSHNRzEu4Bm7NY6NPppIHcTd0RKXUrI,955
|
|
281
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
|
|
282
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
|
|
283
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
|
|
284
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=9QnWMeulCQm_c__-b9cBYbjqihLDSblW3-luGnxfBXQ,6391
|
|
285
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
|
|
286
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=eCm7opG-Ld--sZEG67creYqoRsHjWBuWwUha2s13AuM,9806
|
|
287
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=1vuUf6EGzfiyMOHmacI3NhjCIq1LFhANpLJ_Frgh3Mk,20423
|
|
288
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=PobqtMnfh7HuD4zdlj5tBMufvufIO5fIL0_NvTOK4AQ,52965
|
|
289
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=L20LYyiI8AGOTKfTJuYZgrkcvg2UHsnFJE866v5jlyE,18674
|
|
290
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=gKOIPQhyB8SVbx0wW3W-VQNmZhdA4186C1XVHHWYkjc,46810
|
|
291
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
292
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
|
|
293
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
|
|
294
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=A5y_qCsmW9j78w92L9VEjXRaqcsyI5FCu0Z9QJvKF_4,18960
|
|
295
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=hcPHQjb-QJRAEFpDackLqjKeu8z_uKz_5VyyeU3AQQI,13879
|
|
296
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=DM1W0vZ0ZhoR9vvbl4O_MlShApZHv2MtK37zHsVoCjc,30527
|
|
297
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=5Ub0AYnQFcG32ByitEah6Hog6oAKNrK7HTnmlLI4XfE,11325
|
|
298
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=uT-ubavkEWPsqVtFy-rPQCw2rf-VOd-ZLqP_oDyNTIs,39581
|
|
299
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=grUl2rtGJVDoP6u47jywBzcmqL9kylqCf1IkyKCgNLE,83469
|
|
300
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=iuiXv-jZcWVoUle6GDQXLy1SMXt4WPJEOD4qhjPe6U8,3856
|
|
301
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=1XxKgsA6QYBkaADLUoVwFDPtRJzCvcSrk-1lwGKzF_c,2286
|
|
302
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
|
|
303
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
|
|
304
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
|
|
305
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=uLV8Qp8lRGkMVq5EtvbPa4l8ZpykN6godzblV7oj8bg,7086
|
|
306
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=r-CRo6u9qmFcdSYNz9pWeuwj0XKykuWyFm7pQVK2yI4,6939
|
|
307
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=Vk1Fm7ED1eAHW6lmSKF7VBZa55EN4h1zBEBdm5uKW24,4303
|
|
308
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
|
|
309
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=8THNmMvp7bYNIfExYGRfNJ88cuEvdK9KTO-3-51fUsY,13961
|
|
310
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=TkSEKghtqXW_1bHFsG9dhGVKv2XAmTMcdAH2UDvuSFM,36639
|
|
311
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=rGifS4I0o3faXrLnW-eSbksmkx7ppMkRlnVBomsbD3Q,851
|
|
312
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
|
|
313
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
|
|
314
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=XzebAHBAjOpkIMZm43dd55PESgmyq_J45Ji6bogYR3s,11204
|
|
315
|
+
evalscope/models/__init__.py,sha256=yB4NuKvSd3Jd4GRQvJeGPxwigd8RJErdop5PzSQhsMY,1565
|
|
316
|
+
evalscope/models/local_model.py,sha256=1yjwt7NHE7pI8xoGv38NTql9KcCd80x1mjlELqkNHBQ,4110
|
|
317
|
+
evalscope/models/model.py,sha256=MxvJAUNkuT7IA3bchnmJDur_YCKj9ShOD2Uq40dBcGc,6308
|
|
318
|
+
evalscope/models/register.py,sha256=pNC69YUvw-lodYpOXmByHm26h4m0Lofgd_om-JhOBq4,1882
|
|
319
|
+
evalscope/models/adapters/__init__.py,sha256=mduiDZ6LgmkefNf4CtObZk6heOB93HxxgqTuYvrqWoo,590
|
|
320
|
+
evalscope/models/adapters/base_adapter.py,sha256=f2FY8DLERudkfb4_anxNVFE_D19xCJj9BObiHWspewI,3268
|
|
321
|
+
evalscope/models/adapters/chat_adapter.py,sha256=HD1jAKlAv5KRjzB0s21E4rTEIhryZhZHMpSctF9xrN8,7306
|
|
322
|
+
evalscope/models/adapters/choice_adapter.py,sha256=4fuz3MFEqK8ln4mMs3goMCdRPBwYmmgN70HTdr_sW_U,8005
|
|
323
|
+
evalscope/models/adapters/custom_adapter.py,sha256=w8cD0b3xgcdhSZelcat67CGJnALOfz5IALzURnLjab8,2275
|
|
324
|
+
evalscope/models/adapters/server_adapter.py,sha256=5kH1yDAjETogR7aOdnCEueYE1bREI40OdXdBiJpMdIM,6734
|
|
325
|
+
evalscope/models/adapters/t2i_adapter.py,sha256=xkMRyZ61yTiJfmULK-p9du4nNox41pkHiV2CTFBO3qM,2659
|
|
326
|
+
evalscope/models/custom/__init__.py,sha256=MZylegALg1HerOYtp-qbzu4Wb6PW3JbrxwONHU-PAVs,131
|
|
219
327
|
evalscope/models/custom/custom_model.py,sha256=rBQLAuPEw_OPUtRSCEmxEfpcA8jPj8bAdsmtKs4ygus,1566
|
|
220
|
-
evalscope/models/custom/dummy_model.py,sha256=
|
|
328
|
+
evalscope/models/custom/dummy_model.py,sha256=WRT_aCBZLXnC4yRCgggkuySkhM71C47O2Txx_YNc3UM,1933
|
|
221
329
|
evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
330
|
evalscope/perf/arguments.py,sha256=UZKlkbDE2N408dY8Ji-WB8sl1rcmamywzxLvNXpnY0w,10194
|
|
223
331
|
evalscope/perf/benchmark.py,sha256=nv7gtCkeKnLKQQiKM4G0MYO2ambcuwsbx67OgEQG0nM,7917
|
|
@@ -233,7 +341,7 @@ evalscope/perf/plugin/api/openai_api.py,sha256=kTL_2OACuKhzd2W0Pf4DirpMumzk4V3rq
|
|
|
233
341
|
evalscope/perf/plugin/datasets/__init__.py,sha256=Z6Jc0RxJS_z0nBBV1-b0-56Ija60AtQ7I_67gY6ZfdQ,568
|
|
234
342
|
evalscope/perf/plugin/datasets/base.py,sha256=Z-INWueeYjfEZhP4lbTlBMVwIa6BcXZKWx-w7Pop3mA,1786
|
|
235
343
|
evalscope/perf/plugin/datasets/custom.py,sha256=npreC7H1VsdTGYkqlMESvyOhtXOfZQA7_-ICmxe3FWk,936
|
|
236
|
-
evalscope/perf/plugin/datasets/flickr8k.py,sha256=
|
|
344
|
+
evalscope/perf/plugin/datasets/flickr8k.py,sha256=MbJKEB0XqZE0nDEenwYs0FLH9QL658Vn9uQmUH4hPvk,1605
|
|
237
345
|
evalscope/perf/plugin/datasets/line_by_line.py,sha256=AqZYG6tVL3BIGnzh_2Tev8lDYezJG_1gqJY8bSNQl3Q,957
|
|
238
346
|
evalscope/perf/plugin/datasets/longalpaca.py,sha256=XelLris0-c3StLInQ-Oav4jqGcXPNfJxEDeYvaetEbI,1297
|
|
239
347
|
evalscope/perf/plugin/datasets/openqa.py,sha256=4Pnx5duFJzoiTUfZCbcK7LO8f-skmcpYNUUrtNR_UUc,1463
|
|
@@ -241,8 +349,8 @@ evalscope/perf/plugin/datasets/random_dataset.py,sha256=SIlsjAE_Stknfr6o1CBFvANB
|
|
|
241
349
|
evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
|
|
242
350
|
evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
243
351
|
evalscope/perf/utils/analysis_result.py,sha256=ESzaZHGTpr2LoJR3jpOzqMphxSrr79d364ZzD159PmY,1169
|
|
244
|
-
evalscope/perf/utils/benchmark_util.py,sha256=
|
|
245
|
-
evalscope/perf/utils/db_util.py,sha256=
|
|
352
|
+
evalscope/perf/utils/benchmark_util.py,sha256=CftjnxYA7d1aeAL_iuyXcJPwCL5A8zWGZSkNtjrMyW8,6309
|
|
353
|
+
evalscope/perf/utils/db_util.py,sha256=efz6qQtMIYAIpG0sAEjLwuzTHBUiuzAV1n7_DCGrN5o,9461
|
|
246
354
|
evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
|
|
247
355
|
evalscope/perf/utils/local_server.py,sha256=clF8i0UFmaxBBB6gX05KvVCyzSv0xzsAidz0_sLLlAk,4627
|
|
248
356
|
evalscope/perf/utils/log_utils.py,sha256=1jmB31W3ol9ukPAPbQ8xG3yoZ9oi3tjEyMK5M3ERmbw,1471
|
|
@@ -267,10 +375,11 @@ evalscope/registry/tasks/general_qa.yaml,sha256=S3kdlrazWX2VAX2PMhNtBnFZVSnUKBNi
|
|
|
267
375
|
evalscope/registry/tasks/gsm8k.yaml,sha256=M2I7otwOSy0usD8yG8d6QziASQlKdhKLflRHMG0LXiM,729
|
|
268
376
|
evalscope/registry/tasks/mmlu.yaml,sha256=cJcMH1Cvgo9PlYoTmeGx2bcZayysltaa6ehK57dDkvo,726
|
|
269
377
|
evalscope/registry/tasks/mmlu_mini.yaml,sha256=K8ouHh7ve5ZsbkqRtV3Jl-DF01YFPuObfwEdACJA4Pk,778
|
|
270
|
-
evalscope/report/__init__.py,sha256=
|
|
271
|
-
evalscope/report/app.py,sha256=
|
|
378
|
+
evalscope/report/__init__.py,sha256=iLNqx7CnHSHQmOBqWUK_vt2VIjnvGslJTqn--7B4y_s,316
|
|
379
|
+
evalscope/report/app.py,sha256=8pcQi5oYAYa9hXoMoMUNfy9jSvSR9DDiXyLcyPd9AmA,28459
|
|
380
|
+
evalscope/report/app_arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
|
|
272
381
|
evalscope/report/combinator.py,sha256=O3QirwtYhDhdaWVT4STJMCGZMwoX8BTeJ3HtS9iwnWQ,2567
|
|
273
|
-
evalscope/report/generator.py,sha256=
|
|
382
|
+
evalscope/report/generator.py,sha256=q9aHWNjQgvutAKtpjfWOpfu5zNFdnXilO9OqBqt_Phg,3612
|
|
274
383
|
evalscope/report/utils.py,sha256=DRlbjbqHEmM8rGlA4pwtlHFhOZtyUzcqiS-mejfIDkU,4584
|
|
275
384
|
evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
276
385
|
evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
|
|
@@ -290,7 +399,7 @@ evalscope/third_party/longbench_write/tools/__init__.py,sha256=I_ANdxdcIHpkIzIXc
|
|
|
290
399
|
evalscope/third_party/longbench_write/tools/data_etl.py,sha256=T7a-4PwZg5alZQh-oTi1zjMxjGmVVZYVwSR9-diZlF8,5971
|
|
291
400
|
evalscope/third_party/longbench_write/tools/openai_api.py,sha256=PiIvvDYJkn041SJkLoroXwl1B8TtwpB7licVfqNSeuQ,8168
|
|
292
401
|
evalscope/third_party/thinkbench/__init__.py,sha256=C0aSu71_dc1upUVkKmq2VgDd9plpRcYUdCE6BjUWJcA,110
|
|
293
|
-
evalscope/third_party/thinkbench/eval.py,sha256=
|
|
402
|
+
evalscope/third_party/thinkbench/eval.py,sha256=IyfVTm6arhjBgvGMG5OZwopqQTmWVMJ8zYbbVSLtrvk,19503
|
|
294
403
|
evalscope/third_party/thinkbench/infer.py,sha256=2L4DAJKn3wAhNEKnKudQT60igGOJSKH80FR4nS7DHYk,3952
|
|
295
404
|
evalscope/third_party/thinkbench/resources/critique_template.txt,sha256=d4Egc-qH--4lG8X_EcmgymnuZgiCMbee1M5pt4HrRKA,535
|
|
296
405
|
evalscope/third_party/thinkbench/resources/reformat_template.txt,sha256=zTZyVAzmMBtAwI9lHly9EXsqX471OW-VTg538PDcB30,1775
|
|
@@ -309,21 +418,24 @@ evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1
|
|
|
309
418
|
evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=GITEbyiER10Zi-ZWpSqYCdAsiVtNeGK24hvR3kmYn2s,2689
|
|
310
419
|
evalscope/utils/__init__.py,sha256=jLVoGryuqUh4Km9QWWQBzpqkcVNRK0MbwNaSgckqdiU,139
|
|
311
420
|
evalscope/utils/arena_utils.py,sha256=Gf8VpH4C_oF2Abif_QeL0rAP6tvTzsc0gglpdNkUE48,7155
|
|
312
|
-
evalscope/utils/chat_service.py,sha256=
|
|
421
|
+
evalscope/utils/chat_service.py,sha256=U2jtrkOa2asRp16Zam0zIi_38mCyWQqql_L6JSwii4I,8749
|
|
313
422
|
evalscope/utils/completion_parsers.py,sha256=YWHkLkSfURTcUjNNlCL6PPDICd4F2Ns9figgPN4C97c,2933
|
|
314
423
|
evalscope/utils/filters.py,sha256=x_NX40uWMmUsVrAGHCeeV2e63HZZFugWUgdUhk64ivM,1523
|
|
424
|
+
evalscope/utils/import_utils.py,sha256=Oo8saX_mMw4U1RrA7_pn8FmV6P9laru4fEgecqqwpqk,2585
|
|
315
425
|
evalscope/utils/io_utils.py,sha256=Tjdgen1FsAA4ArqiUzu734L0Px5NuiS0GKRRiGIzxSA,4192
|
|
316
426
|
evalscope/utils/logger.py,sha256=barHSdtbEu21ynGQj_wS-rd7B02wPPR5AgaWCQzvG4w,3638
|
|
317
427
|
evalscope/utils/model_utils.py,sha256=hB9W334ecAb6553FhooT6_jM0g-tjj6AU48IV3K1CKw,1131
|
|
318
|
-
evalscope/utils/utils.py,sha256=
|
|
428
|
+
evalscope/utils/utils.py,sha256=hP_ntROFsZ-zaNVpJtT2prNo8iX-UAKfRtdxbLtPJng,11105
|
|
319
429
|
tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
320
430
|
tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
|
|
431
|
+
tests/aigc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
432
|
+
tests/aigc/test_t2i.py,sha256=_M3WxY5ruBM4RD7rYHhgizcIhH-ny5XD9M16Ayl3UPk,2619
|
|
321
433
|
tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
322
434
|
tests/cli/test_all.py,sha256=pwup--iNxckUEsR_aFjIAbEQo3UogSu5aIWf9ryLP2o,4022
|
|
323
435
|
tests/cli/test_collection.py,sha256=y8FjoPziPRf5BdJK8DHjcXn26ETKz1OyqjnCpwjt-F4,4096
|
|
324
|
-
tests/cli/test_run.py,sha256=
|
|
436
|
+
tests/cli/test_run.py,sha256=4B-6sOyotK3omirZWWyg7-CcnUSeZjiaU3aXHr0hH_Y,16804
|
|
325
437
|
tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
326
|
-
tests/perf/test_perf.py,sha256=
|
|
438
|
+
tests/perf/test_perf.py,sha256=8K5tGlWwOpYWnJ0GaCpqSw9zPOiM8fEKJaDil2mpTSQ,3831
|
|
327
439
|
tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
328
440
|
tests/rag/test_clip_benchmark.py,sha256=ZCBtgnF8Vuji6WQlb92-_RIvXlUX_Xt-cHZP4AN_DNI,2552
|
|
329
441
|
tests/rag/test_mteb.py,sha256=YJw6X1jwX6SYNB-ryVb-OHJWu3vsE3Y4STATI75rdG0,5619
|
|
@@ -334,9 +446,9 @@ tests/swift/test_run_swift_vlm_eval.py,sha256=C8DftjewnZaerQWfERI70bU3sQLWQ-ejZU
|
|
|
334
446
|
tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=THZEXUOSqm9rWslwJHmZyh-Ytv5c_QKpgRW5J2s_69E,6017
|
|
335
447
|
tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
336
448
|
tests/vlm/test_vlmeval.py,sha256=UqRiBPMU3vRtLIG1Qu4ZVhyUQx-zGYQuLCgobwf-7a4,3176
|
|
337
|
-
evalscope-0.
|
|
338
|
-
evalscope-0.
|
|
339
|
-
evalscope-0.
|
|
340
|
-
evalscope-0.
|
|
341
|
-
evalscope-0.
|
|
342
|
-
evalscope-0.
|
|
449
|
+
evalscope-0.15.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
|
|
450
|
+
evalscope-0.15.0.dist-info/METADATA,sha256=MLn0s_L7s0oeQPWL1XuhihDAFJnzLdVTvdrep-9Bgag,34053
|
|
451
|
+
evalscope-0.15.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
452
|
+
evalscope-0.15.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
|
|
453
|
+
evalscope-0.15.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
|
|
454
|
+
evalscope-0.15.0.dist-info/RECORD,,
|
tests/aigc/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
tests/aigc/test_t2i.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from dotenv import dotenv_values
|
|
2
|
+
|
|
3
|
+
env = dotenv_values('.env')
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import unittest
|
|
7
|
+
|
|
8
|
+
from evalscope.config import TaskConfig
|
|
9
|
+
from evalscope.constants import EvalType, JudgeStrategy, ModelTask, OutputType
|
|
10
|
+
from evalscope.run import run_task
|
|
11
|
+
from evalscope.utils import test_level_list
|
|
12
|
+
from evalscope.utils.logger import get_logger
|
|
13
|
+
|
|
14
|
+
os.environ['LOG_LEVEL'] = 'DEBUG'
|
|
15
|
+
|
|
16
|
+
logger = get_logger()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestRun(unittest.TestCase):
|
|
20
|
+
@unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
|
|
21
|
+
def test_run_general(self):
|
|
22
|
+
from evalscope.config import TaskConfig
|
|
23
|
+
|
|
24
|
+
task_cfg = TaskConfig(
|
|
25
|
+
datasets=[
|
|
26
|
+
'general_t2i'
|
|
27
|
+
],
|
|
28
|
+
dataset_args={
|
|
29
|
+
'general_t2i': {
|
|
30
|
+
'metric_list': [
|
|
31
|
+
'PickScore',
|
|
32
|
+
'CLIPScore',
|
|
33
|
+
'HPSv2Score',
|
|
34
|
+
'HPSv2.1Score',
|
|
35
|
+
'BLIPv2Score',
|
|
36
|
+
'ImageRewardScore',
|
|
37
|
+
'VQAScore',
|
|
38
|
+
'FGA_BLIP2Score',
|
|
39
|
+
'MPS'
|
|
40
|
+
],
|
|
41
|
+
'dataset_id': 'custom_eval/multimodal/t2i/example.jsonl',
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
run_task(task_cfg=task_cfg)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
|
|
50
|
+
def test_run_benchmark(self):
|
|
51
|
+
|
|
52
|
+
task_cfg = TaskConfig(
|
|
53
|
+
model='stabilityai/stable-diffusion-xl-base-1.0', # model on modelscope
|
|
54
|
+
model_task=ModelTask.IMAGE_GENERATION, # must be IMAGE_GENERATION
|
|
55
|
+
model_args={
|
|
56
|
+
'use_safetensors': True,
|
|
57
|
+
'variant': 'fp16',
|
|
58
|
+
'torch_dtype': 'torch.float16',
|
|
59
|
+
},
|
|
60
|
+
datasets=[
|
|
61
|
+
'tifa160',
|
|
62
|
+
'genai_bench',
|
|
63
|
+
'evalmuse',
|
|
64
|
+
'hpdv2',
|
|
65
|
+
],
|
|
66
|
+
dataset_args={
|
|
67
|
+
'tifa160': {
|
|
68
|
+
'metric_list': [
|
|
69
|
+
'PickScore',
|
|
70
|
+
# 'CLIPScore',
|
|
71
|
+
# 'HPSv2Score',
|
|
72
|
+
# 'BLIPv2Score',
|
|
73
|
+
# 'ImageRewardScore',
|
|
74
|
+
# 'VQAScore',
|
|
75
|
+
# 'FGA_BLIP2Score',
|
|
76
|
+
]
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
limit=5,
|
|
80
|
+
generation_config={
|
|
81
|
+
'num_inference_steps': 50,
|
|
82
|
+
'guidance_scale': 7.5
|
|
83
|
+
},
|
|
84
|
+
use_cache='outputs/20250427_134122',
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
run_task(task_cfg=task_cfg)
|
tests/cli/test_run.py
CHANGED
|
@@ -209,17 +209,23 @@ class TestRun(unittest.TestCase):
|
|
|
209
209
|
task_cfg = TaskConfig(
|
|
210
210
|
model='Qwen/Qwen2.5-0.5B-Instruct',
|
|
211
211
|
datasets=[
|
|
212
|
-
'iquiz',
|
|
212
|
+
# 'iquiz',
|
|
213
213
|
# 'math_500',
|
|
214
214
|
# 'aime24',
|
|
215
|
-
# 'competition_math'
|
|
215
|
+
# 'competition_math',
|
|
216
|
+
'mmlu',
|
|
216
217
|
],
|
|
217
218
|
dataset_args={
|
|
218
219
|
'competition_math': {
|
|
219
220
|
'subset_list': ['Level 4', 'Level 5']
|
|
220
|
-
}
|
|
221
|
+
},
|
|
222
|
+
'mmlu': {
|
|
223
|
+
'subset_list': ['elementary_mathematics', 'high_school_european_history', 'nutrition'],
|
|
224
|
+
'few_shot_num': 0
|
|
225
|
+
},
|
|
221
226
|
},
|
|
222
|
-
limit=
|
|
227
|
+
limit=10,
|
|
228
|
+
eval_batch_size=10,
|
|
223
229
|
)
|
|
224
230
|
|
|
225
231
|
run_task(task_cfg=task_cfg)
|
|
@@ -284,7 +290,7 @@ class TestRun(unittest.TestCase):
|
|
|
284
290
|
# 'general_qa'
|
|
285
291
|
# 'super_gpqa',
|
|
286
292
|
# 'mmlu_redux',
|
|
287
|
-
'maritime_bench'
|
|
293
|
+
# 'maritime_bench'
|
|
288
294
|
],
|
|
289
295
|
dataset_args={
|
|
290
296
|
'mmlu': {
|
tests/perf/test_perf.py
CHANGED
|
@@ -112,9 +112,10 @@ class TestPerf(unittest.TestCase):
|
|
|
112
112
|
prefix_length=0,
|
|
113
113
|
min_prompt_length=1024,
|
|
114
114
|
max_prompt_length=1024,
|
|
115
|
-
number=
|
|
115
|
+
number=20,
|
|
116
116
|
tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
|
|
117
117
|
seed=None,
|
|
118
|
+
extra_args={'ignore_eos': True}
|
|
118
119
|
)
|
|
119
120
|
run_perf_benchmark(task_cfg)
|
|
120
121
|
|