evalscope 0.13.2__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of evalscope might be problematic. Click here for more details.
- evalscope/arguments.py +2 -1
- evalscope/backend/rag_eval/__init__.py +1 -1
- evalscope/backend/rag_eval/backend_manager.py +21 -5
- evalscope/backend/rag_eval/cmteb/arguments.py +10 -0
- evalscope/backend/rag_eval/ragas/arguments.py +0 -1
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +7 -2
- evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +0 -5
- evalscope/backend/rag_eval/utils/embedding.py +49 -3
- evalscope/backend/rag_eval/utils/llm.py +4 -4
- evalscope/backend/vlm_eval_kit/backend_manager.py +4 -2
- evalscope/benchmarks/__init__.py +2 -2
- evalscope/benchmarks/aigc/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
- evalscope/benchmarks/aigc/t2i/base.py +56 -0
- evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
- evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
- evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
- evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
- evalscope/benchmarks/aime/aime24_adapter.py +1 -1
- evalscope/benchmarks/aime/aime25_adapter.py +4 -4
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
- evalscope/benchmarks/arc/arc_adapter.py +2 -2
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
- evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
- evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
- evalscope/benchmarks/data_adapter.py +21 -10
- evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
- evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py +1 -1
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +5 -4
- evalscope/benchmarks/live_code_bench/testing_util.py +369 -550
- evalscope/benchmarks/maritime_bench/__init__.py +0 -0
- evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +79 -0
- evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
- evalscope/benchmarks/mmlu/mmlu_adapter.py +8 -8
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +1 -1
- evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +1 -1
- evalscope/benchmarks/musr/musr_adapter.py +1 -1
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
- evalscope/benchmarks/utils.py +7 -16
- evalscope/cli/start_app.py +1 -1
- evalscope/collections/evaluator.py +20 -6
- evalscope/config.py +8 -4
- evalscope/constants.py +11 -0
- evalscope/evaluator/evaluator.py +2 -2
- evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
- evalscope/metrics/__init__.py +49 -4
- evalscope/metrics/llm_judge.py +1 -1
- evalscope/metrics/named_metrics.py +13 -0
- evalscope/metrics/t2v_metrics/__init__.py +66 -0
- evalscope/metrics/t2v_metrics/clipscore.py +14 -0
- evalscope/metrics/t2v_metrics/constants.py +12 -0
- evalscope/metrics/t2v_metrics/itmscore.py +14 -0
- evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
- evalscope/metrics/t2v_metrics/models/model.py +45 -0
- evalscope/metrics/t2v_metrics/models/utils.py +25 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
- evalscope/metrics/t2v_metrics/score.py +78 -0
- evalscope/metrics/t2v_metrics/vqascore.py +14 -0
- evalscope/models/__init__.py +50 -14
- evalscope/models/adapters/__init__.py +17 -0
- evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
- evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
- evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
- evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
- evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
- evalscope/models/adapters/t2i_adapter.py +76 -0
- evalscope/models/custom/__init__.py +2 -1
- evalscope/models/custom/dummy_model.py +11 -13
- evalscope/models/local_model.py +82 -33
- evalscope/models/model.py +2 -42
- evalscope/models/register.py +26 -0
- evalscope/perf/arguments.py +24 -5
- evalscope/perf/benchmark.py +28 -42
- evalscope/perf/http_client.py +2 -3
- evalscope/perf/plugin/api/custom_api.py +1 -1
- evalscope/perf/plugin/api/openai_api.py +2 -2
- evalscope/perf/plugin/datasets/custom.py +4 -1
- evalscope/perf/plugin/datasets/flickr8k.py +2 -1
- evalscope/perf/plugin/datasets/line_by_line.py +4 -1
- evalscope/perf/plugin/datasets/longalpaca.py +4 -1
- evalscope/perf/plugin/datasets/openqa.py +4 -1
- evalscope/perf/plugin/datasets/random_dataset.py +13 -6
- evalscope/perf/utils/benchmark_util.py +14 -8
- evalscope/perf/utils/db_util.py +9 -3
- evalscope/perf/utils/log_utils.py +41 -0
- evalscope/report/__init__.py +1 -0
- evalscope/report/app.py +128 -78
- evalscope/report/app_arguments.py +11 -0
- evalscope/report/generator.py +1 -1
- evalscope/run.py +10 -3
- evalscope/summarizer.py +2 -1
- evalscope/third_party/thinkbench/eval.py +19 -7
- evalscope/utils/chat_service.py +2 -2
- evalscope/utils/import_utils.py +66 -0
- evalscope/utils/utils.py +48 -29
- evalscope/version.py +2 -2
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/METADATA +37 -15
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/RECORD +209 -96
- tests/aigc/__init__.py +1 -0
- tests/aigc/test_t2i.py +87 -0
- tests/cli/test_all.py +4 -4
- tests/cli/test_collection.py +2 -1
- tests/cli/test_run.py +19 -12
- tests/perf/test_perf.py +3 -3
- tests/rag/test_clip_benchmark.py +0 -1
- tests/rag/test_mteb.py +37 -8
- tests/rag/test_ragas.py +29 -26
- tests/vlm/test_vlmeval.py +37 -1
- evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -46
- evalscope/benchmarks/live_code_bench/execute_utils.py +0 -267
- evalscope/metrics/code_metric.py +0 -98
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
- evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/LICENSE +0 -0
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/WHEEL +0 -0
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/entry_points.txt +0 -0
- {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
evalscope/__init__.py,sha256=XZYDn3ShhM_48je5qQgwymtSdpTt8zYEnNfanYnpBdA,181
|
|
2
|
-
evalscope/arguments.py,sha256=
|
|
3
|
-
evalscope/config.py,sha256=
|
|
4
|
-
evalscope/constants.py,sha256=
|
|
5
|
-
evalscope/run.py,sha256=
|
|
2
|
+
evalscope/arguments.py,sha256=jywTxu_HWhgf0_OlnaOyRSzUHenr5Zio2vmcCgcfbxg,5453
|
|
3
|
+
evalscope/config.py,sha256=O3kjjVFRGSrlLD5EI4t99Z-m6oFtQVmEudvE62x92wY,9648
|
|
4
|
+
evalscope/constants.py,sha256=PHnsGndB4N5-jvmawPxMK5b9geE2Es5cUe8ZKYSuKgM,4016
|
|
5
|
+
evalscope/run.py,sha256=_DKbxgQGwhweBnQrI7lQhu5eoz4LYPVeNanzD4lHuJA,6476
|
|
6
6
|
evalscope/run_arena.py,sha256=WXPCT0L-b_KvLBQ9KnrVW6y8icdDcqVhaXjTZMpS8k8,8572
|
|
7
|
-
evalscope/summarizer.py,sha256=
|
|
8
|
-
evalscope/version.py,sha256=
|
|
7
|
+
evalscope/summarizer.py,sha256=61kU5ZoSh1dd8HMJPqP3ZvJwcY9szwWFCZdu2lfATJA,5920
|
|
8
|
+
evalscope/version.py,sha256=X2BkdAHDhsMo9BTAegfd5uYheDVI8rh_UG5YqMwwXUE,119
|
|
9
9
|
evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
|
|
11
11
|
evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
|
|
@@ -14,8 +14,8 @@ evalscope/backend/opencompass/backend_manager.py,sha256=y5NnAIY1pI7E1ZSeKU3acrD-
|
|
|
14
14
|
evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
15
15
|
evalscope/backend/opencompass/tasks/eval_api.py,sha256=ZaGdUbEOtAW5VX3ZXmpHIttg_QrID34EnBTylD3uvos,1152
|
|
16
16
|
evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=JHSq4EnPJgv4sRJJplLH80EqE3ghtkn2k8HnV6DaDew,5406
|
|
17
|
-
evalscope/backend/rag_eval/__init__.py,sha256=
|
|
18
|
-
evalscope/backend/rag_eval/backend_manager.py,sha256=
|
|
17
|
+
evalscope/backend/rag_eval/__init__.py,sha256=Tbj7HboP5zzJ77-9qVEwwhHKjHL5V8MwLFr6sw1oeoA,291
|
|
18
|
+
evalscope/backend/rag_eval/backend_manager.py,sha256=OEFADT8kdsuVMU0QOfiafzFQopY7bKbWZ_jhdXyYElY,3472
|
|
19
19
|
evalscope/backend/rag_eval/clip_benchmark/__init__.py,sha256=C8Vetf52nyHiRwY2Pm74Bjn3UpWboQeghCGNh67X1EM,151
|
|
20
20
|
evalscope/backend/rag_eval/clip_benchmark/arguments.py,sha256=d5UkbC3RXb6iyzy_ILumToAVO1AdwvDeyOiX5KB2u0g,1530
|
|
21
21
|
evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py,sha256=anuIhRk9OC8y0LNBjvttSXppc99gbz-f0TYQjnyLLyU,8347
|
|
@@ -27,7 +27,7 @@ evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_retrieval.py,sha256=t0U
|
|
|
27
27
|
evalscope/backend/rag_eval/clip_benchmark/utils/webdataset_convert.py,sha256=rZY-TulG-Cb8b6GTBxqTDYQ_4Ois3kbgKhuunZq8Ato,8407
|
|
28
28
|
evalscope/backend/rag_eval/clip_benchmark/utils/webdatasets.txt,sha256=eiiAaxhS48b5rVLy5O9VvFfV2AfxY86ITu_iqT7ZLkQ,649
|
|
29
29
|
evalscope/backend/rag_eval/cmteb/__init__.py,sha256=I502GHPFYo8BwlFvoljGKI24PY76eBXJQiquWk8nJNU,280
|
|
30
|
-
evalscope/backend/rag_eval/cmteb/arguments.py,sha256=
|
|
30
|
+
evalscope/backend/rag_eval/cmteb/arguments.py,sha256=y2iTbs3a7R747NgS00nK2j3zO7gmREh8n7mWMrzF1js,2653
|
|
31
31
|
evalscope/backend/rag_eval/cmteb/base.py,sha256=UCobQ81dHkiTmIz_0BJ_VANj_uG6mkJbYLKJztvMXfo,2849
|
|
32
32
|
evalscope/backend/rag_eval/cmteb/task_template.py,sha256=FyFs1reefcsFCrWyi7Ya5dnFYvBhtxph2wIaFtOtFls,2595
|
|
33
33
|
evalscope/backend/rag_eval/cmteb/tasks/Classification.py,sha256=sqbH0XmSiIm4n5UX5sXMwJHby1r-d35mwW1tKIhb2Hg,10848
|
|
@@ -39,36 +39,43 @@ evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py,sha256=ofmmeoieXHmU6O14JKWO9
|
|
|
39
39
|
evalscope/backend/rag_eval/cmteb/tasks/STS.py,sha256=uhGLsQTo5lM3-L2Na3WJGqOLQw3c1WxHDA22ePJPxtU,12285
|
|
40
40
|
evalscope/backend/rag_eval/cmteb/tasks/__init__.py,sha256=PKBNyp45hIa3FYNA1psiwtwfwUcn7s9eNt6r5aUpyyY,1505
|
|
41
41
|
evalscope/backend/rag_eval/ragas/__init__.py,sha256=D0yJkN9SuNGIAL3niZw4BI08Yh3HznsUUewdIAa_-LM,171
|
|
42
|
-
evalscope/backend/rag_eval/ragas/arguments.py,sha256=
|
|
42
|
+
evalscope/backend/rag_eval/ragas/arguments.py,sha256=S6M1nsqwMQ8lnZZDtlQTdzyOCfLn9WP0QJ_7wAEsVgc,1695
|
|
43
43
|
evalscope/backend/rag_eval/ragas/task_template.py,sha256=a_3bWfLx0j2zJkWgEWNStO0XXAeUFdnFpeukpoGfxLg,1669
|
|
44
44
|
evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py,sha256=fX9sCci787ViGiL3BhGsykx0bnWfOWWEFueaJKyR8g4,793
|
|
45
45
|
evalscope/backend/rag_eval/ragas/tasks/__init__.py,sha256=hErdWKbvV9aRqOpQTzdFHw1tcYoDbnttmic7GpZzKx8,173
|
|
46
46
|
evalscope/backend/rag_eval/ragas/tasks/build_distribution.py,sha256=vFfemiqtPx22u5pwwZxEQJKYf3B9efYmwbpWDI5hY30,1491
|
|
47
47
|
evalscope/backend/rag_eval/ragas/tasks/build_transform.py,sha256=GtAYqdVOy7BxIGyC4rSZ_UfXagKYzE6eEtXbaOI_g-k,5425
|
|
48
|
-
evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=
|
|
49
|
-
evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=
|
|
48
|
+
evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=YSqpaXMFVe8mkVfq3i_oJg1MSnPm98E7WdOBdyUwMpA,5784
|
|
49
|
+
evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=6x-4O2pgsjZCVfJNvwZEKcgLe_QhSknPg-f2jGjZkU4,1890
|
|
50
50
|
evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
evalscope/backend/rag_eval/utils/clip.py,sha256=GLHhPCac2AH35AvRLvVqePA1gIMAewHTFmCJCDZzvqU,5015
|
|
52
|
-
evalscope/backend/rag_eval/utils/embedding.py,sha256=
|
|
53
|
-
evalscope/backend/rag_eval/utils/llm.py,sha256=
|
|
52
|
+
evalscope/backend/rag_eval/utils/embedding.py,sha256=tFMepPAMO4Kkqeqh-XxXIDYRjGbCMlk7lwuUW7FNvCA,7977
|
|
53
|
+
evalscope/backend/rag_eval/utils/llm.py,sha256=acaD5QHPJUstJGpW1sNJ-3ZPT5J_Z8beOWb61Rtz07U,2607
|
|
54
54
|
evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
|
|
55
55
|
evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
|
|
56
|
-
evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=
|
|
57
|
-
evalscope/
|
|
58
|
-
evalscope/benchmarks/__init__.py,sha256=b_SWdV1ZyOqFiwc_9lIjKrIvK1rwnF2cCIF7XN9CN8E,932
|
|
56
|
+
evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=sUYvQxCtPl6CrcwhQpY8lJjW5skqWc-fvHUSnXd_MvQ,6054
|
|
57
|
+
evalscope/benchmarks/__init__.py,sha256=5AXNhhmbaBFEe3u7y5TtIrviYzFI-hC8oKqxFILs1pE,937
|
|
59
58
|
evalscope/benchmarks/benchmark.py,sha256=a_7Ctz36McuTyBSTYi56jis9pvOdWhg7JVSPFrbxqR4,2535
|
|
60
|
-
evalscope/benchmarks/data_adapter.py,sha256=
|
|
61
|
-
evalscope/benchmarks/utils.py,sha256=
|
|
59
|
+
evalscope/benchmarks/data_adapter.py,sha256=mWdxtHbordS577NqZUQZmIjlewjGDlStqc-iDvqpAyU,18061
|
|
60
|
+
evalscope/benchmarks/utils.py,sha256=yXQyszzrILNiBuUrbB1BtgotQSaNA8w6X935AL1dNAw,1074
|
|
61
|
+
evalscope/benchmarks/aigc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
evalscope/benchmarks/aigc/t2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
+
evalscope/benchmarks/aigc/t2i/base.py,sha256=4GFAvceT1Gpt5teDLRCZi62RwvPazuhG3zwft3gN3X4,2102
|
|
64
|
+
evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=WV9w3z8TxWNzVzn9A_g0xqeHh76ydnHL5xLwyg63VmU,2992
|
|
65
|
+
evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=baDGFRpVcSKpc1CdzNAMBtjeCZDUpyEc5l1KyrPNoEU,1892
|
|
66
|
+
evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=But2hcQU3X3v58poF8Qg2agrxTAP6gnjZYJs8Tr0g_4,2047
|
|
67
|
+
evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=U0RKN3apyD3YyZfIvqgO8TNuDO-zctlftHsSfBRyQxU,1825
|
|
68
|
+
evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=vOOiOe26H2dk9VN2WbB_Oi3lzavMIaYDBq6sqeSIiAU,1093
|
|
62
69
|
evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
evalscope/benchmarks/aime/aime24_adapter.py,sha256=
|
|
64
|
-
evalscope/benchmarks/aime/aime25_adapter.py,sha256=
|
|
70
|
+
evalscope/benchmarks/aime/aime24_adapter.py,sha256=GrIxCHpUwgUy8tXGTB7iQOt8k7wG8MJB0CWbwBmIy-8,1703
|
|
71
|
+
evalscope/benchmarks/aime/aime25_adapter.py,sha256=yxo5roCb8ryX9ROUU2FdZ-WBTUPZ14MrBzEL0zPOh-U,1718
|
|
65
72
|
evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=
|
|
73
|
+
evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=em1YM2PxnJ8Of7Li3eqrw8PtwfeXSinfVIr-CIKVb60,4026
|
|
67
74
|
evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
68
75
|
evalscope/benchmarks/arc/ai2_arc.py,sha256=WtL4Z_ulcCU2KfptWTjTm75T2I2rVGd9aDBBB76P14w,5697
|
|
69
|
-
evalscope/benchmarks/arc/arc_adapter.py,sha256=
|
|
76
|
+
evalscope/benchmarks/arc/arc_adapter.py,sha256=0h-eT4BBmUJQrakKMPUNE1nSRwK6LHB-cflWpWzY978,6364
|
|
70
77
|
evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
-
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=
|
|
78
|
+
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=S3FQ_UD3GC8M7FU-PPeuJm5YVrG5qhnVE5T1jRpPuxo,6131
|
|
72
79
|
evalscope/benchmarks/arena_hard/utils.py,sha256=NstI1VR5fTaT-bfXRj0cLqm0DtH8EY4EQHR-K9HJubI,5089
|
|
73
80
|
evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
74
81
|
evalscope/benchmarks/bbh/bbh_adapter.py,sha256=fROpzenrjpEBWtnvM_RL_m0uXPOhXTtYAglJEZbzUdY,8330
|
|
@@ -100,23 +107,23 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
|
|
|
100
107
|
evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
|
|
101
108
|
evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
|
|
102
109
|
evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
103
|
-
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=
|
|
110
|
+
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=1ITBXI0f01Dt1p7sb2RGswIeg9685Bkk2S2xmA1vat8,11295
|
|
104
111
|
evalscope/benchmarks/ceval/ceval_exam.py,sha256=ngOvb6Fymt7iPWIb2fzrUVpqmUT2VBoqh7X_IH8Bcsc,4824
|
|
105
112
|
evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
|
-
evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=
|
|
113
|
+
evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=zY8dfvrTeCHAQ3d7AM02CexZw5CVKH51ZOhtT7Q1Gko,8031
|
|
107
114
|
evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
108
115
|
evalscope/benchmarks/cmmlu/cmmlu.py,sha256=Y59NIGUFzJEztJbkehZsG4Cz0J_v9Cyju6xazHMYIcA,5022
|
|
109
|
-
evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=
|
|
116
|
+
evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=r9zael_Y2Jso0ashevYpF8e5SHOBh8iMcPIJU5WT3pQ,10367
|
|
110
117
|
evalscope/benchmarks/cmmlu/samples.jsonl,sha256=FXbyPQSDorKBGSD0lnOzioZmFjG07lIL87FRDRaMPSY,1722
|
|
111
118
|
evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
112
119
|
evalscope/benchmarks/competition_math/competition_math.py,sha256=Cehyokift7oDKjc8TdmfblZ6mMc39wQWtqqbUi34QLc,2629
|
|
113
|
-
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=
|
|
120
|
+
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=wgejW-_QswtT8_3JKAQ_H6svH8IotDJDBEH7X4nP4bY,6760
|
|
114
121
|
evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=
|
|
122
|
+
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=QgLgIrjD3q53T-lu1UWTV6T4h1cKGoCQDh0O4QxFezw,2569
|
|
116
123
|
evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
117
|
-
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=
|
|
124
|
+
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=fqbt61owPP7t2H4B2zbYVZTs0VBGuXNvWGvkukwhRYc,5039
|
|
118
125
|
evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
119
|
-
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=
|
|
126
|
+
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=8d5znAcQmFSmvyKV-JuMQzbY5k6xDNQQdrWZ7zgPTK4,4603
|
|
120
127
|
evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
121
128
|
evalscope/benchmarks/gpqa/chain_of_thought.txt,sha256=pgoT5a-DMPJaMhoH_M8zfU5s80ibWDTVW6vnonITd8k,5610
|
|
122
129
|
evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=UB287DtnbkSQXZsbReFJqmQRwbo672DTCeXXilR_-Vc,4790
|
|
@@ -125,7 +132,7 @@ evalscope/benchmarks/gsm8k/gsm8k.py,sha256=ZDN5lfeZyc_pkTDVY0voC_zUExHE1ZoEgEaTv
|
|
|
125
132
|
evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=ZZZ-9oja53IwiU33Kjm7NTk4MbFGWyvonhnHrn_3Na8,10557
|
|
126
133
|
evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
127
134
|
evalscope/benchmarks/hellaswag/hellaswag.py,sha256=5_c9WbaS1LIdvgXzqEcvjAEtKi2V2Yn0YtszPlFqhXI,4610
|
|
128
|
-
evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=
|
|
135
|
+
evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=SRM_-AKlWtKXi4zrlBAH9YceFnrktZDNsjvQOiPizUM,5893
|
|
129
136
|
evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
130
137
|
evalscope/benchmarks/humaneval/humaneval.py,sha256=2Exsg6u8FEu0buADY2tETJluSM8tWacvX06nykKKLSE,3395
|
|
131
138
|
evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=UOjakV31J0g7TYbrRls0ItcopWOJu54ucPfaqSJB7Os,5250
|
|
@@ -138,26 +145,27 @@ evalscope/benchmarks/ifeval/utils.py,sha256=TKrM1m2qDCUauahogItDdICf4mDk0OjasSxg
|
|
|
138
145
|
evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
146
|
evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=16whmFkJt9fLbei9d-kmjnWB_5y5vsiX9tK5kSuxDw8,2449
|
|
140
147
|
evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
141
|
-
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=
|
|
142
|
-
evalscope/benchmarks/live_code_bench/execute_utils.py,sha256=MreaMLI0IicNZawpfqcyoRLt67EZ3CJvmxxRTYwhAbU,7397
|
|
148
|
+
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=iqmVUMZmyRhzOOXXQ-NN9P1nGvvbzTjOSEp6djbN_rw,6503
|
|
143
149
|
evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
|
|
144
|
-
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=
|
|
150
|
+
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=AkvlQ-3oS8Tr3xZgx3omMt5w8jia6yH07D5Bq27Q5wc,3490
|
|
145
151
|
evalscope/benchmarks/live_code_bench/load_utils.py,sha256=5i9wtdPLYR8ckjx5MaYQVC2LFYvjKzR6Fa6UZmeOTRc,2445
|
|
146
152
|
evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
|
|
147
153
|
evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
|
|
148
|
-
evalscope/benchmarks/live_code_bench/testing_util.py,sha256=
|
|
154
|
+
evalscope/benchmarks/live_code_bench/testing_util.py,sha256=abjlwp6HDayf88mMI_daOKm06nEOeNBaMkmGWqk2DJo,17286
|
|
155
|
+
evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
|
+
evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=RVbsiglxmEW37-tDYgr4Drywh26I94DRGhwv7uP2aYk,2829
|
|
149
157
|
evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
150
|
-
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=
|
|
158
|
+
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=opT73il3CbM1zZhuqRHZu_4O4WEZCZPvZe06I4U8YGM,1911
|
|
151
159
|
evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
152
160
|
evalscope/benchmarks/mmlu/mmlu.py,sha256=sA8AC0bN7iURrSazqkY31s_reNVbDZSUCB-NCTQsVeI,5042
|
|
153
|
-
evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=
|
|
161
|
+
evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=__BrO2f7_AZ87a00HCRGPm5ZK8B4JTZKzRBRQY3yf3Q,11635
|
|
154
162
|
evalscope/benchmarks/mmlu/samples.jsonl,sha256=f5Y2vwbEvNtpE7vrl9BHoJzsdceI4vUAo1frexYyX2o,1345
|
|
155
163
|
evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
|
-
evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=
|
|
164
|
+
evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=hPqxDqDhqin3TxfimfhIxfEc_8UfzTDGAfX7iDrWy28,4248
|
|
157
165
|
evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
-
evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=
|
|
166
|
+
evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=Kr30i_exxBJRz9PLB5g6F04e2HJ4WuF6LDyAwaRh2MY,9578
|
|
159
167
|
evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
160
|
-
evalscope/benchmarks/musr/musr_adapter.py,sha256=
|
|
168
|
+
evalscope/benchmarks/musr/musr_adapter.py,sha256=85P0sY7H9pthYdCjkE2AOxaiNhcIBW1iZmODkz3FN0M,2464
|
|
161
169
|
evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
170
|
evalscope/benchmarks/process_bench/critique_template.txt,sha256=tycx8n42QEC0uGcwbIvHfZvfTnchlRxGz8Tp1R2_e_Y,489
|
|
163
171
|
evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=ydU-r1T0DaYhOxkhZgGL7PhDd4XoeqOBzVO9oiFPd8M,3422
|
|
@@ -166,7 +174,7 @@ evalscope/benchmarks/race/race.py,sha256=TtFC3opqEA6q8AQIAFQRGx07FjD9z7iW8wmtxeO
|
|
|
166
174
|
evalscope/benchmarks/race/race_adapter.py,sha256=RD0B-i5dzeNKuhqnWbremgf4tk9jmOO4_eLAiITB1F0,6381
|
|
167
175
|
evalscope/benchmarks/race/samples.jsonl,sha256=bhSktBgU6axYQCClRtQ7nN8D1x815AU8xMAIG1oflG0,1243
|
|
168
176
|
evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
|
-
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=
|
|
177
|
+
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=to4kSKc29BmtG4q9R2PeM-sdHiL8toSyoVi1D9WMRKk,8949
|
|
170
178
|
evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
179
|
evalscope/benchmarks/super_gpqa/five_shot_prompt.txt,sha256=vD3RMeQustxY_oWA8IobntjywT8ZUO7Jaub--rElDT4,4718
|
|
172
180
|
evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=BqNLL8BYnK6tRuIdV6ijL4Uym2SejH_h1BV06XNjSE4,9331
|
|
@@ -182,69 +190,170 @@ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=ueUU860kg5_xf_MtU
|
|
|
182
190
|
evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
183
191
|
evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
|
|
184
192
|
evalscope/cli/cli.py,sha256=w_dtXljur9s5lmTn6LbbFL_viTPQB1WAEzhYcId09Og,729
|
|
185
|
-
evalscope/cli/start_app.py,sha256=
|
|
193
|
+
evalscope/cli/start_app.py,sha256=PoAnmYLw_UdWpA7qrUkSIx8hRoIGRy9yXrbH8bYOSL4,804
|
|
186
194
|
evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
|
|
187
195
|
evalscope/cli/start_perf.py,sha256=5hLi5jWgM9BJPXLd8d9D1zqrcj_5c0KvkfB1DgD4_RU,831
|
|
188
196
|
evalscope/cli/start_server.py,sha256=DQRIfbsHaOAsVcLGF6iRyJnxmd5Sf_tgytpJNfiWCeE,3662
|
|
189
197
|
evalscope/collections/__init__.py,sha256=hd68Qf-ryeDsz5Pu-Dh83M5V5RE5mhLsG-vc55n5n0o,228
|
|
190
|
-
evalscope/collections/evaluator.py,sha256=
|
|
198
|
+
evalscope/collections/evaluator.py,sha256=Ll-qLet04aEp1WxoCKAuvZVWEZuy1lS_D-vZIN3zSQQ,13425
|
|
191
199
|
evalscope/collections/sampler.py,sha256=2NwvhJVdi-mrDeK7RWwEGOoE7DdxtpyASRUZU_D6hWw,4855
|
|
192
200
|
evalscope/collections/schema.py,sha256=mjJfNmy_athJ1TmnuJRkrKRlefzefuQXZuTtjn8SHKo,4073
|
|
193
201
|
evalscope/evaluator/__init__.py,sha256=S6MU1O_iiNAaKxNIhO9MEmdW-BSNf_YH2l6NQ9lxVNo,103
|
|
194
|
-
evalscope/evaluator/evaluator.py,sha256=
|
|
202
|
+
evalscope/evaluator/evaluator.py,sha256=M1JrsoZZ5OvcZfzgLrNSMtbbz5gvvCd0GwJArJQV0lk,19797
|
|
195
203
|
evalscope/evaluator/rating_eval.py,sha256=uo0uj9z_TDsxdYlT8WIfNZhFLAfRkW9zn_wlu-F72O0,5575
|
|
196
204
|
evalscope/evaluator/reviewer/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
197
|
-
evalscope/evaluator/reviewer/auto_reviewer.py,sha256=
|
|
198
|
-
evalscope/metrics/__init__.py,sha256=
|
|
199
|
-
evalscope/metrics/
|
|
200
|
-
evalscope/metrics/llm_judge.py,sha256=Di0Q1c6VHLl0nQ_TVOZOOQlMApDIU83HuDPTOV8XrTA,4023
|
|
205
|
+
evalscope/evaluator/reviewer/auto_reviewer.py,sha256=5WRYuXFTDgVmolrOdiTysk-mXrpw6Qg87-iuY-VD1W4,16618
|
|
206
|
+
evalscope/metrics/__init__.py,sha256=y1sdj5FBKYW1q5kLC6QREzoITHwstJRUdji6p0X5aAE,1363
|
|
207
|
+
evalscope/metrics/llm_judge.py,sha256=MjyTC-xiSThk8Rd4IdUbsCXeeikoOORv6wt8H7SW8s4,4008
|
|
201
208
|
evalscope/metrics/math_parser.py,sha256=uTDudn305G3b8-GboWTrDE6OfrEwAW-areHnoGXZ6Is,17302
|
|
202
209
|
evalscope/metrics/metrics.py,sha256=_YI7RhxlFu_JOgeE3LF9UKu6mJruvyu4FgqVf78Bjb8,13813
|
|
203
|
-
evalscope/metrics/named_metrics.py,sha256=
|
|
210
|
+
evalscope/metrics/named_metrics.py,sha256=PrzU_1mGTeRFxVJFT1aXxIOiS7MnNoWyZsb8uCRVDeE,2278
|
|
204
211
|
evalscope/metrics/rouge_metric.py,sha256=zhIUqenSuxnORR9tamLQBGjFwP91Zei2UiLtcOyseVM,4639
|
|
205
212
|
evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
|
|
206
213
|
evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=Kq6AObenmLVQ5tN3NgN042a6mgRFQmRO21-ohd9mSa8,11972
|
|
207
|
-
evalscope/metrics/
|
|
208
|
-
evalscope/metrics/
|
|
209
|
-
evalscope/
|
|
210
|
-
evalscope/
|
|
211
|
-
evalscope/
|
|
212
|
-
evalscope/
|
|
213
|
-
evalscope/models/
|
|
214
|
-
evalscope/models/
|
|
215
|
-
evalscope/models/
|
|
216
|
-
evalscope/models/
|
|
217
|
-
evalscope/models/
|
|
218
|
-
evalscope/models/
|
|
214
|
+
evalscope/metrics/t2v_metrics/__init__.py,sha256=GBxgKTPVy_qhW_F3M4Oi6QMWhdAi4PqGX5w3t6Tueho,1783
|
|
215
|
+
evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
|
|
216
|
+
evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
|
|
217
|
+
evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
|
|
218
|
+
evalscope/metrics/t2v_metrics/score.py,sha256=6tIKZoQprlQOBoV-2E-3InIi2Jl29a9W2BFPjKnV1nw,3044
|
|
219
|
+
evalscope/metrics/t2v_metrics/vqascore.py,sha256=UmcSSdQN8mzs3b11sD5Z31WIyQVQUpgXKWQ1XYoX1c8,469
|
|
220
|
+
evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
|
+
evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
|
|
222
|
+
evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
|
|
223
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
|
|
224
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=QhksCBA12Ekm67H4TiROkC84dcbHB4zL5oO7BU4fwnI,8099
|
|
225
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=h5z0HcnobkGw7vEeIwiVauwYC5GRyKczdevZi60a1aw,3328
|
|
226
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=atX4JAxR4xAmBZ0WIVf_K8g3tNvqeuXNIIUX50bzo4Q,3806
|
|
227
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=dp6ZyWKU9sJ2MjsyQJvTi_tBoEs6l2-KYmjz8cN_SL0,2394
|
|
228
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
229
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
|
|
230
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=7Jl2noVHFZUN5rXd9XDBr2ILChP56JPOM1mbJSq5G8c,5047
|
|
231
|
+
evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=I6KFiy1DPCtev7TLOFxjUjkC13Mt2eBBha_XaN31nlI,7940
|
|
232
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
|
|
233
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=9IGXCC8d6a6WTIICcC_KgZf0KdDcJ3L_HOSXILJmMIA,3447
|
|
234
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=3uduuEmoliezG5BwymoNbRm3FXvKh9gtkN74_LX9yIs,3674
|
|
235
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=OtnnYsW0G2vGoUHfyB0F-m5r10A5-N6k9agFRXav-Uw,3199
|
|
236
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=IPQcC4-cYeJjHGRysh26E3iNGHz1UJA-oxxEpSIXpX0,6021
|
|
237
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
238
|
+
evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=ZCN_PJ3jz-a9I0oGbeuOcGuMOJT5iVb-yh5Dzq49VwE,2700
|
|
239
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
|
|
240
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=krWGPe--eLHwK9M2tqWkmu7iKlbAM_qanP46NUkmkhM,9896
|
|
241
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=R5VS77aDRX4HVcwJ7xOAnf_uP8jhix4PXbxWim1BOdg,5903
|
|
242
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=p22TqpCDUFV39I9anYjl5zehNXOCtPQ15fHnEeDrF3A,4712
|
|
243
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=oEsZOQCZl6NS0SaNn9mM4S3NSg6lT5Lm_HH9Ju7i8O0,587
|
|
244
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
245
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
|
|
246
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=mMDRPYpSUsnbc3ijicy4IPD6J2z44iAIgKUdhkf5Nkw,14037
|
|
247
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
|
|
248
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=Em8li9Mqoc3Xv3sDz4lAlKU4h9vZpUkzycGGyM6a-sM,2807
|
|
249
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
|
|
250
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
|
|
251
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=R38FVQMznUzTAfq6DLp-Y34XYYWkqmy1-aMfNcGKthk,14998
|
|
252
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
|
|
253
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=0spv43GzXh_5l9BUZN8tuPZgGKkkU8InWjvFuUuRbLw,801
|
|
254
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=LPZP_XK612apDxYkvGsH8B8E2Z2Q2CaT6JY0T1dghEk,5866
|
|
255
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=1eMz5jxA06uoK-sZyD7SNnBy87gbwplt8526koTRBLo,3330
|
|
256
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=he84q4G6OdycZDQHVkHUxENuIQBBH8WJ-9GBiMpYC-A,9715
|
|
257
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
|
|
258
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
|
|
259
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=F2OldVATVzyvaSxO2l-tBexhOkmoVb7n23S89JfFIEc,8313
|
|
260
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=QKYKGyXpQbDWJfz6EDZCB5meB5HGj59ygmoPm00Q1dQ,10955
|
|
261
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
|
|
262
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
|
|
263
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
|
|
264
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json,sha256=zVHOJiAdTS92rHzg62Q0oTZZsZalondGMqDJJfbolAU,491
|
|
265
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml,sha256=Ls2ZfsKV3gDzg6F2zBHPhFbK-3na7ozNGWFmMq_8hTg,1074
|
|
266
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml,sha256=yXP6HQVyEYc1X2C_SawNIye4eoaQPxl8JazV7CXUPDc,1073
|
|
267
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml,sha256=OpdOvll7UX6nURi84rDvWiFZrLsNNHtoFRWdugVPvdA,1073
|
|
268
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml,sha256=hPHF831LSjBGbo1fg9fqhbeSAGOVW-iiZbWHVQVs8wU,957
|
|
269
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml,sha256=xMLIGVhkgwBsP9IbKFoZNW_lbVwwjz44ArlSRPS1Q98,980
|
|
270
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml,sha256=kyfbbtLOG8cY9CUFm-_g56djMMwfZhOgXSFV2gRMomQ,983
|
|
271
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml,sha256=_e45RN28lhhyFRg7JsDczNMU_nJbwirn3eVB3mgkmrI,1022
|
|
272
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml,sha256=6ycWPKz5alaQCxpuPuqX1e_whroRULgb8gICOWLDBO4,1019
|
|
273
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml,sha256=svzg_Ao0g0-tAnBKT0Jj4PDRvv1ikSxS1Dq5YkzrUTU,860
|
|
274
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml,sha256=ZZE6AWe8iiLTXYiJk60P0J4cRLwehLYzRn1ohZxgstI,955
|
|
275
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml,sha256=T7L97c2yFLZ5N3_4NFqvRxShvr7relE2GNREuukufCU,955
|
|
276
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml,sha256=U0s_BMVb21E2aGnLGBstzdR3WSTP_gk4Hubnnt50lcg,952
|
|
277
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml,sha256=AIgEAeTUqFiNVq-uIjbE_zh7jDPLFwchZsw0fCvWqU0,982
|
|
278
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml,sha256=CQZQICT2ogqwKklzWVUnfWidOY-Deflh_WD-vq08sys,958
|
|
279
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml,sha256=UGuL20xRPZPy3sZeqMgIzovdd1BOTESwTS2gfwsdGFk,955
|
|
280
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml,sha256=eKuyCU7yFIU1VSHNRzEu4Bm7NY6NPppIHcTd0RKXUrI,955
|
|
281
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
|
|
282
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
|
|
283
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
|
|
284
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=9QnWMeulCQm_c__-b9cBYbjqihLDSblW3-luGnxfBXQ,6391
|
|
285
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
|
|
286
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=eCm7opG-Ld--sZEG67creYqoRsHjWBuWwUha2s13AuM,9806
|
|
287
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=1vuUf6EGzfiyMOHmacI3NhjCIq1LFhANpLJ_Frgh3Mk,20423
|
|
288
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=PobqtMnfh7HuD4zdlj5tBMufvufIO5fIL0_NvTOK4AQ,52965
|
|
289
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=L20LYyiI8AGOTKfTJuYZgrkcvg2UHsnFJE866v5jlyE,18674
|
|
290
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=gKOIPQhyB8SVbx0wW3W-VQNmZhdA4186C1XVHHWYkjc,46810
|
|
291
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
292
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
|
|
293
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
|
|
294
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=A5y_qCsmW9j78w92L9VEjXRaqcsyI5FCu0Z9QJvKF_4,18960
|
|
295
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=hcPHQjb-QJRAEFpDackLqjKeu8z_uKz_5VyyeU3AQQI,13879
|
|
296
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=DM1W0vZ0ZhoR9vvbl4O_MlShApZHv2MtK37zHsVoCjc,30527
|
|
297
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=5Ub0AYnQFcG32ByitEah6Hog6oAKNrK7HTnmlLI4XfE,11325
|
|
298
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=uT-ubavkEWPsqVtFy-rPQCw2rf-VOd-ZLqP_oDyNTIs,39581
|
|
299
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=grUl2rtGJVDoP6u47jywBzcmqL9kylqCf1IkyKCgNLE,83469
|
|
300
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=iuiXv-jZcWVoUle6GDQXLy1SMXt4WPJEOD4qhjPe6U8,3856
|
|
301
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=1XxKgsA6QYBkaADLUoVwFDPtRJzCvcSrk-1lwGKzF_c,2286
|
|
302
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
|
|
303
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
|
|
304
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
|
|
305
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=uLV8Qp8lRGkMVq5EtvbPa4l8ZpykN6godzblV7oj8bg,7086
|
|
306
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=r-CRo6u9qmFcdSYNz9pWeuwj0XKykuWyFm7pQVK2yI4,6939
|
|
307
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=Vk1Fm7ED1eAHW6lmSKF7VBZa55EN4h1zBEBdm5uKW24,4303
|
|
308
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
|
|
309
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=8THNmMvp7bYNIfExYGRfNJ88cuEvdK9KTO-3-51fUsY,13961
|
|
310
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=TkSEKghtqXW_1bHFsG9dhGVKv2XAmTMcdAH2UDvuSFM,36639
|
|
311
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=rGifS4I0o3faXrLnW-eSbksmkx7ppMkRlnVBomsbD3Q,851
|
|
312
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
|
|
313
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
|
|
314
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=XzebAHBAjOpkIMZm43dd55PESgmyq_J45Ji6bogYR3s,11204
|
|
315
|
+
evalscope/models/__init__.py,sha256=yB4NuKvSd3Jd4GRQvJeGPxwigd8RJErdop5PzSQhsMY,1565
|
|
316
|
+
evalscope/models/local_model.py,sha256=1yjwt7NHE7pI8xoGv38NTql9KcCd80x1mjlELqkNHBQ,4110
|
|
317
|
+
evalscope/models/model.py,sha256=MxvJAUNkuT7IA3bchnmJDur_YCKj9ShOD2Uq40dBcGc,6308
|
|
318
|
+
evalscope/models/register.py,sha256=pNC69YUvw-lodYpOXmByHm26h4m0Lofgd_om-JhOBq4,1882
|
|
319
|
+
evalscope/models/adapters/__init__.py,sha256=mduiDZ6LgmkefNf4CtObZk6heOB93HxxgqTuYvrqWoo,590
|
|
320
|
+
evalscope/models/adapters/base_adapter.py,sha256=f2FY8DLERudkfb4_anxNVFE_D19xCJj9BObiHWspewI,3268
|
|
321
|
+
evalscope/models/adapters/chat_adapter.py,sha256=HD1jAKlAv5KRjzB0s21E4rTEIhryZhZHMpSctF9xrN8,7306
|
|
322
|
+
evalscope/models/adapters/choice_adapter.py,sha256=4fuz3MFEqK8ln4mMs3goMCdRPBwYmmgN70HTdr_sW_U,8005
|
|
323
|
+
evalscope/models/adapters/custom_adapter.py,sha256=w8cD0b3xgcdhSZelcat67CGJnALOfz5IALzURnLjab8,2275
|
|
324
|
+
evalscope/models/adapters/server_adapter.py,sha256=5kH1yDAjETogR7aOdnCEueYE1bREI40OdXdBiJpMdIM,6734
|
|
325
|
+
evalscope/models/adapters/t2i_adapter.py,sha256=xkMRyZ61yTiJfmULK-p9du4nNox41pkHiV2CTFBO3qM,2659
|
|
326
|
+
evalscope/models/custom/__init__.py,sha256=MZylegALg1HerOYtp-qbzu4Wb6PW3JbrxwONHU-PAVs,131
|
|
219
327
|
evalscope/models/custom/custom_model.py,sha256=rBQLAuPEw_OPUtRSCEmxEfpcA8jPj8bAdsmtKs4ygus,1566
|
|
220
|
-
evalscope/models/custom/dummy_model.py,sha256=
|
|
328
|
+
evalscope/models/custom/dummy_model.py,sha256=WRT_aCBZLXnC4yRCgggkuySkhM71C47O2Txx_YNc3UM,1933
|
|
221
329
|
evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
-
evalscope/perf/arguments.py,sha256=
|
|
223
|
-
evalscope/perf/benchmark.py,sha256=
|
|
224
|
-
evalscope/perf/http_client.py,sha256
|
|
330
|
+
evalscope/perf/arguments.py,sha256=UZKlkbDE2N408dY8Ji-WB8sl1rcmamywzxLvNXpnY0w,10194
|
|
331
|
+
evalscope/perf/benchmark.py,sha256=nv7gtCkeKnLKQQiKM4G0MYO2ambcuwsbx67OgEQG0nM,7917
|
|
332
|
+
evalscope/perf/http_client.py,sha256=-c3-N7bxKsj3d5DVsKSaYA3XAHJDzZgoqZBbhuDYIGk,7419
|
|
225
333
|
evalscope/perf/main.py,sha256=w-yDbl0osaTAMgC-JNPpqIq2LQ7U4c-Ht7Amj8Nbjc8,1278
|
|
226
334
|
evalscope/perf/plugin/__init__.py,sha256=1sl5s-csrwKb_LVTnpF3HqArz06TRD5LYJ0hpqvokUA,85
|
|
227
335
|
evalscope/perf/plugin/registry.py,sha256=w1IAt6GDdluzSYK5i-yrntvx3_EvIIqJamEL0xZv3zA,1323
|
|
228
336
|
evalscope/perf/plugin/api/__init__.py,sha256=Ckzbq4CkSMVQTedQcDHCYlRd6FTwQAElt2mHB-VXJac,195
|
|
229
337
|
evalscope/perf/plugin/api/base.py,sha256=B_H04qKx7eRTn155rnDrbTYur7PK1mvxfQKYcqYbndU,2118
|
|
230
|
-
evalscope/perf/plugin/api/custom_api.py,sha256=
|
|
338
|
+
evalscope/perf/plugin/api/custom_api.py,sha256=ssE4J8AynA0n5SnXSQyk7K5Co3dwUN6Opph08clZna0,3785
|
|
231
339
|
evalscope/perf/plugin/api/dashscope_api.py,sha256=V5fwn-p_fLH0dWKzhN9TvYSHRgla4INfXC4NDaIjoQ8,3825
|
|
232
|
-
evalscope/perf/plugin/api/openai_api.py,sha256=
|
|
340
|
+
evalscope/perf/plugin/api/openai_api.py,sha256=kTL_2OACuKhzd2W0Pf4DirpMumzk4V3rqKZ2mvBZVCs,7655
|
|
233
341
|
evalscope/perf/plugin/datasets/__init__.py,sha256=Z6Jc0RxJS_z0nBBV1-b0-56Ija60AtQ7I_67gY6ZfdQ,568
|
|
234
342
|
evalscope/perf/plugin/datasets/base.py,sha256=Z-INWueeYjfEZhP4lbTlBMVwIa6BcXZKWx-w7Pop3mA,1786
|
|
235
|
-
evalscope/perf/plugin/datasets/custom.py,sha256=
|
|
236
|
-
evalscope/perf/plugin/datasets/flickr8k.py,sha256=
|
|
237
|
-
evalscope/perf/plugin/datasets/line_by_line.py,sha256=
|
|
238
|
-
evalscope/perf/plugin/datasets/longalpaca.py,sha256=
|
|
239
|
-
evalscope/perf/plugin/datasets/openqa.py,sha256=
|
|
240
|
-
evalscope/perf/plugin/datasets/random_dataset.py,sha256=
|
|
343
|
+
evalscope/perf/plugin/datasets/custom.py,sha256=npreC7H1VsdTGYkqlMESvyOhtXOfZQA7_-ICmxe3FWk,936
|
|
344
|
+
evalscope/perf/plugin/datasets/flickr8k.py,sha256=MbJKEB0XqZE0nDEenwYs0FLH9QL658Vn9uQmUH4hPvk,1605
|
|
345
|
+
evalscope/perf/plugin/datasets/line_by_line.py,sha256=AqZYG6tVL3BIGnzh_2Tev8lDYezJG_1gqJY8bSNQl3Q,957
|
|
346
|
+
evalscope/perf/plugin/datasets/longalpaca.py,sha256=XelLris0-c3StLInQ-Oav4jqGcXPNfJxEDeYvaetEbI,1297
|
|
347
|
+
evalscope/perf/plugin/datasets/openqa.py,sha256=4Pnx5duFJzoiTUfZCbcK7LO8f-skmcpYNUUrtNR_UUc,1463
|
|
348
|
+
evalscope/perf/plugin/datasets/random_dataset.py,sha256=SIlsjAE_Stknfr6o1CBFvANBGCSgSExFbscLwSM_Gmk,2958
|
|
241
349
|
evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
|
|
242
350
|
evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
243
351
|
evalscope/perf/utils/analysis_result.py,sha256=ESzaZHGTpr2LoJR3jpOzqMphxSrr79d364ZzD159PmY,1169
|
|
244
|
-
evalscope/perf/utils/benchmark_util.py,sha256=
|
|
245
|
-
evalscope/perf/utils/db_util.py,sha256=
|
|
352
|
+
evalscope/perf/utils/benchmark_util.py,sha256=CftjnxYA7d1aeAL_iuyXcJPwCL5A8zWGZSkNtjrMyW8,6309
|
|
353
|
+
evalscope/perf/utils/db_util.py,sha256=efz6qQtMIYAIpG0sAEjLwuzTHBUiuzAV1n7_DCGrN5o,9461
|
|
246
354
|
evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
|
|
247
355
|
evalscope/perf/utils/local_server.py,sha256=clF8i0UFmaxBBB6gX05KvVCyzSv0xzsAidz0_sLLlAk,4627
|
|
356
|
+
evalscope/perf/utils/log_utils.py,sha256=1jmB31W3ol9ukPAPbQ8xG3yoZ9oi3tjEyMK5M3ERmbw,1471
|
|
248
357
|
evalscope/registry/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
249
358
|
evalscope/registry/config/cfg_arena.yaml,sha256=rub6ceaQxxB1mbSjdoFf0IaVgGfbOonV2nYRebv2OKo,3292
|
|
250
359
|
evalscope/registry/config/cfg_arena_zhihu.yaml,sha256=tvvihBwvoTjoezwTSaZwoGOB44ysofpnin4pNyY9TfQ,2755
|
|
@@ -266,10 +375,11 @@ evalscope/registry/tasks/general_qa.yaml,sha256=S3kdlrazWX2VAX2PMhNtBnFZVSnUKBNi
|
|
|
266
375
|
evalscope/registry/tasks/gsm8k.yaml,sha256=M2I7otwOSy0usD8yG8d6QziASQlKdhKLflRHMG0LXiM,729
|
|
267
376
|
evalscope/registry/tasks/mmlu.yaml,sha256=cJcMH1Cvgo9PlYoTmeGx2bcZayysltaa6ehK57dDkvo,726
|
|
268
377
|
evalscope/registry/tasks/mmlu_mini.yaml,sha256=K8ouHh7ve5ZsbkqRtV3Jl-DF01YFPuObfwEdACJA4Pk,778
|
|
269
|
-
evalscope/report/__init__.py,sha256=
|
|
270
|
-
evalscope/report/app.py,sha256=
|
|
378
|
+
evalscope/report/__init__.py,sha256=iLNqx7CnHSHQmOBqWUK_vt2VIjnvGslJTqn--7B4y_s,316
|
|
379
|
+
evalscope/report/app.py,sha256=8pcQi5oYAYa9hXoMoMUNfy9jSvSR9DDiXyLcyPd9AmA,28459
|
|
380
|
+
evalscope/report/app_arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
|
|
271
381
|
evalscope/report/combinator.py,sha256=O3QirwtYhDhdaWVT4STJMCGZMwoX8BTeJ3HtS9iwnWQ,2567
|
|
272
|
-
evalscope/report/generator.py,sha256=
|
|
382
|
+
evalscope/report/generator.py,sha256=q9aHWNjQgvutAKtpjfWOpfu5zNFdnXilO9OqBqt_Phg,3612
|
|
273
383
|
evalscope/report/utils.py,sha256=DRlbjbqHEmM8rGlA4pwtlHFhOZtyUzcqiS-mejfIDkU,4584
|
|
274
384
|
evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
275
385
|
evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
|
|
@@ -289,7 +399,7 @@ evalscope/third_party/longbench_write/tools/__init__.py,sha256=I_ANdxdcIHpkIzIXc
|
|
|
289
399
|
evalscope/third_party/longbench_write/tools/data_etl.py,sha256=T7a-4PwZg5alZQh-oTi1zjMxjGmVVZYVwSR9-diZlF8,5971
|
|
290
400
|
evalscope/third_party/longbench_write/tools/openai_api.py,sha256=PiIvvDYJkn041SJkLoroXwl1B8TtwpB7licVfqNSeuQ,8168
|
|
291
401
|
evalscope/third_party/thinkbench/__init__.py,sha256=C0aSu71_dc1upUVkKmq2VgDd9plpRcYUdCE6BjUWJcA,110
|
|
292
|
-
evalscope/third_party/thinkbench/eval.py,sha256=
|
|
402
|
+
evalscope/third_party/thinkbench/eval.py,sha256=IyfVTm6arhjBgvGMG5OZwopqQTmWVMJ8zYbbVSLtrvk,19503
|
|
293
403
|
evalscope/third_party/thinkbench/infer.py,sha256=2L4DAJKn3wAhNEKnKudQT60igGOJSKH80FR4nS7DHYk,3952
|
|
294
404
|
evalscope/third_party/thinkbench/resources/critique_template.txt,sha256=d4Egc-qH--4lG8X_EcmgymnuZgiCMbee1M5pt4HrRKA,535
|
|
295
405
|
evalscope/third_party/thinkbench/resources/reformat_template.txt,sha256=zTZyVAzmMBtAwI9lHly9EXsqX471OW-VTg538PDcB30,1775
|
|
@@ -308,34 +418,37 @@ evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1
|
|
|
308
418
|
evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=GITEbyiER10Zi-ZWpSqYCdAsiVtNeGK24hvR3kmYn2s,2689
|
|
309
419
|
evalscope/utils/__init__.py,sha256=jLVoGryuqUh4Km9QWWQBzpqkcVNRK0MbwNaSgckqdiU,139
|
|
310
420
|
evalscope/utils/arena_utils.py,sha256=Gf8VpH4C_oF2Abif_QeL0rAP6tvTzsc0gglpdNkUE48,7155
|
|
311
|
-
evalscope/utils/chat_service.py,sha256=
|
|
421
|
+
evalscope/utils/chat_service.py,sha256=U2jtrkOa2asRp16Zam0zIi_38mCyWQqql_L6JSwii4I,8749
|
|
312
422
|
evalscope/utils/completion_parsers.py,sha256=YWHkLkSfURTcUjNNlCL6PPDICd4F2Ns9figgPN4C97c,2933
|
|
313
423
|
evalscope/utils/filters.py,sha256=x_NX40uWMmUsVrAGHCeeV2e63HZZFugWUgdUhk64ivM,1523
|
|
424
|
+
evalscope/utils/import_utils.py,sha256=Oo8saX_mMw4U1RrA7_pn8FmV6P9laru4fEgecqqwpqk,2585
|
|
314
425
|
evalscope/utils/io_utils.py,sha256=Tjdgen1FsAA4ArqiUzu734L0Px5NuiS0GKRRiGIzxSA,4192
|
|
315
426
|
evalscope/utils/logger.py,sha256=barHSdtbEu21ynGQj_wS-rd7B02wPPR5AgaWCQzvG4w,3638
|
|
316
427
|
evalscope/utils/model_utils.py,sha256=hB9W334ecAb6553FhooT6_jM0g-tjj6AU48IV3K1CKw,1131
|
|
317
|
-
evalscope/utils/utils.py,sha256=
|
|
428
|
+
evalscope/utils/utils.py,sha256=hP_ntROFsZ-zaNVpJtT2prNo8iX-UAKfRtdxbLtPJng,11105
|
|
318
429
|
tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
319
430
|
tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
|
|
431
|
+
tests/aigc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
432
|
+
tests/aigc/test_t2i.py,sha256=_M3WxY5ruBM4RD7rYHhgizcIhH-ny5XD9M16Ayl3UPk,2619
|
|
320
433
|
tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
321
|
-
tests/cli/test_all.py,sha256=
|
|
322
|
-
tests/cli/test_collection.py,sha256=
|
|
323
|
-
tests/cli/test_run.py,sha256=
|
|
434
|
+
tests/cli/test_all.py,sha256=pwup--iNxckUEsR_aFjIAbEQo3UogSu5aIWf9ryLP2o,4022
|
|
435
|
+
tests/cli/test_collection.py,sha256=y8FjoPziPRf5BdJK8DHjcXn26ETKz1OyqjnCpwjt-F4,4096
|
|
436
|
+
tests/cli/test_run.py,sha256=4B-6sOyotK3omirZWWyg7-CcnUSeZjiaU3aXHr0hH_Y,16804
|
|
324
437
|
tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
325
|
-
tests/perf/test_perf.py,sha256=
|
|
438
|
+
tests/perf/test_perf.py,sha256=8K5tGlWwOpYWnJ0GaCpqSw9zPOiM8fEKJaDil2mpTSQ,3831
|
|
326
439
|
tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
327
|
-
tests/rag/test_clip_benchmark.py,sha256=
|
|
328
|
-
tests/rag/test_mteb.py,sha256=
|
|
329
|
-
tests/rag/test_ragas.py,sha256=
|
|
440
|
+
tests/rag/test_clip_benchmark.py,sha256=ZCBtgnF8Vuji6WQlb92-_RIvXlUX_Xt-cHZP4AN_DNI,2552
|
|
441
|
+
tests/rag/test_mteb.py,sha256=YJw6X1jwX6SYNB-ryVb-OHJWu3vsE3Y4STATI75rdG0,5619
|
|
442
|
+
tests/rag/test_ragas.py,sha256=E7rfKpKtBqglOL1GcW9adfY8nsOZMuoB8GC55UL1Q3c,4517
|
|
330
443
|
tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
331
444
|
tests/swift/test_run_swift_eval.py,sha256=JKG-0BwTxkbg-XeiXxujPqnVIM3f2EFaJ_9a7p_R4dk,5748
|
|
332
445
|
tests/swift/test_run_swift_vlm_eval.py,sha256=C8DftjewnZaerQWfERI70bU3sQLWQ-ejZUQhtYO5e0o,4898
|
|
333
446
|
tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=THZEXUOSqm9rWslwJHmZyh-Ytv5c_QKpgRW5J2s_69E,6017
|
|
334
447
|
tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
335
|
-
tests/vlm/test_vlmeval.py,sha256=
|
|
336
|
-
evalscope-0.
|
|
337
|
-
evalscope-0.
|
|
338
|
-
evalscope-0.
|
|
339
|
-
evalscope-0.
|
|
340
|
-
evalscope-0.
|
|
341
|
-
evalscope-0.
|
|
448
|
+
tests/vlm/test_vlmeval.py,sha256=UqRiBPMU3vRtLIG1Qu4ZVhyUQx-zGYQuLCgobwf-7a4,3176
|
|
449
|
+
evalscope-0.15.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
|
|
450
|
+
evalscope-0.15.0.dist-info/METADATA,sha256=MLn0s_L7s0oeQPWL1XuhihDAFJnzLdVTvdrep-9Bgag,34053
|
|
451
|
+
evalscope-0.15.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
452
|
+
evalscope-0.15.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
|
|
453
|
+
evalscope-0.15.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
|
|
454
|
+
evalscope-0.15.0.dist-info/RECORD,,
|
tests/aigc/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
tests/aigc/test_t2i.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Imports are grouped stdlib / third-party / project, with every import placed
# ahead of the module-level setup statements.
import os
import unittest

from dotenv import dotenv_values

from evalscope.config import TaskConfig
from evalscope.constants import EvalType, JudgeStrategy, ModelTask, OutputType
from evalscope.run import run_task
from evalscope.utils import test_level_list
from evalscope.utils.logger import get_logger

# Values parsed from a local `.env` file; this module only stores them in `env`.
env = dotenv_values('.env')

# Set before the logger is created, as in the original statement order.
os.environ['LOG_LEVEL'] = 'DEBUG'

logger = get_logger()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestRun(unittest.TestCase):
    """End-to-end text-to-image (T2I) evaluation runs driven through ``run_task``.

    Neither test asserts on the returned results; a test passes when
    ``run_task`` completes without raising.
    """

    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_general(self):
        """Run the ``general_t2i`` dataset with every listed metric.

        No ``model`` is passed to ``TaskConfig`` here; the dataset is pointed
        at a JSONL file through ``dataset_id``.
        """
        # NOTE(review): presumably the JSONL already references generated
        # images, which would explain the missing model -- confirm.
        task_cfg = TaskConfig(
            datasets=[
                'general_t2i'
            ],
            dataset_args={
                'general_t2i': {
                    'metric_list': [
                        'PickScore',
                        'CLIPScore',
                        'HPSv2Score',
                        'HPSv2.1Score',
                        'BLIPv2Score',
                        'ImageRewardScore',
                        'VQAScore',
                        'FGA_BLIP2Score',
                        'MPS'
                    ],
                    'dataset_id': 'custom_eval/multimodal/t2i/example.jsonl',
                }
            }
        )

        run_task(task_cfg=task_cfg)

    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_benchmark(self):
        """Generate images with SDXL and evaluate them on four T2I benchmarks.

        Only ``tifa160`` gets explicit ``dataset_args``; the other three
        datasets are configured solely by being listed in ``datasets``.
        ``limit=5`` is passed to ``TaskConfig`` alongside the generation
        settings.
        """
        task_cfg = TaskConfig(
            model='stabilityai/stable-diffusion-xl-base-1.0',  # model on modelscope
            model_task=ModelTask.IMAGE_GENERATION,  # must be IMAGE_GENERATION
            model_args={
                'use_safetensors': True,
                'variant': 'fp16',
                'torch_dtype': 'torch.float16',
            },
            datasets=[
                'tifa160',
                'genai_bench',
                'evalmuse',
                'hpdv2',
            ],
            dataset_args={
                'tifa160': {
                    'metric_list': [
                        'PickScore',
                        # The metrics below are left disabled; uncomment to enable.
                        # 'CLIPScore',
                        # 'HPSv2Score',
                        # 'BLIPv2Score',
                        # 'ImageRewardScore',
                        # 'VQAScore',
                        # 'FGA_BLIP2Score',
                    ]
                }
            },
            limit=5,
            generation_config={
                'num_inference_steps': 50,
                'guidance_scale': 7.5
            },
            # NOTE(review): hard-coded, timestamped directory from one specific
            # earlier run. Presumably it has to exist locally for cached outputs
            # to be reused -- confirm, or make it configurable before relying on
            # this test in CI.
            use_cache='outputs/20250427_134122',
        )

        run_task(task_cfg=task_cfg)
|