evalscope 0.13.2__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of evalscope might be problematic. Click here for more details.

Files changed (214) hide show
  1. evalscope/arguments.py +2 -1
  2. evalscope/backend/rag_eval/__init__.py +1 -1
  3. evalscope/backend/rag_eval/backend_manager.py +21 -5
  4. evalscope/backend/rag_eval/cmteb/arguments.py +10 -0
  5. evalscope/backend/rag_eval/ragas/arguments.py +0 -1
  6. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +7 -2
  7. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +0 -5
  8. evalscope/backend/rag_eval/utils/embedding.py +49 -3
  9. evalscope/backend/rag_eval/utils/llm.py +4 -4
  10. evalscope/backend/vlm_eval_kit/backend_manager.py +4 -2
  11. evalscope/benchmarks/__init__.py +2 -2
  12. evalscope/benchmarks/aigc/__init__.py +0 -0
  13. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  14. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  15. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  16. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  17. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  18. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  19. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  20. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  21. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  22. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  23. evalscope/benchmarks/arc/arc_adapter.py +2 -2
  24. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  25. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  26. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  27. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  28. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  29. evalscope/benchmarks/data_adapter.py +21 -10
  30. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  31. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  32. evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
  33. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +1 -1
  34. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  35. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +5 -4
  36. evalscope/benchmarks/live_code_bench/testing_util.py +369 -550
  37. evalscope/benchmarks/maritime_bench/__init__.py +0 -0
  38. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +79 -0
  39. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  40. evalscope/benchmarks/mmlu/mmlu_adapter.py +8 -8
  41. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +1 -1
  42. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +1 -1
  43. evalscope/benchmarks/musr/musr_adapter.py +1 -1
  44. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  45. evalscope/benchmarks/utils.py +7 -16
  46. evalscope/cli/start_app.py +1 -1
  47. evalscope/collections/evaluator.py +20 -6
  48. evalscope/config.py +8 -4
  49. evalscope/constants.py +11 -0
  50. evalscope/evaluator/evaluator.py +2 -2
  51. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  52. evalscope/metrics/__init__.py +49 -4
  53. evalscope/metrics/llm_judge.py +1 -1
  54. evalscope/metrics/named_metrics.py +13 -0
  55. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  56. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  57. evalscope/metrics/t2v_metrics/constants.py +12 -0
  58. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  59. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  60. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  61. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  62. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  63. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  64. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  65. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  66. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  67. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  68. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  69. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  70. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  71. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  72. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  73. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  74. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  75. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  76. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  77. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  139. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  140. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  141. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  142. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  143. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  144. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  145. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  146. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  147. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  148. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  149. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  150. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  151. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  152. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  153. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  154. evalscope/metrics/t2v_metrics/score.py +78 -0
  155. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  156. evalscope/models/__init__.py +50 -14
  157. evalscope/models/adapters/__init__.py +17 -0
  158. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  159. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  160. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  161. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  162. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  163. evalscope/models/adapters/t2i_adapter.py +76 -0
  164. evalscope/models/custom/__init__.py +2 -1
  165. evalscope/models/custom/dummy_model.py +11 -13
  166. evalscope/models/local_model.py +82 -33
  167. evalscope/models/model.py +2 -42
  168. evalscope/models/register.py +26 -0
  169. evalscope/perf/arguments.py +24 -5
  170. evalscope/perf/benchmark.py +28 -42
  171. evalscope/perf/http_client.py +2 -3
  172. evalscope/perf/plugin/api/custom_api.py +1 -1
  173. evalscope/perf/plugin/api/openai_api.py +2 -2
  174. evalscope/perf/plugin/datasets/custom.py +4 -1
  175. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  176. evalscope/perf/plugin/datasets/line_by_line.py +4 -1
  177. evalscope/perf/plugin/datasets/longalpaca.py +4 -1
  178. evalscope/perf/plugin/datasets/openqa.py +4 -1
  179. evalscope/perf/plugin/datasets/random_dataset.py +13 -6
  180. evalscope/perf/utils/benchmark_util.py +14 -8
  181. evalscope/perf/utils/db_util.py +9 -3
  182. evalscope/perf/utils/log_utils.py +41 -0
  183. evalscope/report/__init__.py +1 -0
  184. evalscope/report/app.py +128 -78
  185. evalscope/report/app_arguments.py +11 -0
  186. evalscope/report/generator.py +1 -1
  187. evalscope/run.py +10 -3
  188. evalscope/summarizer.py +2 -1
  189. evalscope/third_party/thinkbench/eval.py +19 -7
  190. evalscope/utils/chat_service.py +2 -2
  191. evalscope/utils/import_utils.py +66 -0
  192. evalscope/utils/utils.py +48 -29
  193. evalscope/version.py +2 -2
  194. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/METADATA +37 -15
  195. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/RECORD +209 -96
  196. tests/aigc/__init__.py +1 -0
  197. tests/aigc/test_t2i.py +87 -0
  198. tests/cli/test_all.py +4 -4
  199. tests/cli/test_collection.py +2 -1
  200. tests/cli/test_run.py +19 -12
  201. tests/perf/test_perf.py +3 -3
  202. tests/rag/test_clip_benchmark.py +0 -1
  203. tests/rag/test_mteb.py +37 -8
  204. tests/rag/test_ragas.py +29 -26
  205. tests/vlm/test_vlmeval.py +37 -1
  206. evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -46
  207. evalscope/benchmarks/live_code_bench/execute_utils.py +0 -267
  208. evalscope/metrics/code_metric.py +0 -98
  209. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  210. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  211. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/LICENSE +0 -0
  212. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/WHEEL +0 -0
  213. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/entry_points.txt +0 -0
  214. {evalscope-0.13.2.dist-info → evalscope-0.15.0.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,11 @@
1
1
  evalscope/__init__.py,sha256=XZYDn3ShhM_48je5qQgwymtSdpTt8zYEnNfanYnpBdA,181
2
- evalscope/arguments.py,sha256=OPYmX_ar7rXFm0ETPuE2hs-knDQtwQ0pFwSazjn3S9Q,5241
3
- evalscope/config.py,sha256=CkNBE83S335iyu0VRMkblaJw5nGM8pXv4NhK5ySE3cs,9476
4
- evalscope/constants.py,sha256=Cgzkoz4R3MC3YLtbCM2fmSwF8Z2kuxYdOC8t9FWJj9w,3740
5
- evalscope/run.py,sha256=LUCdnNzNIfHSWvxu3gxAsHEDX7hT5mcVnV4lSY5h0iA,6007
2
+ evalscope/arguments.py,sha256=jywTxu_HWhgf0_OlnaOyRSzUHenr5Zio2vmcCgcfbxg,5453
3
+ evalscope/config.py,sha256=O3kjjVFRGSrlLD5EI4t99Z-m6oFtQVmEudvE62x92wY,9648
4
+ evalscope/constants.py,sha256=PHnsGndB4N5-jvmawPxMK5b9geE2Es5cUe8ZKYSuKgM,4016
5
+ evalscope/run.py,sha256=_DKbxgQGwhweBnQrI7lQhu5eoz4LYPVeNanzD4lHuJA,6476
6
6
  evalscope/run_arena.py,sha256=WXPCT0L-b_KvLBQ9KnrVW6y8icdDcqVhaXjTZMpS8k8,8572
7
- evalscope/summarizer.py,sha256=Wnt8Y61PasOkigo8zMpi1qdFzfETjfnDtCDDQ6VwgDw,5867
8
- evalscope/version.py,sha256=JzXnfz-D9eKhVPZu2TQUPFaTFhRiZ3iK4jcIuxfnQE8,119
7
+ evalscope/summarizer.py,sha256=61kU5ZoSh1dd8HMJPqP3ZvJwcY9szwWFCZdu2lfATJA,5920
8
+ evalscope/version.py,sha256=X2BkdAHDhsMo9BTAegfd5uYheDVI8rh_UG5YqMwwXUE,119
9
9
  evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
11
11
  evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
@@ -14,8 +14,8 @@ evalscope/backend/opencompass/backend_manager.py,sha256=y5NnAIY1pI7E1ZSeKU3acrD-
14
14
  evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
15
15
  evalscope/backend/opencompass/tasks/eval_api.py,sha256=ZaGdUbEOtAW5VX3ZXmpHIttg_QrID34EnBTylD3uvos,1152
16
16
  evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=JHSq4EnPJgv4sRJJplLH80EqE3ghtkn2k8HnV6DaDew,5406
17
- evalscope/backend/rag_eval/__init__.py,sha256=jFWj8l8bPAu1sz7wtX5gGIweBFC8c2LzXUPz7tGambE,284
18
- evalscope/backend/rag_eval/backend_manager.py,sha256=Cw322R1j-L8vMERAWEXUTT-0a1K-V6KhQOtrOhgKVMM,2857
17
+ evalscope/backend/rag_eval/__init__.py,sha256=Tbj7HboP5zzJ77-9qVEwwhHKjHL5V8MwLFr6sw1oeoA,291
18
+ evalscope/backend/rag_eval/backend_manager.py,sha256=OEFADT8kdsuVMU0QOfiafzFQopY7bKbWZ_jhdXyYElY,3472
19
19
  evalscope/backend/rag_eval/clip_benchmark/__init__.py,sha256=C8Vetf52nyHiRwY2Pm74Bjn3UpWboQeghCGNh67X1EM,151
20
20
  evalscope/backend/rag_eval/clip_benchmark/arguments.py,sha256=d5UkbC3RXb6iyzy_ILumToAVO1AdwvDeyOiX5KB2u0g,1530
21
21
  evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py,sha256=anuIhRk9OC8y0LNBjvttSXppc99gbz-f0TYQjnyLLyU,8347
@@ -27,7 +27,7 @@ evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_retrieval.py,sha256=t0U
27
27
  evalscope/backend/rag_eval/clip_benchmark/utils/webdataset_convert.py,sha256=rZY-TulG-Cb8b6GTBxqTDYQ_4Ois3kbgKhuunZq8Ato,8407
28
28
  evalscope/backend/rag_eval/clip_benchmark/utils/webdatasets.txt,sha256=eiiAaxhS48b5rVLy5O9VvFfV2AfxY86ITu_iqT7ZLkQ,649
29
29
  evalscope/backend/rag_eval/cmteb/__init__.py,sha256=I502GHPFYo8BwlFvoljGKI24PY76eBXJQiquWk8nJNU,280
30
- evalscope/backend/rag_eval/cmteb/arguments.py,sha256=Z3GkGi7zjK85JynG-7CSVPmAxPRcGYuykkgfbxgn7_E,2317
30
+ evalscope/backend/rag_eval/cmteb/arguments.py,sha256=y2iTbs3a7R747NgS00nK2j3zO7gmREh8n7mWMrzF1js,2653
31
31
  evalscope/backend/rag_eval/cmteb/base.py,sha256=UCobQ81dHkiTmIz_0BJ_VANj_uG6mkJbYLKJztvMXfo,2849
32
32
  evalscope/backend/rag_eval/cmteb/task_template.py,sha256=FyFs1reefcsFCrWyi7Ya5dnFYvBhtxph2wIaFtOtFls,2595
33
33
  evalscope/backend/rag_eval/cmteb/tasks/Classification.py,sha256=sqbH0XmSiIm4n5UX5sXMwJHby1r-d35mwW1tKIhb2Hg,10848
@@ -39,36 +39,43 @@ evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py,sha256=ofmmeoieXHmU6O14JKWO9
39
39
  evalscope/backend/rag_eval/cmteb/tasks/STS.py,sha256=uhGLsQTo5lM3-L2Na3WJGqOLQw3c1WxHDA22ePJPxtU,12285
40
40
  evalscope/backend/rag_eval/cmteb/tasks/__init__.py,sha256=PKBNyp45hIa3FYNA1psiwtwfwUcn7s9eNt6r5aUpyyY,1505
41
41
  evalscope/backend/rag_eval/ragas/__init__.py,sha256=D0yJkN9SuNGIAL3niZw4BI08Yh3HznsUUewdIAa_-LM,171
42
- evalscope/backend/rag_eval/ragas/arguments.py,sha256=8SYCV15d25ocdDHRqmGMQzd9zR6gwfOrVSFBe4T-KCo,1806
42
+ evalscope/backend/rag_eval/ragas/arguments.py,sha256=S6M1nsqwMQ8lnZZDtlQTdzyOCfLn9WP0QJ_7wAEsVgc,1695
43
43
  evalscope/backend/rag_eval/ragas/task_template.py,sha256=a_3bWfLx0j2zJkWgEWNStO0XXAeUFdnFpeukpoGfxLg,1669
44
44
  evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py,sha256=fX9sCci787ViGiL3BhGsykx0bnWfOWWEFueaJKyR8g4,793
45
45
  evalscope/backend/rag_eval/ragas/tasks/__init__.py,sha256=hErdWKbvV9aRqOpQTzdFHw1tcYoDbnttmic7GpZzKx8,173
46
46
  evalscope/backend/rag_eval/ragas/tasks/build_distribution.py,sha256=vFfemiqtPx22u5pwwZxEQJKYf3B9efYmwbpWDI5hY30,1491
47
47
  evalscope/backend/rag_eval/ragas/tasks/build_transform.py,sha256=GtAYqdVOy7BxIGyC4rSZ_UfXagKYzE6eEtXbaOI_g-k,5425
48
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=B5ZETlQw5XTEDnO-VR5yXjSbbg1eUtjGts7M5msK2ik,5618
49
- evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=aP8U9zjIDl26X_YF82SXLpkxoJ4nUurmdKSEoJ-qsLY,2129
48
+ evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=YSqpaXMFVe8mkVfq3i_oJg1MSnPm98E7WdOBdyUwMpA,5784
49
+ evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=6x-4O2pgsjZCVfJNvwZEKcgLe_QhSknPg-f2jGjZkU4,1890
50
50
  evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  evalscope/backend/rag_eval/utils/clip.py,sha256=GLHhPCac2AH35AvRLvVqePA1gIMAewHTFmCJCDZzvqU,5015
52
- evalscope/backend/rag_eval/utils/embedding.py,sha256=x9HAEfZSSAnT2Tdbf-9a5UmBVagCr__ay5A2nMCPMpg,6258
53
- evalscope/backend/rag_eval/utils/llm.py,sha256=UIfdvkxVViYkIpX-MoM8sAwGEAozzVFyzX-YoFxXC1E,2607
52
+ evalscope/backend/rag_eval/utils/embedding.py,sha256=tFMepPAMO4Kkqeqh-XxXIDYRjGbCMlk7lwuUW7FNvCA,7977
53
+ evalscope/backend/rag_eval/utils/llm.py,sha256=acaD5QHPJUstJGpW1sNJ-3ZPT5J_Z8beOWb61Rtz07U,2607
54
54
  evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
55
55
  evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
56
- evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=ZB0wYzPBPyIM0zjfp5C71GcjAgKxKoWxF7RrhuDQBYM,5931
57
- evalscope/backend/vlm_eval_kit/custom_dataset.py,sha256=D-8Sr0tZblf-3ZR5T0lOggbgFRuy5wq9Nm02GUpNyAs,1620
58
- evalscope/benchmarks/__init__.py,sha256=b_SWdV1ZyOqFiwc_9lIjKrIvK1rwnF2cCIF7XN9CN8E,932
56
+ evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=sUYvQxCtPl6CrcwhQpY8lJjW5skqWc-fvHUSnXd_MvQ,6054
57
+ evalscope/benchmarks/__init__.py,sha256=5AXNhhmbaBFEe3u7y5TtIrviYzFI-hC8oKqxFILs1pE,937
59
58
  evalscope/benchmarks/benchmark.py,sha256=a_7Ctz36McuTyBSTYi56jis9pvOdWhg7JVSPFrbxqR4,2535
60
- evalscope/benchmarks/data_adapter.py,sha256=UvbJJTNBvA0aM-xmsaj9jEEsNksn9pTDDr90FfFX2pg,17606
61
- evalscope/benchmarks/utils.py,sha256=6kxeBz4w8Fw68AYH05a4ncjgkaUV4bU3eaFVLqOdkMI,1321
59
+ evalscope/benchmarks/data_adapter.py,sha256=mWdxtHbordS577NqZUQZmIjlewjGDlStqc-iDvqpAyU,18061
60
+ evalscope/benchmarks/utils.py,sha256=yXQyszzrILNiBuUrbB1BtgotQSaNA8w6X935AL1dNAw,1074
61
+ evalscope/benchmarks/aigc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
+ evalscope/benchmarks/aigc/t2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
+ evalscope/benchmarks/aigc/t2i/base.py,sha256=4GFAvceT1Gpt5teDLRCZi62RwvPazuhG3zwft3gN3X4,2102
64
+ evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=WV9w3z8TxWNzVzn9A_g0xqeHh76ydnHL5xLwyg63VmU,2992
65
+ evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=baDGFRpVcSKpc1CdzNAMBtjeCZDUpyEc5l1KyrPNoEU,1892
66
+ evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=But2hcQU3X3v58poF8Qg2agrxTAP6gnjZYJs8Tr0g_4,2047
67
+ evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=U0RKN3apyD3YyZfIvqgO8TNuDO-zctlftHsSfBRyQxU,1825
68
+ evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=vOOiOe26H2dk9VN2WbB_Oi3lzavMIaYDBq6sqeSIiAU,1093
62
69
  evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- evalscope/benchmarks/aime/aime24_adapter.py,sha256=dBm9yukt4-CByEPUlAPAIN6mL3VkZcI-dw2kz4oQBMo,1715
64
- evalscope/benchmarks/aime/aime25_adapter.py,sha256=FB_NufY2V7uYdxVnrY_4y81gyyfYDnvedz1_zHdDWt4,1709
70
+ evalscope/benchmarks/aime/aime24_adapter.py,sha256=GrIxCHpUwgUy8tXGTB7iQOt8k7wG8MJB0CWbwBmIy-8,1703
71
+ evalscope/benchmarks/aime/aime25_adapter.py,sha256=yxo5roCb8ryX9ROUU2FdZ-WBTUPZ14MrBzEL0zPOh-U,1718
65
72
  evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=2a6wHJSLe89Xh18u1LBkMQEZzfOURiek6o0-k2lCQgM,4065
73
+ evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=em1YM2PxnJ8Of7Li3eqrw8PtwfeXSinfVIr-CIKVb60,4026
67
74
  evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
68
75
  evalscope/benchmarks/arc/ai2_arc.py,sha256=WtL4Z_ulcCU2KfptWTjTm75T2I2rVGd9aDBBB76P14w,5697
69
- evalscope/benchmarks/arc/arc_adapter.py,sha256=lkhDz-DYjPQ1vHzo8X4j-0Lq_rBxAnws35_R00pIbNI,6347
76
+ evalscope/benchmarks/arc/arc_adapter.py,sha256=0h-eT4BBmUJQrakKMPUNE1nSRwK6LHB-cflWpWzY978,6364
70
77
  evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=bdQfLTWB5pFo4hET0uFqu5zMX9PNQNwdoLoGrL5jCBE,6213
78
+ evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=S3FQ_UD3GC8M7FU-PPeuJm5YVrG5qhnVE5T1jRpPuxo,6131
72
79
  evalscope/benchmarks/arena_hard/utils.py,sha256=NstI1VR5fTaT-bfXRj0cLqm0DtH8EY4EQHR-K9HJubI,5089
73
80
  evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
74
81
  evalscope/benchmarks/bbh/bbh_adapter.py,sha256=fROpzenrjpEBWtnvM_RL_m0uXPOhXTtYAglJEZbzUdY,8330
@@ -100,23 +107,23 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
100
107
  evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
101
108
  evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
102
109
  evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
103
- evalscope/benchmarks/ceval/ceval_adapter.py,sha256=E4QobCjSSkMZtPJyaT_XBVxiqEqa1bta1I9aFnaHOqs,11308
110
+ evalscope/benchmarks/ceval/ceval_adapter.py,sha256=1ITBXI0f01Dt1p7sb2RGswIeg9685Bkk2S2xmA1vat8,11295
104
111
  evalscope/benchmarks/ceval/ceval_exam.py,sha256=ngOvb6Fymt7iPWIb2fzrUVpqmUT2VBoqh7X_IH8Bcsc,4824
105
112
  evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=fYvkJn1UcWM3aqhPMTTtBPVzjTL-Rm_g9UwUJx1FvJc,8106
113
+ evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=zY8dfvrTeCHAQ3d7AM02CexZw5CVKH51ZOhtT7Q1Gko,8031
107
114
  evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
108
115
  evalscope/benchmarks/cmmlu/cmmlu.py,sha256=Y59NIGUFzJEztJbkehZsG4Cz0J_v9Cyju6xazHMYIcA,5022
109
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=TTq2jRz46Hqc_D_ZBaiw_OwKub1FZX6w8C7g7COIdGs,10372
116
+ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=r9zael_Y2Jso0ashevYpF8e5SHOBh8iMcPIJU5WT3pQ,10367
110
117
  evalscope/benchmarks/cmmlu/samples.jsonl,sha256=FXbyPQSDorKBGSD0lnOzioZmFjG07lIL87FRDRaMPSY,1722
111
118
  evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
112
119
  evalscope/benchmarks/competition_math/competition_math.py,sha256=Cehyokift7oDKjc8TdmfblZ6mMc39wQWtqqbUi34QLc,2629
113
- evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=lD7sDro0dSWKgYaM_ZgWbBdetxVURpjo_2q1gvVt1XU,6815
120
+ evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=wgejW-_QswtT8_3JKAQ_H6svH8IotDJDBEH7X4nP4bY,6760
114
121
  evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
- evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=ecNwAE3p2eKIeC4whSUdZpeJ8NgidbSFZbIYtSW26Xo,2394
122
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=QgLgIrjD3q53T-lu1UWTV6T4h1cKGoCQDh0O4QxFezw,2569
116
123
  evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=ZVGzUuuQ0UTOqQtXE40ZyBeMOSl8saSiFEQ5_siJ-c8,5052
124
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=fqbt61owPP7t2H4B2zbYVZTs0VBGuXNvWGvkukwhRYc,5039
118
125
  evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
119
- evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=KBZDP1T-t7uu8vBLGL_unVdj7rDko3KWBPKqWlw31JQ,4596
126
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=8d5znAcQmFSmvyKV-JuMQzbY5k6xDNQQdrWZ7zgPTK4,4603
120
127
  evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
121
128
  evalscope/benchmarks/gpqa/chain_of_thought.txt,sha256=pgoT5a-DMPJaMhoH_M8zfU5s80ibWDTVW6vnonITd8k,5610
122
129
  evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=UB287DtnbkSQXZsbReFJqmQRwbo672DTCeXXilR_-Vc,4790
@@ -125,7 +132,7 @@ evalscope/benchmarks/gsm8k/gsm8k.py,sha256=ZDN5lfeZyc_pkTDVY0voC_zUExHE1ZoEgEaTv
125
132
  evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=ZZZ-9oja53IwiU33Kjm7NTk4MbFGWyvonhnHrn_3Na8,10557
126
133
  evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
127
134
  evalscope/benchmarks/hellaswag/hellaswag.py,sha256=5_c9WbaS1LIdvgXzqEcvjAEtKi2V2Yn0YtszPlFqhXI,4610
128
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=QYZZuxbjkKxAjxuoWn0M5WgusO55vzeAcyKnWUMow3M,5871
135
+ evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=SRM_-AKlWtKXi4zrlBAH9YceFnrktZDNsjvQOiPizUM,5893
129
136
  evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
130
137
  evalscope/benchmarks/humaneval/humaneval.py,sha256=2Exsg6u8FEu0buADY2tETJluSM8tWacvX06nykKKLSE,3395
131
138
  evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=UOjakV31J0g7TYbrRls0ItcopWOJu54ucPfaqSJB7Os,5250
@@ -138,26 +145,27 @@ evalscope/benchmarks/ifeval/utils.py,sha256=TKrM1m2qDCUauahogItDdICf4mDk0OjasSxg
138
145
  evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
146
  evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=16whmFkJt9fLbei9d-kmjnWB_5y5vsiX9tK5kSuxDw8,2449
140
147
  evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
141
- evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=rOWaG8PV4AGIRhS_gqwxEhphEVe1Cqg57Eudwm5HTjI,6820
142
- evalscope/benchmarks/live_code_bench/execute_utils.py,sha256=MreaMLI0IicNZawpfqcyoRLt67EZ3CJvmxxRTYwhAbU,7397
148
+ evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=iqmVUMZmyRhzOOXXQ-NN9P1nGvvbzTjOSEp6djbN_rw,6503
143
149
  evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
144
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=8MOECcweLG465JFgUzP20QlKyBAO90oFHhH7Z77FuUY,3521
150
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=AkvlQ-3oS8Tr3xZgx3omMt5w8jia6yH07D5Bq27Q5wc,3490
145
151
  evalscope/benchmarks/live_code_bench/load_utils.py,sha256=5i9wtdPLYR8ckjx5MaYQVC2LFYvjKzR6Fa6UZmeOTRc,2445
146
152
  evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
147
153
  evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
148
- evalscope/benchmarks/live_code_bench/testing_util.py,sha256=s5oa--dOcugcpBmHsbeqnTRTDhdiCNXkIQuRc6EgA8o,28241
154
+ evalscope/benchmarks/live_code_bench/testing_util.py,sha256=abjlwp6HDayf88mMI_daOKm06nEOeNBaMkmGWqk2DJo,17286
155
+ evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
+ evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=RVbsiglxmEW37-tDYgr4Drywh26I94DRGhwv7uP2aYk,2829
149
157
  evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
- evalscope/benchmarks/math_500/math_500_adapter.py,sha256=SB2eb4Z7DTXdptqirEoctqTdDLEu28s7bLeCAMBmAFo,1923
158
+ evalscope/benchmarks/math_500/math_500_adapter.py,sha256=opT73il3CbM1zZhuqRHZu_4O4WEZCZPvZe06I4U8YGM,1911
151
159
  evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
152
160
  evalscope/benchmarks/mmlu/mmlu.py,sha256=sA8AC0bN7iURrSazqkY31s_reNVbDZSUCB-NCTQsVeI,5042
153
- evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=e__Evar99V9l65FlzT6T594CN4iMgmuVhjujQAm4po4,11662
161
+ evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=__BrO2f7_AZ87a00HCRGPm5ZK8B4JTZKzRBRQY3yf3Q,11635
154
162
  evalscope/benchmarks/mmlu/samples.jsonl,sha256=f5Y2vwbEvNtpE7vrl9BHoJzsdceI4vUAo1frexYyX2o,1345
155
163
  evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=OANfue-fK543drJrDj6V_zDMtySrQEBHPgTsejr-e7U,4226
164
+ evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=hPqxDqDhqin3TxfimfhIxfEc_8UfzTDGAfX7iDrWy28,4248
157
165
  evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
- evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=ZZMy9exJ8hknr1D6s73sAhHHzBAKcqo7WAmlUtPqpCI,9556
166
+ evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=Kr30i_exxBJRz9PLB5g6F04e2HJ4WuF6LDyAwaRh2MY,9578
159
167
  evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
- evalscope/benchmarks/musr/musr_adapter.py,sha256=Po8hcIQiqlFo0AGjcNQe75cpsMNDcfiJaKgZsk33-DY,2442
168
+ evalscope/benchmarks/musr/musr_adapter.py,sha256=85P0sY7H9pthYdCjkE2AOxaiNhcIBW1iZmODkz3FN0M,2464
161
169
  evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
170
  evalscope/benchmarks/process_bench/critique_template.txt,sha256=tycx8n42QEC0uGcwbIvHfZvfTnchlRxGz8Tp1R2_e_Y,489
163
171
  evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=ydU-r1T0DaYhOxkhZgGL7PhDd4XoeqOBzVO9oiFPd8M,3422
@@ -166,7 +174,7 @@ evalscope/benchmarks/race/race.py,sha256=TtFC3opqEA6q8AQIAFQRGx07FjD9z7iW8wmtxeO
166
174
  evalscope/benchmarks/race/race_adapter.py,sha256=RD0B-i5dzeNKuhqnWbremgf4tk9jmOO4_eLAiITB1F0,6381
167
175
  evalscope/benchmarks/race/samples.jsonl,sha256=bhSktBgU6axYQCClRtQ7nN8D1x815AU8xMAIG1oflG0,1243
168
176
  evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=CsRUJ0v1sSUmtO6QWkdzisn9OHN-1JSXB-9ghOuNqgY,8988
177
+ evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=to4kSKc29BmtG4q9R2PeM-sdHiL8toSyoVi1D9WMRKk,8949
170
178
  evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
179
  evalscope/benchmarks/super_gpqa/five_shot_prompt.txt,sha256=vD3RMeQustxY_oWA8IobntjywT8ZUO7Jaub--rElDT4,4718
172
180
  evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=BqNLL8BYnK6tRuIdV6ijL4Uym2SejH_h1BV06XNjSE4,9331
@@ -182,69 +190,170 @@ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=ueUU860kg5_xf_MtU
182
190
  evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
183
191
  evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
184
192
  evalscope/cli/cli.py,sha256=w_dtXljur9s5lmTn6LbbFL_viTPQB1WAEzhYcId09Og,729
185
- evalscope/cli/start_app.py,sha256=WTbba_Iitz1jkQ5n6KHRH-i3U7qJIM7iCi4a9roWjaA,808
193
+ evalscope/cli/start_app.py,sha256=PoAnmYLw_UdWpA7qrUkSIx8hRoIGRy9yXrbH8bYOSL4,804
186
194
  evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
187
195
  evalscope/cli/start_perf.py,sha256=5hLi5jWgM9BJPXLd8d9D1zqrcj_5c0KvkfB1DgD4_RU,831
188
196
  evalscope/cli/start_server.py,sha256=DQRIfbsHaOAsVcLGF6iRyJnxmd5Sf_tgytpJNfiWCeE,3662
189
197
  evalscope/collections/__init__.py,sha256=hd68Qf-ryeDsz5Pu-Dh83M5V5RE5mhLsG-vc55n5n0o,228
190
- evalscope/collections/evaluator.py,sha256=YJy8Dj35XCdCwhNDwZecJkeW1_ZgIOsuRLFzfe3SyV8,12724
198
+ evalscope/collections/evaluator.py,sha256=Ll-qLet04aEp1WxoCKAuvZVWEZuy1lS_D-vZIN3zSQQ,13425
191
199
  evalscope/collections/sampler.py,sha256=2NwvhJVdi-mrDeK7RWwEGOoE7DdxtpyASRUZU_D6hWw,4855
192
200
  evalscope/collections/schema.py,sha256=mjJfNmy_athJ1TmnuJRkrKRlefzefuQXZuTtjn8SHKo,4073
193
201
  evalscope/evaluator/__init__.py,sha256=S6MU1O_iiNAaKxNIhO9MEmdW-BSNf_YH2l6NQ9lxVNo,103
194
- evalscope/evaluator/evaluator.py,sha256=szRQrXH5ILpUljb14lcunuOt185H8Um1paviTokraA4,19845
202
+ evalscope/evaluator/evaluator.py,sha256=M1JrsoZZ5OvcZfzgLrNSMtbbz5gvvCd0GwJArJQV0lk,19797
195
203
  evalscope/evaluator/rating_eval.py,sha256=uo0uj9z_TDsxdYlT8WIfNZhFLAfRkW9zn_wlu-F72O0,5575
196
204
  evalscope/evaluator/reviewer/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
197
- evalscope/evaluator/reviewer/auto_reviewer.py,sha256=PDz1Nt_qq0oGZBBwek2-M8VBUNLkOkmYObzR8gO7nEc,16624
198
- evalscope/metrics/__init__.py,sha256=SWvqzUzdryW5URz6u4fPkP9XSyA09nQ8zBeE8BbchSg,349
199
- evalscope/metrics/code_metric.py,sha256=EXE2BZAc9JJT_cpd6eCb0Jo9wwtnzXzHBZxmLzG5Jpw,3300
200
- evalscope/metrics/llm_judge.py,sha256=Di0Q1c6VHLl0nQ_TVOZOOQlMApDIU83HuDPTOV8XrTA,4023
205
+ evalscope/evaluator/reviewer/auto_reviewer.py,sha256=5WRYuXFTDgVmolrOdiTysk-mXrpw6Qg87-iuY-VD1W4,16618
206
+ evalscope/metrics/__init__.py,sha256=y1sdj5FBKYW1q5kLC6QREzoITHwstJRUdji6p0X5aAE,1363
207
+ evalscope/metrics/llm_judge.py,sha256=MjyTC-xiSThk8Rd4IdUbsCXeeikoOORv6wt8H7SW8s4,4008
201
208
  evalscope/metrics/math_parser.py,sha256=uTDudn305G3b8-GboWTrDE6OfrEwAW-areHnoGXZ6Is,17302
202
209
  evalscope/metrics/metrics.py,sha256=_YI7RhxlFu_JOgeE3LF9UKu6mJruvyu4FgqVf78Bjb8,13813
203
- evalscope/metrics/named_metrics.py,sha256=pSHA2_qdi9B5bDHIh08GYhx63odilSwA_T-95K1Usl0,1380
210
+ evalscope/metrics/named_metrics.py,sha256=PrzU_1mGTeRFxVJFT1aXxIOiS7MnNoWyZsb8uCRVDeE,2278
204
211
  evalscope/metrics/rouge_metric.py,sha256=zhIUqenSuxnORR9tamLQBGjFwP91Zei2UiLtcOyseVM,4639
205
212
  evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
206
213
  evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=Kq6AObenmLVQ5tN3NgN042a6mgRFQmRO21-ohd9mSa8,11972
207
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe,sha256=J_K-oSt9usFEw87Av6j7ETn3J48EsmFuY5_iVvY6xjc,524464
208
- evalscope/metrics/resources/gpt2-zhcn3-v4.json,sha256=WkM4J_FDPPNQwYi0kj5sM5SVjk2_6bci7tqf8dV9p_U,1289341
209
- evalscope/models/__init__.py,sha256=i9vcOBMEF_UM7C2gpmh2GsQk3njwqevoQ6A4CnP1fHs,1000
210
- evalscope/models/base_adapter.py,sha256=7PbRwfD5PIZCBYVds6ZHI8TBY9C5i2LdPOTu88FJWlY,3414
211
- evalscope/models/chat_adapter.py,sha256=2XZmdhxnvy4yezPLXNVRbgrs0QkUY2VznEBq5mCYjKs,7106
212
- evalscope/models/choice_adapter.py,sha256=fnJdo-FMJ-zvNLbEJGc73odgWXIxtVudL00JIf2vzsA,8239
213
- evalscope/models/custom_adapter.py,sha256=AGztmZ0aT0g2flh4B4NaiZ8LCDg8tT0gVNxmrP5W1mA,2401
214
- evalscope/models/local_model.py,sha256=yydggBCLcBAmUWbBhv7o2CA3RbG0DwDZharPdrkbNcg,2628
215
- evalscope/models/model.py,sha256=diu4TE1ZFWdynTxsl4DejTNsLdwjxoyj2nsKR-Y8EZE,7343
216
- evalscope/models/register.py,sha256=4vX6AfScAzwD7UkncbuejfAiQHznQkK5hvtG6jEUbWo,809
217
- evalscope/models/server_adapter.py,sha256=dS_o9_iC8QY73AehIekYwBQieFECZ97JRfbfleJ-Dtk,6845
218
- evalscope/models/custom/__init__.py,sha256=wb6f_Bi39s5sj-VO7EXRDXB2WhyFb49BUtEMk77ksNQ,102
214
+ evalscope/metrics/t2v_metrics/__init__.py,sha256=GBxgKTPVy_qhW_F3M4Oi6QMWhdAi4PqGX5w3t6Tueho,1783
215
+ evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
216
+ evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
217
+ evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
218
+ evalscope/metrics/t2v_metrics/score.py,sha256=6tIKZoQprlQOBoV-2E-3InIi2Jl29a9W2BFPjKnV1nw,3044
219
+ evalscope/metrics/t2v_metrics/vqascore.py,sha256=UmcSSdQN8mzs3b11sD5Z31WIyQVQUpgXKWQ1XYoX1c8,469
220
+ evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
+ evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
222
+ evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
223
+ evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
224
+ evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=QhksCBA12Ekm67H4TiROkC84dcbHB4zL5oO7BU4fwnI,8099
225
+ evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=h5z0HcnobkGw7vEeIwiVauwYC5GRyKczdevZi60a1aw,3328
226
+ evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=atX4JAxR4xAmBZ0WIVf_K8g3tNvqeuXNIIUX50bzo4Q,3806
227
+ evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=dp6ZyWKU9sJ2MjsyQJvTi_tBoEs6l2-KYmjz8cN_SL0,2394
228
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
230
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=7Jl2noVHFZUN5rXd9XDBr2ILChP56JPOM1mbJSq5G8c,5047
231
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=I6KFiy1DPCtev7TLOFxjUjkC13Mt2eBBha_XaN31nlI,7940
232
+ evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
233
+ evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=9IGXCC8d6a6WTIICcC_KgZf0KdDcJ3L_HOSXILJmMIA,3447
234
+ evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=3uduuEmoliezG5BwymoNbRm3FXvKh9gtkN74_LX9yIs,3674
235
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=OtnnYsW0G2vGoUHfyB0F-m5r10A5-N6k9agFRXav-Uw,3199
236
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=IPQcC4-cYeJjHGRysh26E3iNGHz1UJA-oxxEpSIXpX0,6021
237
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
238
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=ZCN_PJ3jz-a9I0oGbeuOcGuMOJT5iVb-yh5Dzq49VwE,2700
239
+ evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
240
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=krWGPe--eLHwK9M2tqWkmu7iKlbAM_qanP46NUkmkhM,9896
241
+ evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=R5VS77aDRX4HVcwJ7xOAnf_uP8jhix4PXbxWim1BOdg,5903
242
+ evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=p22TqpCDUFV39I9anYjl5zehNXOCtPQ15fHnEeDrF3A,4712
243
+ evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=oEsZOQCZl6NS0SaNn9mM4S3NSg6lT5Lm_HH9Ju7i8O0,587
244
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
246
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=mMDRPYpSUsnbc3ijicy4IPD6J2z44iAIgKUdhkf5Nkw,14037
247
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
248
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=Em8li9Mqoc3Xv3sDz4lAlKU4h9vZpUkzycGGyM6a-sM,2807
249
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
250
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
251
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=R38FVQMznUzTAfq6DLp-Y34XYYWkqmy1-aMfNcGKthk,14998
252
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
253
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=0spv43GzXh_5l9BUZN8tuPZgGKkkU8InWjvFuUuRbLw,801
254
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=LPZP_XK612apDxYkvGsH8B8E2Z2Q2CaT6JY0T1dghEk,5866
255
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=1eMz5jxA06uoK-sZyD7SNnBy87gbwplt8526koTRBLo,3330
256
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=he84q4G6OdycZDQHVkHUxENuIQBBH8WJ-9GBiMpYC-A,9715
257
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
258
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
259
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=F2OldVATVzyvaSxO2l-tBexhOkmoVb7n23S89JfFIEc,8313
260
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=QKYKGyXpQbDWJfz6EDZCB5meB5HGj59ygmoPm00Q1dQ,10955
261
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
262
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
263
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
264
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json,sha256=zVHOJiAdTS92rHzg62Q0oTZZsZalondGMqDJJfbolAU,491
265
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml,sha256=Ls2ZfsKV3gDzg6F2zBHPhFbK-3na7ozNGWFmMq_8hTg,1074
266
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml,sha256=yXP6HQVyEYc1X2C_SawNIye4eoaQPxl8JazV7CXUPDc,1073
267
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml,sha256=OpdOvll7UX6nURi84rDvWiFZrLsNNHtoFRWdugVPvdA,1073
268
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml,sha256=hPHF831LSjBGbo1fg9fqhbeSAGOVW-iiZbWHVQVs8wU,957
269
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml,sha256=xMLIGVhkgwBsP9IbKFoZNW_lbVwwjz44ArlSRPS1Q98,980
270
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml,sha256=kyfbbtLOG8cY9CUFm-_g56djMMwfZhOgXSFV2gRMomQ,983
271
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml,sha256=_e45RN28lhhyFRg7JsDczNMU_nJbwirn3eVB3mgkmrI,1022
272
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml,sha256=6ycWPKz5alaQCxpuPuqX1e_whroRULgb8gICOWLDBO4,1019
273
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml,sha256=svzg_Ao0g0-tAnBKT0Jj4PDRvv1ikSxS1Dq5YkzrUTU,860
274
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml,sha256=ZZE6AWe8iiLTXYiJk60P0J4cRLwehLYzRn1ohZxgstI,955
275
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml,sha256=T7L97c2yFLZ5N3_4NFqvRxShvr7relE2GNREuukufCU,955
276
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml,sha256=U0s_BMVb21E2aGnLGBstzdR3WSTP_gk4Hubnnt50lcg,952
277
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml,sha256=AIgEAeTUqFiNVq-uIjbE_zh7jDPLFwchZsw0fCvWqU0,982
278
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml,sha256=CQZQICT2ogqwKklzWVUnfWidOY-Deflh_WD-vq08sys,958
279
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml,sha256=UGuL20xRPZPy3sZeqMgIzovdd1BOTESwTS2gfwsdGFk,955
280
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml,sha256=eKuyCU7yFIU1VSHNRzEu4Bm7NY6NPppIHcTd0RKXUrI,955
281
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
282
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
283
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
284
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=9QnWMeulCQm_c__-b9cBYbjqihLDSblW3-luGnxfBXQ,6391
285
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
286
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=eCm7opG-Ld--sZEG67creYqoRsHjWBuWwUha2s13AuM,9806
287
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=1vuUf6EGzfiyMOHmacI3NhjCIq1LFhANpLJ_Frgh3Mk,20423
288
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=PobqtMnfh7HuD4zdlj5tBMufvufIO5fIL0_NvTOK4AQ,52965
289
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=L20LYyiI8AGOTKfTJuYZgrkcvg2UHsnFJE866v5jlyE,18674
290
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=gKOIPQhyB8SVbx0wW3W-VQNmZhdA4186C1XVHHWYkjc,46810
291
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
292
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
293
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
294
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=A5y_qCsmW9j78w92L9VEjXRaqcsyI5FCu0Z9QJvKF_4,18960
295
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=hcPHQjb-QJRAEFpDackLqjKeu8z_uKz_5VyyeU3AQQI,13879
296
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=DM1W0vZ0ZhoR9vvbl4O_MlShApZHv2MtK37zHsVoCjc,30527
297
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=5Ub0AYnQFcG32ByitEah6Hog6oAKNrK7HTnmlLI4XfE,11325
298
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=uT-ubavkEWPsqVtFy-rPQCw2rf-VOd-ZLqP_oDyNTIs,39581
299
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=grUl2rtGJVDoP6u47jywBzcmqL9kylqCf1IkyKCgNLE,83469
300
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=iuiXv-jZcWVoUle6GDQXLy1SMXt4WPJEOD4qhjPe6U8,3856
301
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=1XxKgsA6QYBkaADLUoVwFDPtRJzCvcSrk-1lwGKzF_c,2286
302
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
303
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
304
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
305
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=uLV8Qp8lRGkMVq5EtvbPa4l8ZpykN6godzblV7oj8bg,7086
306
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=r-CRo6u9qmFcdSYNz9pWeuwj0XKykuWyFm7pQVK2yI4,6939
307
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=Vk1Fm7ED1eAHW6lmSKF7VBZa55EN4h1zBEBdm5uKW24,4303
308
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
309
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=8THNmMvp7bYNIfExYGRfNJ88cuEvdK9KTO-3-51fUsY,13961
310
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=TkSEKghtqXW_1bHFsG9dhGVKv2XAmTMcdAH2UDvuSFM,36639
311
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=rGifS4I0o3faXrLnW-eSbksmkx7ppMkRlnVBomsbD3Q,851
312
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
313
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
314
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=XzebAHBAjOpkIMZm43dd55PESgmyq_J45Ji6bogYR3s,11204
315
+ evalscope/models/__init__.py,sha256=yB4NuKvSd3Jd4GRQvJeGPxwigd8RJErdop5PzSQhsMY,1565
316
+ evalscope/models/local_model.py,sha256=1yjwt7NHE7pI8xoGv38NTql9KcCd80x1mjlELqkNHBQ,4110
317
+ evalscope/models/model.py,sha256=MxvJAUNkuT7IA3bchnmJDur_YCKj9ShOD2Uq40dBcGc,6308
318
+ evalscope/models/register.py,sha256=pNC69YUvw-lodYpOXmByHm26h4m0Lofgd_om-JhOBq4,1882
319
+ evalscope/models/adapters/__init__.py,sha256=mduiDZ6LgmkefNf4CtObZk6heOB93HxxgqTuYvrqWoo,590
320
+ evalscope/models/adapters/base_adapter.py,sha256=f2FY8DLERudkfb4_anxNVFE_D19xCJj9BObiHWspewI,3268
321
+ evalscope/models/adapters/chat_adapter.py,sha256=HD1jAKlAv5KRjzB0s21E4rTEIhryZhZHMpSctF9xrN8,7306
322
+ evalscope/models/adapters/choice_adapter.py,sha256=4fuz3MFEqK8ln4mMs3goMCdRPBwYmmgN70HTdr_sW_U,8005
323
+ evalscope/models/adapters/custom_adapter.py,sha256=w8cD0b3xgcdhSZelcat67CGJnALOfz5IALzURnLjab8,2275
324
+ evalscope/models/adapters/server_adapter.py,sha256=5kH1yDAjETogR7aOdnCEueYE1bREI40OdXdBiJpMdIM,6734
325
+ evalscope/models/adapters/t2i_adapter.py,sha256=xkMRyZ61yTiJfmULK-p9du4nNox41pkHiV2CTFBO3qM,2659
326
+ evalscope/models/custom/__init__.py,sha256=MZylegALg1HerOYtp-qbzu4Wb6PW3JbrxwONHU-PAVs,131
219
327
  evalscope/models/custom/custom_model.py,sha256=rBQLAuPEw_OPUtRSCEmxEfpcA8jPj8bAdsmtKs4ygus,1566
220
- evalscope/models/custom/dummy_model.py,sha256=ODD6pt9FvZq_a54oYsehBDslRKHOsk9zsC9iAZvi5Yg,2020
328
+ evalscope/models/custom/dummy_model.py,sha256=WRT_aCBZLXnC4yRCgggkuySkhM71C47O2Txx_YNc3UM,1933
221
329
  evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
- evalscope/perf/arguments.py,sha256=srDp3JMYIPZxkfua5WHkjq3G8lJlTtxdXKxE_CivoJk,9156
223
- evalscope/perf/benchmark.py,sha256=qY7zrsZMDBr1fABsShXjgK12tNE7PhzGZdLaUtdtxU8,8318
224
- evalscope/perf/http_client.py,sha256=xMakdQkJ2cgIOd-yOmHEW0vbGKTJ0JWhLFt9IFtUP8Q,7473
330
+ evalscope/perf/arguments.py,sha256=UZKlkbDE2N408dY8Ji-WB8sl1rcmamywzxLvNXpnY0w,10194
331
+ evalscope/perf/benchmark.py,sha256=nv7gtCkeKnLKQQiKM4G0MYO2ambcuwsbx67OgEQG0nM,7917
332
+ evalscope/perf/http_client.py,sha256=-c3-N7bxKsj3d5DVsKSaYA3XAHJDzZgoqZBbhuDYIGk,7419
225
333
  evalscope/perf/main.py,sha256=w-yDbl0osaTAMgC-JNPpqIq2LQ7U4c-Ht7Amj8Nbjc8,1278
226
334
  evalscope/perf/plugin/__init__.py,sha256=1sl5s-csrwKb_LVTnpF3HqArz06TRD5LYJ0hpqvokUA,85
227
335
  evalscope/perf/plugin/registry.py,sha256=w1IAt6GDdluzSYK5i-yrntvx3_EvIIqJamEL0xZv3zA,1323
228
336
  evalscope/perf/plugin/api/__init__.py,sha256=Ckzbq4CkSMVQTedQcDHCYlRd6FTwQAElt2mHB-VXJac,195
229
337
  evalscope/perf/plugin/api/base.py,sha256=B_H04qKx7eRTn155rnDrbTYur7PK1mvxfQKYcqYbndU,2118
230
- evalscope/perf/plugin/api/custom_api.py,sha256=ay1AGi4y2opjwyRl0J0A54-vLB-pBj3QBFkzog0KA-g,3787
338
+ evalscope/perf/plugin/api/custom_api.py,sha256=ssE4J8AynA0n5SnXSQyk7K5Co3dwUN6Opph08clZna0,3785
231
339
  evalscope/perf/plugin/api/dashscope_api.py,sha256=V5fwn-p_fLH0dWKzhN9TvYSHRgla4INfXC4NDaIjoQ8,3825
232
- evalscope/perf/plugin/api/openai_api.py,sha256=DNDmW7jT0Abopw-K73X0PE7Vr2wTSKMBj79hJZTi-K8,7668
340
+ evalscope/perf/plugin/api/openai_api.py,sha256=kTL_2OACuKhzd2W0Pf4DirpMumzk4V3rqKZ2mvBZVCs,7655
233
341
  evalscope/perf/plugin/datasets/__init__.py,sha256=Z6Jc0RxJS_z0nBBV1-b0-56Ija60AtQ7I_67gY6ZfdQ,568
234
342
  evalscope/perf/plugin/datasets/base.py,sha256=Z-INWueeYjfEZhP4lbTlBMVwIa6BcXZKWx-w7Pop3mA,1786
235
- evalscope/perf/plugin/datasets/custom.py,sha256=_GSC5yR90_BjcRjdJqrJT2vHQAzskz5XxYOxngUM2Pg,815
236
- evalscope/perf/plugin/datasets/flickr8k.py,sha256=UzAIFIO0m5inWOkWM1mO6wfV2HOuXAqiTxCJ4b0SiZM,1589
237
- evalscope/perf/plugin/datasets/line_by_line.py,sha256=IKVZMpKei6XW9DTm9VEssWHE96i1lTqMf0621dA_img,836
238
- evalscope/perf/plugin/datasets/longalpaca.py,sha256=2aENqCly_DX1dyNcurYsLFJIvXYFph6jWm7z7XETvMk,1176
239
- evalscope/perf/plugin/datasets/openqa.py,sha256=_aVXs2s8wbmtoB6ZO-pNjUZvBVxRUYdoJDGv5-BumtI,1342
240
- evalscope/perf/plugin/datasets/random_dataset.py,sha256=wPyY5kk2zKnc8u9uYEl-vQ6BLHeWbdC8EHEAZNFSDeU,2702
343
+ evalscope/perf/plugin/datasets/custom.py,sha256=npreC7H1VsdTGYkqlMESvyOhtXOfZQA7_-ICmxe3FWk,936
344
+ evalscope/perf/plugin/datasets/flickr8k.py,sha256=MbJKEB0XqZE0nDEenwYs0FLH9QL658Vn9uQmUH4hPvk,1605
345
+ evalscope/perf/plugin/datasets/line_by_line.py,sha256=AqZYG6tVL3BIGnzh_2Tev8lDYezJG_1gqJY8bSNQl3Q,957
346
+ evalscope/perf/plugin/datasets/longalpaca.py,sha256=XelLris0-c3StLInQ-Oav4jqGcXPNfJxEDeYvaetEbI,1297
347
+ evalscope/perf/plugin/datasets/openqa.py,sha256=4Pnx5duFJzoiTUfZCbcK7LO8f-skmcpYNUUrtNR_UUc,1463
348
+ evalscope/perf/plugin/datasets/random_dataset.py,sha256=SIlsjAE_Stknfr6o1CBFvANBGCSgSExFbscLwSM_Gmk,2958
241
349
  evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
242
350
  evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
243
351
  evalscope/perf/utils/analysis_result.py,sha256=ESzaZHGTpr2LoJR3jpOzqMphxSrr79d364ZzD159PmY,1169
244
- evalscope/perf/utils/benchmark_util.py,sha256=4TyQ_tE5odcjKDFDueI3jrC0vld6QxmTreOd5_SP4vE,5802
245
- evalscope/perf/utils/db_util.py,sha256=OAaR9bK4SPfMuk41w1t4d7ljxPDDEZOzcwDn2s9bpz0,9052
352
+ evalscope/perf/utils/benchmark_util.py,sha256=CftjnxYA7d1aeAL_iuyXcJPwCL5A8zWGZSkNtjrMyW8,6309
353
+ evalscope/perf/utils/db_util.py,sha256=efz6qQtMIYAIpG0sAEjLwuzTHBUiuzAV1n7_DCGrN5o,9461
246
354
  evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
247
355
  evalscope/perf/utils/local_server.py,sha256=clF8i0UFmaxBBB6gX05KvVCyzSv0xzsAidz0_sLLlAk,4627
356
+ evalscope/perf/utils/log_utils.py,sha256=1jmB31W3ol9ukPAPbQ8xG3yoZ9oi3tjEyMK5M3ERmbw,1471
248
357
  evalscope/registry/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
249
358
  evalscope/registry/config/cfg_arena.yaml,sha256=rub6ceaQxxB1mbSjdoFf0IaVgGfbOonV2nYRebv2OKo,3292
250
359
  evalscope/registry/config/cfg_arena_zhihu.yaml,sha256=tvvihBwvoTjoezwTSaZwoGOB44ysofpnin4pNyY9TfQ,2755
@@ -266,10 +375,11 @@ evalscope/registry/tasks/general_qa.yaml,sha256=S3kdlrazWX2VAX2PMhNtBnFZVSnUKBNi
266
375
  evalscope/registry/tasks/gsm8k.yaml,sha256=M2I7otwOSy0usD8yG8d6QziASQlKdhKLflRHMG0LXiM,729
267
376
  evalscope/registry/tasks/mmlu.yaml,sha256=cJcMH1Cvgo9PlYoTmeGx2bcZayysltaa6ehK57dDkvo,726
268
377
  evalscope/registry/tasks/mmlu_mini.yaml,sha256=K8ouHh7ve5ZsbkqRtV3Jl-DF01YFPuObfwEdACJA4Pk,778
269
- evalscope/report/__init__.py,sha256=0Wes3ot2hy9s-WwZaBztst8qkNrXkOF-Hwa1WW1e8lY,260
270
- evalscope/report/app.py,sha256=cvof2Nm4ORxC4D3L22Kg3Ngu3kJwBZlfnFJkwDMCmSQ,26881
378
+ evalscope/report/__init__.py,sha256=iLNqx7CnHSHQmOBqWUK_vt2VIjnvGslJTqn--7B4y_s,316
379
+ evalscope/report/app.py,sha256=8pcQi5oYAYa9hXoMoMUNfy9jSvSR9DDiXyLcyPd9AmA,28459
380
+ evalscope/report/app_arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
271
381
  evalscope/report/combinator.py,sha256=O3QirwtYhDhdaWVT4STJMCGZMwoX8BTeJ3HtS9iwnWQ,2567
272
- evalscope/report/generator.py,sha256=2DULY9W8QCUxdtyfNjo8XAP_YxI1LgR95jknK__kYPU,3600
382
+ evalscope/report/generator.py,sha256=q9aHWNjQgvutAKtpjfWOpfu5zNFdnXilO9OqBqt_Phg,3612
273
383
  evalscope/report/utils.py,sha256=DRlbjbqHEmM8rGlA4pwtlHFhOZtyUzcqiS-mejfIDkU,4584
274
384
  evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
275
385
  evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
@@ -289,7 +399,7 @@ evalscope/third_party/longbench_write/tools/__init__.py,sha256=I_ANdxdcIHpkIzIXc
289
399
  evalscope/third_party/longbench_write/tools/data_etl.py,sha256=T7a-4PwZg5alZQh-oTi1zjMxjGmVVZYVwSR9-diZlF8,5971
290
400
  evalscope/third_party/longbench_write/tools/openai_api.py,sha256=PiIvvDYJkn041SJkLoroXwl1B8TtwpB7licVfqNSeuQ,8168
291
401
  evalscope/third_party/thinkbench/__init__.py,sha256=C0aSu71_dc1upUVkKmq2VgDd9plpRcYUdCE6BjUWJcA,110
292
- evalscope/third_party/thinkbench/eval.py,sha256=76G4LTkxqWCDCyj7Ahjj-qjO1gFem1uDzpRAC27ICl0,18896
402
+ evalscope/third_party/thinkbench/eval.py,sha256=IyfVTm6arhjBgvGMG5OZwopqQTmWVMJ8zYbbVSLtrvk,19503
293
403
  evalscope/third_party/thinkbench/infer.py,sha256=2L4DAJKn3wAhNEKnKudQT60igGOJSKH80FR4nS7DHYk,3952
294
404
  evalscope/third_party/thinkbench/resources/critique_template.txt,sha256=d4Egc-qH--4lG8X_EcmgymnuZgiCMbee1M5pt4HrRKA,535
295
405
  evalscope/third_party/thinkbench/resources/reformat_template.txt,sha256=zTZyVAzmMBtAwI9lHly9EXsqX471OW-VTg538PDcB30,1775
@@ -308,34 +418,37 @@ evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1
308
418
  evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=GITEbyiER10Zi-ZWpSqYCdAsiVtNeGK24hvR3kmYn2s,2689
309
419
  evalscope/utils/__init__.py,sha256=jLVoGryuqUh4Km9QWWQBzpqkcVNRK0MbwNaSgckqdiU,139
310
420
  evalscope/utils/arena_utils.py,sha256=Gf8VpH4C_oF2Abif_QeL0rAP6tvTzsc0gglpdNkUE48,7155
311
- evalscope/utils/chat_service.py,sha256=9LNTT-8KsacOLqnQer8j57e224rwOMbU7txV6re-X-A,8720
421
+ evalscope/utils/chat_service.py,sha256=U2jtrkOa2asRp16Zam0zIi_38mCyWQqql_L6JSwii4I,8749
312
422
  evalscope/utils/completion_parsers.py,sha256=YWHkLkSfURTcUjNNlCL6PPDICd4F2Ns9figgPN4C97c,2933
313
423
  evalscope/utils/filters.py,sha256=x_NX40uWMmUsVrAGHCeeV2e63HZZFugWUgdUhk64ivM,1523
424
+ evalscope/utils/import_utils.py,sha256=Oo8saX_mMw4U1RrA7_pn8FmV6P9laru4fEgecqqwpqk,2585
314
425
  evalscope/utils/io_utils.py,sha256=Tjdgen1FsAA4ArqiUzu734L0Px5NuiS0GKRRiGIzxSA,4192
315
426
  evalscope/utils/logger.py,sha256=barHSdtbEu21ynGQj_wS-rd7B02wPPR5AgaWCQzvG4w,3638
316
427
  evalscope/utils/model_utils.py,sha256=hB9W334ecAb6553FhooT6_jM0g-tjj6AU48IV3K1CKw,1131
317
- evalscope/utils/utils.py,sha256=lGvn94ryIzx-7WLNJeuyehNTmINt0jYIjrjW12woPCs,9730
428
+ evalscope/utils/utils.py,sha256=hP_ntROFsZ-zaNVpJtT2prNo8iX-UAKfRtdxbLtPJng,11105
318
429
  tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
319
430
  tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
431
+ tests/aigc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
432
+ tests/aigc/test_t2i.py,sha256=_M3WxY5ruBM4RD7rYHhgizcIhH-ny5XD9M16Ayl3UPk,2619
320
433
  tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
321
- tests/cli/test_all.py,sha256=tRC4TWaqxEsB6jMsGR7u9RHWHuKzn7Umt2XKY1V8CLU,4035
322
- tests/cli/test_collection.py,sha256=V-_M7ngwekMGqPuI16jjJZyAK2XLE4Z6QTn-8B5ykgU,4071
323
- tests/cli/test_run.py,sha256=0gD0nPiioieaDOqRZkS5ruIWuiv1B5D456wSSHv9y40,16471
434
+ tests/cli/test_all.py,sha256=pwup--iNxckUEsR_aFjIAbEQo3UogSu5aIWf9ryLP2o,4022
435
+ tests/cli/test_collection.py,sha256=y8FjoPziPRf5BdJK8DHjcXn26ETKz1OyqjnCpwjt-F4,4096
436
+ tests/cli/test_run.py,sha256=4B-6sOyotK3omirZWWyg7-CcnUSeZjiaU3aXHr0hH_Y,16804
324
437
  tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
325
- tests/perf/test_perf.py,sha256=mfXTCsD9RaCef3b4CLvm8ErxBUaWzn-EKKhOxD65i3A,3817
438
+ tests/perf/test_perf.py,sha256=8K5tGlWwOpYWnJ0GaCpqSw9zPOiM8fEKJaDil2mpTSQ,3831
326
439
  tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
- tests/rag/test_clip_benchmark.py,sha256=Ar8Br2CoAFYT2h4zCv_JKMKCGJKbKGYZgNwJ410ZaoU,2597
328
- tests/rag/test_mteb.py,sha256=t64FXE-ZsOCLiRJrw-dIDIhKd1OXiaglXaeERs0lOh4,4643
329
- tests/rag/test_ragas.py,sha256=fzpn4zZPeZ04ZdfLmwXbsSjf7WcjPWrGsA6RDNXgIEQ,4011
440
+ tests/rag/test_clip_benchmark.py,sha256=ZCBtgnF8Vuji6WQlb92-_RIvXlUX_Xt-cHZP4AN_DNI,2552
441
+ tests/rag/test_mteb.py,sha256=YJw6X1jwX6SYNB-ryVb-OHJWu3vsE3Y4STATI75rdG0,5619
442
+ tests/rag/test_ragas.py,sha256=E7rfKpKtBqglOL1GcW9adfY8nsOZMuoB8GC55UL1Q3c,4517
330
443
  tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
331
444
  tests/swift/test_run_swift_eval.py,sha256=JKG-0BwTxkbg-XeiXxujPqnVIM3f2EFaJ_9a7p_R4dk,5748
332
445
  tests/swift/test_run_swift_vlm_eval.py,sha256=C8DftjewnZaerQWfERI70bU3sQLWQ-ejZUQhtYO5e0o,4898
333
446
  tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=THZEXUOSqm9rWslwJHmZyh-Ytv5c_QKpgRW5J2s_69E,6017
334
447
  tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
335
- tests/vlm/test_vlmeval.py,sha256=nzWXjw49SlxXgDnYS9N5JSFtcUp8xPOW2YNNzupvtt4,1806
336
- evalscope-0.13.2.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
337
- evalscope-0.13.2.dist-info/METADATA,sha256=b7rVRQHN5miovM5qlh4Dozpl8OaxO0rg0ctT-kDZMyY,32399
338
- evalscope-0.13.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
339
- evalscope-0.13.2.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
340
- evalscope-0.13.2.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
341
- evalscope-0.13.2.dist-info/RECORD,,
448
+ tests/vlm/test_vlmeval.py,sha256=UqRiBPMU3vRtLIG1Qu4ZVhyUQx-zGYQuLCgobwf-7a4,3176
449
+ evalscope-0.15.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
450
+ evalscope-0.15.0.dist-info/METADATA,sha256=MLn0s_L7s0oeQPWL1XuhihDAFJnzLdVTvdrep-9Bgag,34053
451
+ evalscope-0.15.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
452
+ evalscope-0.15.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
453
+ evalscope-0.15.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
454
+ evalscope-0.15.0.dist-info/RECORD,,
tests/aigc/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
tests/aigc/test_t2i.py ADDED
@@ -0,0 +1,87 @@
1
+ from dotenv import dotenv_values
2
+
3
+ env = dotenv_values('.env')
4
+
5
+ import os
6
+ import unittest
7
+
8
+ from evalscope.config import TaskConfig
9
+ from evalscope.constants import EvalType, JudgeStrategy, ModelTask, OutputType
10
+ from evalscope.run import run_task
11
+ from evalscope.utils import test_level_list
12
+ from evalscope.utils.logger import get_logger
13
+
14
+ os.environ['LOG_LEVEL'] = 'DEBUG'
15
+
16
+ logger = get_logger()
17
+
18
+
19
+ class TestRun(unittest.TestCase):
20
+ @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
21
+ def test_run_general(self):
22
+ from evalscope.config import TaskConfig
23
+
24
+ task_cfg = TaskConfig(
25
+ datasets=[
26
+ 'general_t2i'
27
+ ],
28
+ dataset_args={
29
+ 'general_t2i': {
30
+ 'metric_list': [
31
+ 'PickScore',
32
+ 'CLIPScore',
33
+ 'HPSv2Score',
34
+ 'HPSv2.1Score',
35
+ 'BLIPv2Score',
36
+ 'ImageRewardScore',
37
+ 'VQAScore',
38
+ 'FGA_BLIP2Score',
39
+ 'MPS'
40
+ ],
41
+ 'dataset_id': 'custom_eval/multimodal/t2i/example.jsonl',
42
+ }
43
+ }
44
+ )
45
+
46
+ run_task(task_cfg=task_cfg)
47
+
48
+
49
+ @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
50
+ def test_run_benchmark(self):
51
+
52
+ task_cfg = TaskConfig(
53
+ model='stabilityai/stable-diffusion-xl-base-1.0', # model on modelscope
54
+ model_task=ModelTask.IMAGE_GENERATION, # must be IMAGE_GENERATION
55
+ model_args={
56
+ 'use_safetensors': True,
57
+ 'variant': 'fp16',
58
+ 'torch_dtype': 'torch.float16',
59
+ },
60
+ datasets=[
61
+ 'tifa160',
62
+ 'genai_bench',
63
+ 'evalmuse',
64
+ 'hpdv2',
65
+ ],
66
+ dataset_args={
67
+ 'tifa160': {
68
+ 'metric_list': [
69
+ 'PickScore',
70
+ # 'CLIPScore',
71
+ # 'HPSv2Score',
72
+ # 'BLIPv2Score',
73
+ # 'ImageRewardScore',
74
+ # 'VQAScore',
75
+ # 'FGA_BLIP2Score',
76
+ ]
77
+ }
78
+ },
79
+ limit=5,
80
+ generation_config={
81
+ 'num_inference_steps': 50,
82
+ 'guidance_scale': 7.5
83
+ },
84
+ use_cache='outputs/20250427_134122',
85
+ )
86
+
87
+ run_task(task_cfg=task_cfg)