evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of evalscope might be problematic. Click here for more details.

Files changed (181) hide show
  1. evalscope/arguments.py +2 -1
  2. evalscope/benchmarks/__init__.py +2 -2
  3. evalscope/benchmarks/aigc/__init__.py +0 -0
  4. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  5. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  6. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  7. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  8. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  9. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  10. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  11. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  12. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  13. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  14. evalscope/benchmarks/arc/arc_adapter.py +1 -1
  15. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  16. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  17. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  18. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  19. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  20. evalscope/benchmarks/data_adapter.py +16 -9
  21. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  22. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  23. evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
  24. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  25. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
  26. evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
  27. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  28. evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
  29. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  30. evalscope/benchmarks/utils.py +7 -16
  31. evalscope/cli/start_app.py +1 -1
  32. evalscope/collections/evaluator.py +16 -4
  33. evalscope/config.py +7 -3
  34. evalscope/constants.py +11 -0
  35. evalscope/evaluator/evaluator.py +9 -3
  36. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  37. evalscope/metrics/__init__.py +49 -4
  38. evalscope/metrics/llm_judge.py +1 -1
  39. evalscope/metrics/named_metrics.py +13 -0
  40. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  41. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  42. evalscope/metrics/t2v_metrics/constants.py +12 -0
  43. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  44. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  45. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  46. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  47. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  48. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  49. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  50. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  51. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  52. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  53. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  54. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  55. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  56. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  57. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  58. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  59. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  60. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  61. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  62. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  63. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  64. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  65. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  66. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  67. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  68. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  69. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  70. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  71. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  72. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  73. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  74. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  75. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  76. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  77. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  139. evalscope/metrics/t2v_metrics/score.py +78 -0
  140. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  141. evalscope/models/__init__.py +50 -14
  142. evalscope/models/adapters/__init__.py +17 -0
  143. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  144. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  145. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  146. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  147. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  148. evalscope/models/adapters/t2i_adapter.py +76 -0
  149. evalscope/models/custom/__init__.py +2 -1
  150. evalscope/models/custom/dummy_model.py +11 -13
  151. evalscope/models/local_model.py +82 -33
  152. evalscope/models/model.py +2 -42
  153. evalscope/models/register.py +26 -0
  154. evalscope/perf/benchmark.py +4 -3
  155. evalscope/perf/main.py +4 -2
  156. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  157. evalscope/perf/utils/benchmark_util.py +2 -2
  158. evalscope/perf/utils/db_util.py +16 -8
  159. evalscope/report/__init__.py +1 -0
  160. evalscope/report/app.py +117 -67
  161. evalscope/report/app_arguments.py +11 -0
  162. evalscope/report/generator.py +1 -1
  163. evalscope/run.py +3 -3
  164. evalscope/third_party/thinkbench/eval.py +19 -7
  165. evalscope/utils/chat_service.py +2 -2
  166. evalscope/utils/import_utils.py +66 -0
  167. evalscope/utils/utils.py +12 -4
  168. evalscope/version.py +2 -2
  169. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
  170. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
  171. tests/aigc/__init__.py +1 -0
  172. tests/aigc/test_t2i.py +87 -0
  173. tests/cli/test_run.py +20 -7
  174. tests/perf/test_perf.py +6 -3
  175. evalscope/metrics/code_metric.py +0 -98
  176. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  177. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  178. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
  179. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
  180. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
  181. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,11 @@
1
1
  evalscope/__init__.py,sha256=XZYDn3ShhM_48je5qQgwymtSdpTt8zYEnNfanYnpBdA,181
2
- evalscope/arguments.py,sha256=OPYmX_ar7rXFm0ETPuE2hs-knDQtwQ0pFwSazjn3S9Q,5241
3
- evalscope/config.py,sha256=sc8NoqhspbrNYMS201ZWreCKV-tBJrUEt96vKwpqfDY,9483
4
- evalscope/constants.py,sha256=Cgzkoz4R3MC3YLtbCM2fmSwF8Z2kuxYdOC8t9FWJj9w,3740
5
- evalscope/run.py,sha256=XbUhllYPjaJJuR1hPoGZH0jlW8XlvUv9gONrMBc4Ni0,6450
2
+ evalscope/arguments.py,sha256=jywTxu_HWhgf0_OlnaOyRSzUHenr5Zio2vmcCgcfbxg,5453
3
+ evalscope/config.py,sha256=O3kjjVFRGSrlLD5EI4t99Z-m6oFtQVmEudvE62x92wY,9648
4
+ evalscope/constants.py,sha256=PHnsGndB4N5-jvmawPxMK5b9geE2Es5cUe8ZKYSuKgM,4016
5
+ evalscope/run.py,sha256=_DKbxgQGwhweBnQrI7lQhu5eoz4LYPVeNanzD4lHuJA,6476
6
6
  evalscope/run_arena.py,sha256=WXPCT0L-b_KvLBQ9KnrVW6y8icdDcqVhaXjTZMpS8k8,8572
7
7
  evalscope/summarizer.py,sha256=61kU5ZoSh1dd8HMJPqP3ZvJwcY9szwWFCZdu2lfATJA,5920
8
- evalscope/version.py,sha256=4w52xL5au75pTD-PrvG-9l-U1euGk2032efyc-7IkQw,119
8
+ evalscope/version.py,sha256=eFCP5Hfk4dip59uCASefVxaNqxWNtwDQPrqaoRJxO9c,119
9
9
  evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
11
11
  evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
@@ -54,20 +54,28 @@ evalscope/backend/rag_eval/utils/llm.py,sha256=acaD5QHPJUstJGpW1sNJ-3ZPT5J_Z8beO
54
54
  evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
55
55
  evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
56
56
  evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=sUYvQxCtPl6CrcwhQpY8lJjW5skqWc-fvHUSnXd_MvQ,6054
57
- evalscope/benchmarks/__init__.py,sha256=b_SWdV1ZyOqFiwc_9lIjKrIvK1rwnF2cCIF7XN9CN8E,932
57
+ evalscope/benchmarks/__init__.py,sha256=5AXNhhmbaBFEe3u7y5TtIrviYzFI-hC8oKqxFILs1pE,937
58
58
  evalscope/benchmarks/benchmark.py,sha256=a_7Ctz36McuTyBSTYi56jis9pvOdWhg7JVSPFrbxqR4,2535
59
- evalscope/benchmarks/data_adapter.py,sha256=lwW23GjHHAptv4mc1u3xLlKqiRI1EfbSqaG3QGmxqEQ,17750
60
- evalscope/benchmarks/utils.py,sha256=6kxeBz4w8Fw68AYH05a4ncjgkaUV4bU3eaFVLqOdkMI,1321
59
+ evalscope/benchmarks/data_adapter.py,sha256=mWdxtHbordS577NqZUQZmIjlewjGDlStqc-iDvqpAyU,18061
60
+ evalscope/benchmarks/utils.py,sha256=yXQyszzrILNiBuUrbB1BtgotQSaNA8w6X935AL1dNAw,1074
61
+ evalscope/benchmarks/aigc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
+ evalscope/benchmarks/aigc/t2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
+ evalscope/benchmarks/aigc/t2i/base.py,sha256=4GFAvceT1Gpt5teDLRCZi62RwvPazuhG3zwft3gN3X4,2102
64
+ evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=WV9w3z8TxWNzVzn9A_g0xqeHh76ydnHL5xLwyg63VmU,2992
65
+ evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=baDGFRpVcSKpc1CdzNAMBtjeCZDUpyEc5l1KyrPNoEU,1892
66
+ evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=t9h5qlo4KrHOgXIhHo3z6fEAi0HfUqDZvaItQdS7dZ4,2097
67
+ evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=U0RKN3apyD3YyZfIvqgO8TNuDO-zctlftHsSfBRyQxU,1825
68
+ evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=vOOiOe26H2dk9VN2WbB_Oi3lzavMIaYDBq6sqeSIiAU,1093
61
69
  evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
- evalscope/benchmarks/aime/aime24_adapter.py,sha256=dBm9yukt4-CByEPUlAPAIN6mL3VkZcI-dw2kz4oQBMo,1715
63
- evalscope/benchmarks/aime/aime25_adapter.py,sha256=FB_NufY2V7uYdxVnrY_4y81gyyfYDnvedz1_zHdDWt4,1709
70
+ evalscope/benchmarks/aime/aime24_adapter.py,sha256=GrIxCHpUwgUy8tXGTB7iQOt8k7wG8MJB0CWbwBmIy-8,1703
71
+ evalscope/benchmarks/aime/aime25_adapter.py,sha256=yxo5roCb8ryX9ROUU2FdZ-WBTUPZ14MrBzEL0zPOh-U,1718
64
72
  evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=2a6wHJSLe89Xh18u1LBkMQEZzfOURiek6o0-k2lCQgM,4065
73
+ evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=em1YM2PxnJ8Of7Li3eqrw8PtwfeXSinfVIr-CIKVb60,4026
66
74
  evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
67
75
  evalscope/benchmarks/arc/ai2_arc.py,sha256=WtL4Z_ulcCU2KfptWTjTm75T2I2rVGd9aDBBB76P14w,5697
68
- evalscope/benchmarks/arc/arc_adapter.py,sha256=U-yPDAjYkPUUOXYjCM1ajdvlUVcdeuVoMK7yWJcX6LI,6369
76
+ evalscope/benchmarks/arc/arc_adapter.py,sha256=0h-eT4BBmUJQrakKMPUNE1nSRwK6LHB-cflWpWzY978,6364
69
77
  evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=bdQfLTWB5pFo4hET0uFqu5zMX9PNQNwdoLoGrL5jCBE,6213
78
+ evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=S3FQ_UD3GC8M7FU-PPeuJm5YVrG5qhnVE5T1jRpPuxo,6131
71
79
  evalscope/benchmarks/arena_hard/utils.py,sha256=NstI1VR5fTaT-bfXRj0cLqm0DtH8EY4EQHR-K9HJubI,5089
72
80
  evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
73
81
  evalscope/benchmarks/bbh/bbh_adapter.py,sha256=fROpzenrjpEBWtnvM_RL_m0uXPOhXTtYAglJEZbzUdY,8330
@@ -99,23 +107,23 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
99
107
  evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
100
108
  evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
101
109
  evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
102
- evalscope/benchmarks/ceval/ceval_adapter.py,sha256=E4QobCjSSkMZtPJyaT_XBVxiqEqa1bta1I9aFnaHOqs,11308
110
+ evalscope/benchmarks/ceval/ceval_adapter.py,sha256=1ITBXI0f01Dt1p7sb2RGswIeg9685Bkk2S2xmA1vat8,11295
103
111
  evalscope/benchmarks/ceval/ceval_exam.py,sha256=ngOvb6Fymt7iPWIb2fzrUVpqmUT2VBoqh7X_IH8Bcsc,4824
104
112
  evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=fYvkJn1UcWM3aqhPMTTtBPVzjTL-Rm_g9UwUJx1FvJc,8106
113
+ evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=zY8dfvrTeCHAQ3d7AM02CexZw5CVKH51ZOhtT7Q1Gko,8031
106
114
  evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
107
115
  evalscope/benchmarks/cmmlu/cmmlu.py,sha256=Y59NIGUFzJEztJbkehZsG4Cz0J_v9Cyju6xazHMYIcA,5022
108
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=TTq2jRz46Hqc_D_ZBaiw_OwKub1FZX6w8C7g7COIdGs,10372
116
+ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=r9zael_Y2Jso0ashevYpF8e5SHOBh8iMcPIJU5WT3pQ,10367
109
117
  evalscope/benchmarks/cmmlu/samples.jsonl,sha256=FXbyPQSDorKBGSD0lnOzioZmFjG07lIL87FRDRaMPSY,1722
110
118
  evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
111
119
  evalscope/benchmarks/competition_math/competition_math.py,sha256=Cehyokift7oDKjc8TdmfblZ6mMc39wQWtqqbUi34QLc,2629
112
- evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=lD7sDro0dSWKgYaM_ZgWbBdetxVURpjo_2q1gvVt1XU,6815
120
+ evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=wgejW-_QswtT8_3JKAQ_H6svH8IotDJDBEH7X4nP4bY,6760
113
121
  evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=ecNwAE3p2eKIeC4whSUdZpeJ8NgidbSFZbIYtSW26Xo,2394
122
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=QgLgIrjD3q53T-lu1UWTV6T4h1cKGoCQDh0O4QxFezw,2569
115
123
  evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=ZVGzUuuQ0UTOqQtXE40ZyBeMOSl8saSiFEQ5_siJ-c8,5052
124
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=fqbt61owPP7t2H4B2zbYVZTs0VBGuXNvWGvkukwhRYc,5039
117
125
  evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
118
- evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=8d5znAcQmFSmvyKV-JuMQzbY5k6xDNQQdrWZ7zgPTK4,4603
126
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=I2BanmO4WLrKviyLiIeqmS5mdyjqGg1X7hauv4HBjgk,4653
119
127
  evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
120
128
  evalscope/benchmarks/gpqa/chain_of_thought.txt,sha256=pgoT5a-DMPJaMhoH_M8zfU5s80ibWDTVW6vnonITd8k,5610
121
129
  evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=UB287DtnbkSQXZsbReFJqmQRwbo672DTCeXXilR_-Vc,4790
@@ -137,20 +145,20 @@ evalscope/benchmarks/ifeval/utils.py,sha256=TKrM1m2qDCUauahogItDdICf4mDk0OjasSxg
137
145
  evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
146
  evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=16whmFkJt9fLbei9d-kmjnWB_5y5vsiX9tK5kSuxDw8,2449
139
147
  evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
- evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=rOWaG8PV4AGIRhS_gqwxEhphEVe1Cqg57Eudwm5HTjI,6820
148
+ evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=iqmVUMZmyRhzOOXXQ-NN9P1nGvvbzTjOSEp6djbN_rw,6503
141
149
  evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
142
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=vLr43hvtR0WS9GclJ6xL9MIqwC941EiRSqgZ_hGHZnw,3382
150
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=AkvlQ-3oS8Tr3xZgx3omMt5w8jia6yH07D5Bq27Q5wc,3490
143
151
  evalscope/benchmarks/live_code_bench/load_utils.py,sha256=5i9wtdPLYR8ckjx5MaYQVC2LFYvjKzR6Fa6UZmeOTRc,2445
144
152
  evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
145
153
  evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
146
- evalscope/benchmarks/live_code_bench/testing_util.py,sha256=v4N7Y4MasNL6TjC4w-Duw_4Zn0oLdWAw3HU6ZrM76P8,17161
154
+ evalscope/benchmarks/live_code_bench/testing_util.py,sha256=abjlwp6HDayf88mMI_daOKm06nEOeNBaMkmGWqk2DJo,17286
147
155
  evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
156
  evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=RVbsiglxmEW37-tDYgr4Drywh26I94DRGhwv7uP2aYk,2829
149
157
  evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
- evalscope/benchmarks/math_500/math_500_adapter.py,sha256=SB2eb4Z7DTXdptqirEoctqTdDLEu28s7bLeCAMBmAFo,1923
158
+ evalscope/benchmarks/math_500/math_500_adapter.py,sha256=opT73il3CbM1zZhuqRHZu_4O4WEZCZPvZe06I4U8YGM,1911
151
159
  evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
152
160
  evalscope/benchmarks/mmlu/mmlu.py,sha256=sA8AC0bN7iURrSazqkY31s_reNVbDZSUCB-NCTQsVeI,5042
153
- evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=bQSRTgXk01pCfKdmTxr3si4FxET3j_yBVVmQlLchTns,11586
161
+ evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=__BrO2f7_AZ87a00HCRGPm5ZK8B4JTZKzRBRQY3yf3Q,11635
154
162
  evalscope/benchmarks/mmlu/samples.jsonl,sha256=f5Y2vwbEvNtpE7vrl9BHoJzsdceI4vUAo1frexYyX2o,1345
155
163
  evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
164
  evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=hPqxDqDhqin3TxfimfhIxfEc_8UfzTDGAfX7iDrWy28,4248
@@ -166,7 +174,7 @@ evalscope/benchmarks/race/race.py,sha256=TtFC3opqEA6q8AQIAFQRGx07FjD9z7iW8wmtxeO
166
174
  evalscope/benchmarks/race/race_adapter.py,sha256=RD0B-i5dzeNKuhqnWbremgf4tk9jmOO4_eLAiITB1F0,6381
167
175
  evalscope/benchmarks/race/samples.jsonl,sha256=bhSktBgU6axYQCClRtQ7nN8D1x815AU8xMAIG1oflG0,1243
168
176
  evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=CsRUJ0v1sSUmtO6QWkdzisn9OHN-1JSXB-9ghOuNqgY,8988
177
+ evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=to4kSKc29BmtG4q9R2PeM-sdHiL8toSyoVi1D9WMRKk,8949
170
178
  evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
179
  evalscope/benchmarks/super_gpqa/five_shot_prompt.txt,sha256=vD3RMeQustxY_oWA8IobntjywT8ZUO7Jaub--rElDT4,4718
172
180
  evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=BqNLL8BYnK6tRuIdV6ijL4Uym2SejH_h1BV06XNjSE4,9331
@@ -182,47 +190,147 @@ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=ueUU860kg5_xf_MtU
182
190
  evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
183
191
  evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
184
192
  evalscope/cli/cli.py,sha256=w_dtXljur9s5lmTn6LbbFL_viTPQB1WAEzhYcId09Og,729
185
- evalscope/cli/start_app.py,sha256=WTbba_Iitz1jkQ5n6KHRH-i3U7qJIM7iCi4a9roWjaA,808
193
+ evalscope/cli/start_app.py,sha256=PoAnmYLw_UdWpA7qrUkSIx8hRoIGRy9yXrbH8bYOSL4,804
186
194
  evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
187
195
  evalscope/cli/start_perf.py,sha256=5hLi5jWgM9BJPXLd8d9D1zqrcj_5c0KvkfB1DgD4_RU,831
188
196
  evalscope/cli/start_server.py,sha256=DQRIfbsHaOAsVcLGF6iRyJnxmd5Sf_tgytpJNfiWCeE,3662
189
197
  evalscope/collections/__init__.py,sha256=hd68Qf-ryeDsz5Pu-Dh83M5V5RE5mhLsG-vc55n5n0o,228
190
- evalscope/collections/evaluator.py,sha256=4IkdbKySOW-MzH9Zjn0uddQviFLe2pOef746fgbjkJo,12784
198
+ evalscope/collections/evaluator.py,sha256=Ll-qLet04aEp1WxoCKAuvZVWEZuy1lS_D-vZIN3zSQQ,13425
191
199
  evalscope/collections/sampler.py,sha256=2NwvhJVdi-mrDeK7RWwEGOoE7DdxtpyASRUZU_D6hWw,4855
192
200
  evalscope/collections/schema.py,sha256=mjJfNmy_athJ1TmnuJRkrKRlefzefuQXZuTtjn8SHKo,4073
193
201
  evalscope/evaluator/__init__.py,sha256=S6MU1O_iiNAaKxNIhO9MEmdW-BSNf_YH2l6NQ9lxVNo,103
194
- evalscope/evaluator/evaluator.py,sha256=szRQrXH5ILpUljb14lcunuOt185H8Um1paviTokraA4,19845
202
+ evalscope/evaluator/evaluator.py,sha256=oOVYRMMQfT3fqu-l33wmJtKlyeWxwoIUADMCoBNARTM,20271
195
203
  evalscope/evaluator/rating_eval.py,sha256=uo0uj9z_TDsxdYlT8WIfNZhFLAfRkW9zn_wlu-F72O0,5575
196
204
  evalscope/evaluator/reviewer/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
197
- evalscope/evaluator/reviewer/auto_reviewer.py,sha256=PDz1Nt_qq0oGZBBwek2-M8VBUNLkOkmYObzR8gO7nEc,16624
198
- evalscope/metrics/__init__.py,sha256=SWvqzUzdryW5URz6u4fPkP9XSyA09nQ8zBeE8BbchSg,349
199
- evalscope/metrics/code_metric.py,sha256=EXE2BZAc9JJT_cpd6eCb0Jo9wwtnzXzHBZxmLzG5Jpw,3300
200
- evalscope/metrics/llm_judge.py,sha256=Di0Q1c6VHLl0nQ_TVOZOOQlMApDIU83HuDPTOV8XrTA,4023
205
+ evalscope/evaluator/reviewer/auto_reviewer.py,sha256=5WRYuXFTDgVmolrOdiTysk-mXrpw6Qg87-iuY-VD1W4,16618
206
+ evalscope/metrics/__init__.py,sha256=y1sdj5FBKYW1q5kLC6QREzoITHwstJRUdji6p0X5aAE,1363
207
+ evalscope/metrics/llm_judge.py,sha256=MjyTC-xiSThk8Rd4IdUbsCXeeikoOORv6wt8H7SW8s4,4008
201
208
  evalscope/metrics/math_parser.py,sha256=uTDudn305G3b8-GboWTrDE6OfrEwAW-areHnoGXZ6Is,17302
202
209
  evalscope/metrics/metrics.py,sha256=_YI7RhxlFu_JOgeE3LF9UKu6mJruvyu4FgqVf78Bjb8,13813
203
- evalscope/metrics/named_metrics.py,sha256=pSHA2_qdi9B5bDHIh08GYhx63odilSwA_T-95K1Usl0,1380
210
+ evalscope/metrics/named_metrics.py,sha256=PrzU_1mGTeRFxVJFT1aXxIOiS7MnNoWyZsb8uCRVDeE,2278
204
211
  evalscope/metrics/rouge_metric.py,sha256=zhIUqenSuxnORR9tamLQBGjFwP91Zei2UiLtcOyseVM,4639
205
212
  evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
206
213
  evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=Kq6AObenmLVQ5tN3NgN042a6mgRFQmRO21-ohd9mSa8,11972
207
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe,sha256=J_K-oSt9usFEw87Av6j7ETn3J48EsmFuY5_iVvY6xjc,524464
208
- evalscope/metrics/resources/gpt2-zhcn3-v4.json,sha256=WkM4J_FDPPNQwYi0kj5sM5SVjk2_6bci7tqf8dV9p_U,1289341
209
- evalscope/models/__init__.py,sha256=i9vcOBMEF_UM7C2gpmh2GsQk3njwqevoQ6A4CnP1fHs,1000
210
- evalscope/models/base_adapter.py,sha256=7PbRwfD5PIZCBYVds6ZHI8TBY9C5i2LdPOTu88FJWlY,3414
211
- evalscope/models/chat_adapter.py,sha256=2XZmdhxnvy4yezPLXNVRbgrs0QkUY2VznEBq5mCYjKs,7106
212
- evalscope/models/choice_adapter.py,sha256=fnJdo-FMJ-zvNLbEJGc73odgWXIxtVudL00JIf2vzsA,8239
213
- evalscope/models/custom_adapter.py,sha256=AGztmZ0aT0g2flh4B4NaiZ8LCDg8tT0gVNxmrP5W1mA,2401
214
- evalscope/models/local_model.py,sha256=yydggBCLcBAmUWbBhv7o2CA3RbG0DwDZharPdrkbNcg,2628
215
- evalscope/models/model.py,sha256=diu4TE1ZFWdynTxsl4DejTNsLdwjxoyj2nsKR-Y8EZE,7343
216
- evalscope/models/register.py,sha256=4vX6AfScAzwD7UkncbuejfAiQHznQkK5hvtG6jEUbWo,809
217
- evalscope/models/server_adapter.py,sha256=dS_o9_iC8QY73AehIekYwBQieFECZ97JRfbfleJ-Dtk,6845
218
- evalscope/models/custom/__init__.py,sha256=wb6f_Bi39s5sj-VO7EXRDXB2WhyFb49BUtEMk77ksNQ,102
214
+ evalscope/metrics/t2v_metrics/__init__.py,sha256=GBxgKTPVy_qhW_F3M4Oi6QMWhdAi4PqGX5w3t6Tueho,1783
215
+ evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
216
+ evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
217
+ evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
218
+ evalscope/metrics/t2v_metrics/score.py,sha256=6tIKZoQprlQOBoV-2E-3InIi2Jl29a9W2BFPjKnV1nw,3044
219
+ evalscope/metrics/t2v_metrics/vqascore.py,sha256=UmcSSdQN8mzs3b11sD5Z31WIyQVQUpgXKWQ1XYoX1c8,469
220
+ evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
+ evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
222
+ evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
223
+ evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
224
+ evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=QhksCBA12Ekm67H4TiROkC84dcbHB4zL5oO7BU4fwnI,8099
225
+ evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=h5z0HcnobkGw7vEeIwiVauwYC5GRyKczdevZi60a1aw,3328
226
+ evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=atX4JAxR4xAmBZ0WIVf_K8g3tNvqeuXNIIUX50bzo4Q,3806
227
+ evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=dp6ZyWKU9sJ2MjsyQJvTi_tBoEs6l2-KYmjz8cN_SL0,2394
228
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
230
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=7Jl2noVHFZUN5rXd9XDBr2ILChP56JPOM1mbJSq5G8c,5047
231
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=I6KFiy1DPCtev7TLOFxjUjkC13Mt2eBBha_XaN31nlI,7940
232
+ evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
233
+ evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=9IGXCC8d6a6WTIICcC_KgZf0KdDcJ3L_HOSXILJmMIA,3447
234
+ evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=3uduuEmoliezG5BwymoNbRm3FXvKh9gtkN74_LX9yIs,3674
235
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=OtnnYsW0G2vGoUHfyB0F-m5r10A5-N6k9agFRXav-Uw,3199
236
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=IPQcC4-cYeJjHGRysh26E3iNGHz1UJA-oxxEpSIXpX0,6021
237
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
238
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=ZCN_PJ3jz-a9I0oGbeuOcGuMOJT5iVb-yh5Dzq49VwE,2700
239
+ evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
240
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=krWGPe--eLHwK9M2tqWkmu7iKlbAM_qanP46NUkmkhM,9896
241
+ evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=R5VS77aDRX4HVcwJ7xOAnf_uP8jhix4PXbxWim1BOdg,5903
242
+ evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=p22TqpCDUFV39I9anYjl5zehNXOCtPQ15fHnEeDrF3A,4712
243
+ evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=oEsZOQCZl6NS0SaNn9mM4S3NSg6lT5Lm_HH9Ju7i8O0,587
244
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
246
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=mMDRPYpSUsnbc3ijicy4IPD6J2z44iAIgKUdhkf5Nkw,14037
247
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
248
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=Em8li9Mqoc3Xv3sDz4lAlKU4h9vZpUkzycGGyM6a-sM,2807
249
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
250
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
251
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=R38FVQMznUzTAfq6DLp-Y34XYYWkqmy1-aMfNcGKthk,14998
252
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
253
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=0spv43GzXh_5l9BUZN8tuPZgGKkkU8InWjvFuUuRbLw,801
254
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=LPZP_XK612apDxYkvGsH8B8E2Z2Q2CaT6JY0T1dghEk,5866
255
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=1eMz5jxA06uoK-sZyD7SNnBy87gbwplt8526koTRBLo,3330
256
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=he84q4G6OdycZDQHVkHUxENuIQBBH8WJ-9GBiMpYC-A,9715
257
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
258
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
259
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=F2OldVATVzyvaSxO2l-tBexhOkmoVb7n23S89JfFIEc,8313
260
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=QKYKGyXpQbDWJfz6EDZCB5meB5HGj59ygmoPm00Q1dQ,10955
261
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
262
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
263
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
264
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json,sha256=zVHOJiAdTS92rHzg62Q0oTZZsZalondGMqDJJfbolAU,491
265
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml,sha256=Ls2ZfsKV3gDzg6F2zBHPhFbK-3na7ozNGWFmMq_8hTg,1074
266
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml,sha256=yXP6HQVyEYc1X2C_SawNIye4eoaQPxl8JazV7CXUPDc,1073
267
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml,sha256=OpdOvll7UX6nURi84rDvWiFZrLsNNHtoFRWdugVPvdA,1073
268
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml,sha256=hPHF831LSjBGbo1fg9fqhbeSAGOVW-iiZbWHVQVs8wU,957
269
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml,sha256=xMLIGVhkgwBsP9IbKFoZNW_lbVwwjz44ArlSRPS1Q98,980
270
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml,sha256=kyfbbtLOG8cY9CUFm-_g56djMMwfZhOgXSFV2gRMomQ,983
271
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml,sha256=_e45RN28lhhyFRg7JsDczNMU_nJbwirn3eVB3mgkmrI,1022
272
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml,sha256=6ycWPKz5alaQCxpuPuqX1e_whroRULgb8gICOWLDBO4,1019
273
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml,sha256=svzg_Ao0g0-tAnBKT0Jj4PDRvv1ikSxS1Dq5YkzrUTU,860
274
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml,sha256=ZZE6AWe8iiLTXYiJk60P0J4cRLwehLYzRn1ohZxgstI,955
275
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml,sha256=T7L97c2yFLZ5N3_4NFqvRxShvr7relE2GNREuukufCU,955
276
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml,sha256=U0s_BMVb21E2aGnLGBstzdR3WSTP_gk4Hubnnt50lcg,952
277
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml,sha256=AIgEAeTUqFiNVq-uIjbE_zh7jDPLFwchZsw0fCvWqU0,982
278
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml,sha256=CQZQICT2ogqwKklzWVUnfWidOY-Deflh_WD-vq08sys,958
279
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml,sha256=UGuL20xRPZPy3sZeqMgIzovdd1BOTESwTS2gfwsdGFk,955
280
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml,sha256=eKuyCU7yFIU1VSHNRzEu4Bm7NY6NPppIHcTd0RKXUrI,955
281
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
282
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
283
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
284
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=9QnWMeulCQm_c__-b9cBYbjqihLDSblW3-luGnxfBXQ,6391
285
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
286
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=eCm7opG-Ld--sZEG67creYqoRsHjWBuWwUha2s13AuM,9806
287
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=1vuUf6EGzfiyMOHmacI3NhjCIq1LFhANpLJ_Frgh3Mk,20423
288
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=PobqtMnfh7HuD4zdlj5tBMufvufIO5fIL0_NvTOK4AQ,52965
289
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=L20LYyiI8AGOTKfTJuYZgrkcvg2UHsnFJE866v5jlyE,18674
290
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=gKOIPQhyB8SVbx0wW3W-VQNmZhdA4186C1XVHHWYkjc,46810
291
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
292
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
293
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
294
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=A5y_qCsmW9j78w92L9VEjXRaqcsyI5FCu0Z9QJvKF_4,18960
295
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=hcPHQjb-QJRAEFpDackLqjKeu8z_uKz_5VyyeU3AQQI,13879
296
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=DM1W0vZ0ZhoR9vvbl4O_MlShApZHv2MtK37zHsVoCjc,30527
297
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=5Ub0AYnQFcG32ByitEah6Hog6oAKNrK7HTnmlLI4XfE,11325
298
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=uT-ubavkEWPsqVtFy-rPQCw2rf-VOd-ZLqP_oDyNTIs,39581
299
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=grUl2rtGJVDoP6u47jywBzcmqL9kylqCf1IkyKCgNLE,83469
300
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=iuiXv-jZcWVoUle6GDQXLy1SMXt4WPJEOD4qhjPe6U8,3856
301
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=1XxKgsA6QYBkaADLUoVwFDPtRJzCvcSrk-1lwGKzF_c,2286
302
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
303
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
304
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
305
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=uLV8Qp8lRGkMVq5EtvbPa4l8ZpykN6godzblV7oj8bg,7086
306
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=r-CRo6u9qmFcdSYNz9pWeuwj0XKykuWyFm7pQVK2yI4,6939
307
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=Vk1Fm7ED1eAHW6lmSKF7VBZa55EN4h1zBEBdm5uKW24,4303
308
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
309
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=8THNmMvp7bYNIfExYGRfNJ88cuEvdK9KTO-3-51fUsY,13961
310
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=TkSEKghtqXW_1bHFsG9dhGVKv2XAmTMcdAH2UDvuSFM,36639
311
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=rGifS4I0o3faXrLnW-eSbksmkx7ppMkRlnVBomsbD3Q,851
312
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
313
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
314
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=XzebAHBAjOpkIMZm43dd55PESgmyq_J45Ji6bogYR3s,11204
315
+ evalscope/models/__init__.py,sha256=yB4NuKvSd3Jd4GRQvJeGPxwigd8RJErdop5PzSQhsMY,1565
316
+ evalscope/models/local_model.py,sha256=1yjwt7NHE7pI8xoGv38NTql9KcCd80x1mjlELqkNHBQ,4110
317
+ evalscope/models/model.py,sha256=MxvJAUNkuT7IA3bchnmJDur_YCKj9ShOD2Uq40dBcGc,6308
318
+ evalscope/models/register.py,sha256=pNC69YUvw-lodYpOXmByHm26h4m0Lofgd_om-JhOBq4,1882
319
+ evalscope/models/adapters/__init__.py,sha256=mduiDZ6LgmkefNf4CtObZk6heOB93HxxgqTuYvrqWoo,590
320
+ evalscope/models/adapters/base_adapter.py,sha256=f2FY8DLERudkfb4_anxNVFE_D19xCJj9BObiHWspewI,3268
321
+ evalscope/models/adapters/chat_adapter.py,sha256=hzFrpvIrakKO5hsnbdXiDTO0cGajAdhcAN9ENoI6XvY,7312
322
+ evalscope/models/adapters/choice_adapter.py,sha256=4fuz3MFEqK8ln4mMs3goMCdRPBwYmmgN70HTdr_sW_U,8005
323
+ evalscope/models/adapters/custom_adapter.py,sha256=w8cD0b3xgcdhSZelcat67CGJnALOfz5IALzURnLjab8,2275
324
+ evalscope/models/adapters/server_adapter.py,sha256=5kH1yDAjETogR7aOdnCEueYE1bREI40OdXdBiJpMdIM,6734
325
+ evalscope/models/adapters/t2i_adapter.py,sha256=xkMRyZ61yTiJfmULK-p9du4nNox41pkHiV2CTFBO3qM,2659
326
+ evalscope/models/custom/__init__.py,sha256=MZylegALg1HerOYtp-qbzu4Wb6PW3JbrxwONHU-PAVs,131
219
327
  evalscope/models/custom/custom_model.py,sha256=rBQLAuPEw_OPUtRSCEmxEfpcA8jPj8bAdsmtKs4ygus,1566
220
- evalscope/models/custom/dummy_model.py,sha256=ODD6pt9FvZq_a54oYsehBDslRKHOsk9zsC9iAZvi5Yg,2020
328
+ evalscope/models/custom/dummy_model.py,sha256=WRT_aCBZLXnC4yRCgggkuySkhM71C47O2Txx_YNc3UM,1933
221
329
  evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
330
  evalscope/perf/arguments.py,sha256=UZKlkbDE2N408dY8Ji-WB8sl1rcmamywzxLvNXpnY0w,10194
223
- evalscope/perf/benchmark.py,sha256=nv7gtCkeKnLKQQiKM4G0MYO2ambcuwsbx67OgEQG0nM,7917
331
+ evalscope/perf/benchmark.py,sha256=C0tLaZzxqMonZK4iLtfjiQIxX3tO3-uFrOjgV-oVsU0,8024
224
332
  evalscope/perf/http_client.py,sha256=-c3-N7bxKsj3d5DVsKSaYA3XAHJDzZgoqZBbhuDYIGk,7419
225
- evalscope/perf/main.py,sha256=w-yDbl0osaTAMgC-JNPpqIq2LQ7U4c-Ht7Amj8Nbjc8,1278
333
+ evalscope/perf/main.py,sha256=C7iNEdb4SEMGmHsF4DHAak4O1zRxrWW1tMRmyhEkVwQ,1376
226
334
  evalscope/perf/plugin/__init__.py,sha256=1sl5s-csrwKb_LVTnpF3HqArz06TRD5LYJ0hpqvokUA,85
227
335
  evalscope/perf/plugin/registry.py,sha256=w1IAt6GDdluzSYK5i-yrntvx3_EvIIqJamEL0xZv3zA,1323
228
336
  evalscope/perf/plugin/api/__init__.py,sha256=Ckzbq4CkSMVQTedQcDHCYlRd6FTwQAElt2mHB-VXJac,195
@@ -233,7 +341,7 @@ evalscope/perf/plugin/api/openai_api.py,sha256=kTL_2OACuKhzd2W0Pf4DirpMumzk4V3rq
233
341
  evalscope/perf/plugin/datasets/__init__.py,sha256=Z6Jc0RxJS_z0nBBV1-b0-56Ija60AtQ7I_67gY6ZfdQ,568
234
342
  evalscope/perf/plugin/datasets/base.py,sha256=Z-INWueeYjfEZhP4lbTlBMVwIa6BcXZKWx-w7Pop3mA,1786
235
343
  evalscope/perf/plugin/datasets/custom.py,sha256=npreC7H1VsdTGYkqlMESvyOhtXOfZQA7_-ICmxe3FWk,936
236
- evalscope/perf/plugin/datasets/flickr8k.py,sha256=UzAIFIO0m5inWOkWM1mO6wfV2HOuXAqiTxCJ4b0SiZM,1589
344
+ evalscope/perf/plugin/datasets/flickr8k.py,sha256=MbJKEB0XqZE0nDEenwYs0FLH9QL658Vn9uQmUH4hPvk,1605
237
345
  evalscope/perf/plugin/datasets/line_by_line.py,sha256=AqZYG6tVL3BIGnzh_2Tev8lDYezJG_1gqJY8bSNQl3Q,957
238
346
  evalscope/perf/plugin/datasets/longalpaca.py,sha256=XelLris0-c3StLInQ-Oav4jqGcXPNfJxEDeYvaetEbI,1297
239
347
  evalscope/perf/plugin/datasets/openqa.py,sha256=4Pnx5duFJzoiTUfZCbcK7LO8f-skmcpYNUUrtNR_UUc,1463
@@ -241,8 +349,8 @@ evalscope/perf/plugin/datasets/random_dataset.py,sha256=SIlsjAE_Stknfr6o1CBFvANB
241
349
  evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
242
350
  evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
243
351
  evalscope/perf/utils/analysis_result.py,sha256=ESzaZHGTpr2LoJR3jpOzqMphxSrr79d364ZzD159PmY,1169
244
- evalscope/perf/utils/benchmark_util.py,sha256=XrpB6ISjY2p1ngwPr5eOQS7O_I1kmlbEn2wCwsC_5AA,6278
245
- evalscope/perf/utils/db_util.py,sha256=VDqiM6xOK7fSneU3YOOU-78LWB8El3mxj_Ixtw2gX3o,9051
352
+ evalscope/perf/utils/benchmark_util.py,sha256=CftjnxYA7d1aeAL_iuyXcJPwCL5A8zWGZSkNtjrMyW8,6309
353
+ evalscope/perf/utils/db_util.py,sha256=VsYgz6IsSNPAWGCopOOIxAUhUat3GRbZMlrfdZ6i4kM,9575
246
354
  evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
247
355
  evalscope/perf/utils/local_server.py,sha256=clF8i0UFmaxBBB6gX05KvVCyzSv0xzsAidz0_sLLlAk,4627
248
356
  evalscope/perf/utils/log_utils.py,sha256=1jmB31W3ol9ukPAPbQ8xG3yoZ9oi3tjEyMK5M3ERmbw,1471
@@ -267,10 +375,11 @@ evalscope/registry/tasks/general_qa.yaml,sha256=S3kdlrazWX2VAX2PMhNtBnFZVSnUKBNi
267
375
  evalscope/registry/tasks/gsm8k.yaml,sha256=M2I7otwOSy0usD8yG8d6QziASQlKdhKLflRHMG0LXiM,729
268
376
  evalscope/registry/tasks/mmlu.yaml,sha256=cJcMH1Cvgo9PlYoTmeGx2bcZayysltaa6ehK57dDkvo,726
269
377
  evalscope/registry/tasks/mmlu_mini.yaml,sha256=K8ouHh7ve5ZsbkqRtV3Jl-DF01YFPuObfwEdACJA4Pk,778
270
- evalscope/report/__init__.py,sha256=0Wes3ot2hy9s-WwZaBztst8qkNrXkOF-Hwa1WW1e8lY,260
271
- evalscope/report/app.py,sha256=Lew--YreNeuyLVktnUNZKIfGvnGE_oAD054kZB-YTHo,26904
378
+ evalscope/report/__init__.py,sha256=iLNqx7CnHSHQmOBqWUK_vt2VIjnvGslJTqn--7B4y_s,316
379
+ evalscope/report/app.py,sha256=8pcQi5oYAYa9hXoMoMUNfy9jSvSR9DDiXyLcyPd9AmA,28459
380
+ evalscope/report/app_arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
272
381
  evalscope/report/combinator.py,sha256=O3QirwtYhDhdaWVT4STJMCGZMwoX8BTeJ3HtS9iwnWQ,2567
273
- evalscope/report/generator.py,sha256=2DULY9W8QCUxdtyfNjo8XAP_YxI1LgR95jknK__kYPU,3600
382
+ evalscope/report/generator.py,sha256=q9aHWNjQgvutAKtpjfWOpfu5zNFdnXilO9OqBqt_Phg,3612
274
383
  evalscope/report/utils.py,sha256=DRlbjbqHEmM8rGlA4pwtlHFhOZtyUzcqiS-mejfIDkU,4584
275
384
  evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
276
385
  evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
@@ -290,7 +399,7 @@ evalscope/third_party/longbench_write/tools/__init__.py,sha256=I_ANdxdcIHpkIzIXc
290
399
  evalscope/third_party/longbench_write/tools/data_etl.py,sha256=T7a-4PwZg5alZQh-oTi1zjMxjGmVVZYVwSR9-diZlF8,5971
291
400
  evalscope/third_party/longbench_write/tools/openai_api.py,sha256=PiIvvDYJkn041SJkLoroXwl1B8TtwpB7licVfqNSeuQ,8168
292
401
  evalscope/third_party/thinkbench/__init__.py,sha256=C0aSu71_dc1upUVkKmq2VgDd9plpRcYUdCE6BjUWJcA,110
293
- evalscope/third_party/thinkbench/eval.py,sha256=76G4LTkxqWCDCyj7Ahjj-qjO1gFem1uDzpRAC27ICl0,18896
402
+ evalscope/third_party/thinkbench/eval.py,sha256=IyfVTm6arhjBgvGMG5OZwopqQTmWVMJ8zYbbVSLtrvk,19503
294
403
  evalscope/third_party/thinkbench/infer.py,sha256=2L4DAJKn3wAhNEKnKudQT60igGOJSKH80FR4nS7DHYk,3952
295
404
  evalscope/third_party/thinkbench/resources/critique_template.txt,sha256=d4Egc-qH--4lG8X_EcmgymnuZgiCMbee1M5pt4HrRKA,535
296
405
  evalscope/third_party/thinkbench/resources/reformat_template.txt,sha256=zTZyVAzmMBtAwI9lHly9EXsqX471OW-VTg538PDcB30,1775
@@ -309,21 +418,24 @@ evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1
309
418
  evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=GITEbyiER10Zi-ZWpSqYCdAsiVtNeGK24hvR3kmYn2s,2689
310
419
  evalscope/utils/__init__.py,sha256=jLVoGryuqUh4Km9QWWQBzpqkcVNRK0MbwNaSgckqdiU,139
311
420
  evalscope/utils/arena_utils.py,sha256=Gf8VpH4C_oF2Abif_QeL0rAP6tvTzsc0gglpdNkUE48,7155
312
- evalscope/utils/chat_service.py,sha256=9LNTT-8KsacOLqnQer8j57e224rwOMbU7txV6re-X-A,8720
421
+ evalscope/utils/chat_service.py,sha256=U2jtrkOa2asRp16Zam0zIi_38mCyWQqql_L6JSwii4I,8749
313
422
  evalscope/utils/completion_parsers.py,sha256=YWHkLkSfURTcUjNNlCL6PPDICd4F2Ns9figgPN4C97c,2933
314
423
  evalscope/utils/filters.py,sha256=x_NX40uWMmUsVrAGHCeeV2e63HZZFugWUgdUhk64ivM,1523
424
+ evalscope/utils/import_utils.py,sha256=Oo8saX_mMw4U1RrA7_pn8FmV6P9laru4fEgecqqwpqk,2585
315
425
  evalscope/utils/io_utils.py,sha256=Tjdgen1FsAA4ArqiUzu734L0Px5NuiS0GKRRiGIzxSA,4192
316
426
  evalscope/utils/logger.py,sha256=barHSdtbEu21ynGQj_wS-rd7B02wPPR5AgaWCQzvG4w,3638
317
427
  evalscope/utils/model_utils.py,sha256=hB9W334ecAb6553FhooT6_jM0g-tjj6AU48IV3K1CKw,1131
318
- evalscope/utils/utils.py,sha256=VuGdJh3xZAZ-cRoGcKeJTx3z8sgSs2eMjH-1JX2ZYOU,10615
428
+ evalscope/utils/utils.py,sha256=hP_ntROFsZ-zaNVpJtT2prNo8iX-UAKfRtdxbLtPJng,11105
319
429
  tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
320
430
  tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
431
+ tests/aigc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
432
+ tests/aigc/test_t2i.py,sha256=BcdS3OMypWnraXF4Cq3DhDVRpZq0qo9_0Qpyg54B7FY,2627
321
433
  tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
322
434
  tests/cli/test_all.py,sha256=pwup--iNxckUEsR_aFjIAbEQo3UogSu5aIWf9ryLP2o,4022
323
435
  tests/cli/test_collection.py,sha256=y8FjoPziPRf5BdJK8DHjcXn26ETKz1OyqjnCpwjt-F4,4096
324
- tests/cli/test_run.py,sha256=RW4AkJILqzzyd0wuIdy8Y9SB_4koSRJFezGjFdXdLJI,16549
436
+ tests/cli/test_run.py,sha256=1DHLFlgGvHJizbLVc1ShcGFAHirEPgW8r88H7g8Sbx4,17245
325
437
  tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
326
- tests/perf/test_perf.py,sha256=BXd6SCMbBDKmh-P_KGTOpuwVQZ05xCKjvH01zGyvBJI,3787
438
+ tests/perf/test_perf.py,sha256=diwwEmoWR-6xSVeGF65J6TWHRNj54rkwyvnhHh7PiE0,3919
327
439
  tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
328
440
  tests/rag/test_clip_benchmark.py,sha256=ZCBtgnF8Vuji6WQlb92-_RIvXlUX_Xt-cHZP4AN_DNI,2552
329
441
  tests/rag/test_mteb.py,sha256=YJw6X1jwX6SYNB-ryVb-OHJWu3vsE3Y4STATI75rdG0,5619
@@ -334,9 +446,9 @@ tests/swift/test_run_swift_vlm_eval.py,sha256=C8DftjewnZaerQWfERI70bU3sQLWQ-ejZU
334
446
  tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=THZEXUOSqm9rWslwJHmZyh-Ytv5c_QKpgRW5J2s_69E,6017
335
447
  tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
336
448
  tests/vlm/test_vlmeval.py,sha256=UqRiBPMU3vRtLIG1Qu4ZVhyUQx-zGYQuLCgobwf-7a4,3176
337
- evalscope-0.14.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
338
- evalscope-0.14.0.dist-info/METADATA,sha256=HQ1pt-YU950AcwwWiypjGcWg0wYU9n6PFZ7j6PG4uHg,33040
339
- evalscope-0.14.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
340
- evalscope-0.14.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
341
- evalscope-0.14.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
342
- evalscope-0.14.0.dist-info/RECORD,,
449
+ evalscope-0.15.1.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
450
+ evalscope-0.15.1.dist-info/METADATA,sha256=JvRF5sI_9ak9Y-FwWdU1Y8BE96iKPLO_hIGC7Z9SWpg,34080
451
+ evalscope-0.15.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
452
+ evalscope-0.15.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
453
+ evalscope-0.15.1.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
454
+ evalscope-0.15.1.dist-info/RECORD,,
tests/aigc/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
tests/aigc/test_t2i.py ADDED
@@ -0,0 +1,87 @@
1
+ from dotenv import dotenv_values
2
+
3
+ env = dotenv_values('.env')
4
+
5
+ import os
6
+ import unittest
7
+
8
+ from evalscope.config import TaskConfig
9
+ from evalscope.constants import EvalType, JudgeStrategy, ModelTask, OutputType
10
+ from evalscope.run import run_task
11
+ from evalscope.utils import test_level_list
12
+ from evalscope.utils.logger import get_logger
13
+
14
+ os.environ['LOG_LEVEL'] = 'DEBUG'
15
+
16
+ logger = get_logger()
17
+
18
+
19
+ class TestRun(unittest.TestCase):
20
+ @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
21
+ def test_run_general(self):
22
+ from evalscope.config import TaskConfig
23
+
24
+ task_cfg = TaskConfig(
25
+ datasets=[
26
+ 'general_t2i'
27
+ ],
28
+ dataset_args={
29
+ 'general_t2i': {
30
+ 'metric_list': [
31
+ 'PickScore',
32
+ 'CLIPScore',
33
+ 'HPSv2Score',
34
+ 'HPSv2.1Score',
35
+ 'BLIPv2Score',
36
+ 'ImageRewardScore',
37
+ 'VQAScore',
38
+ 'FGA_BLIP2Score',
39
+ 'MPS'
40
+ ],
41
+ 'dataset_id': 'custom_eval/multimodal/t2i/example.jsonl',
42
+ }
43
+ }
44
+ )
45
+
46
+ run_task(task_cfg=task_cfg)
47
+
48
+
49
+ @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
50
+ def test_run_benchmark(self):
51
+
52
+ task_cfg = TaskConfig(
53
+ model='stabilityai/stable-diffusion-xl-base-1.0', # model on modelscope
54
+ model_task=ModelTask.IMAGE_GENERATION, # must be IMAGE_GENERATION
55
+ model_args={
56
+ 'use_safetensors': True,
57
+ 'variant': 'fp16',
58
+ 'torch_dtype': 'torch.float16',
59
+ },
60
+ datasets=[
61
+ 'tifa160',
62
+ # 'genai_bench',
63
+ # 'evalmuse',
64
+ # 'hpdv2',
65
+ ],
66
+ dataset_args={
67
+ 'tifa160': {
68
+ 'metric_list': [
69
+ 'PickScore',
70
+ # 'CLIPScore',
71
+ # 'HPSv2Score',
72
+ # 'BLIPv2Score',
73
+ # 'ImageRewardScore',
74
+ # 'VQAScore',
75
+ # 'FGA_BLIP2Score',
76
+ ]
77
+ }
78
+ },
79
+ limit=5,
80
+ generation_config={
81
+ 'num_inference_steps': 50,
82
+ 'guidance_scale': 7.5
83
+ },
84
+ # use_cache='outputs/20250427_134122',
85
+ )
86
+
87
+ run_task(task_cfg=task_cfg)
tests/cli/test_run.py CHANGED
@@ -207,19 +207,32 @@ class TestRun(unittest.TestCase):
207
207
  from evalscope.config import TaskConfig
208
208
 
209
209
  task_cfg = TaskConfig(
210
- model='Qwen/Qwen2.5-0.5B-Instruct',
210
+ model='Qwen/Qwen3-1.7B',
211
211
  datasets=[
212
- 'iquiz',
212
+ # 'iquiz',
213
213
  # 'math_500',
214
- # 'aime24',
215
- # 'competition_math'
214
+ 'aime24',
215
+ # 'competition_math',
216
+ # 'mmlu',
216
217
  ],
217
218
  dataset_args={
218
219
  'competition_math': {
219
220
  'subset_list': ['Level 4', 'Level 5']
220
- }
221
+ },
222
+ 'mmlu': {
223
+ 'subset_list': ['elementary_mathematics', 'high_school_european_history', 'nutrition'],
224
+ 'few_shot_num': 0
225
+ },
221
226
  },
222
- limit=5
227
+ limit=5,
228
+ eval_batch_size=5,
229
+ generation_config={
230
+ 'max_new_tokens': 1000, # 最大生成token数,建议设置为较大值避免输出截断
231
+ 'temperature': 0.7, # 采样温度 (qwen 报告推荐值)
232
+ 'top_p': 0.8, # top-p采样 (qwen 报告推荐值)
233
+ 'top_k': 20, # top-k采样 (qwen 报告推荐值)
234
+ 'chat_template_kwargs': {'enable_thinking': False} # 关闭思考模式
235
+ }
223
236
  )
224
237
 
225
238
  run_task(task_cfg=task_cfg)
@@ -284,7 +297,7 @@ class TestRun(unittest.TestCase):
284
297
  # 'general_qa'
285
298
  # 'super_gpqa',
286
299
  # 'mmlu_redux',
287
- 'maritime_bench'
300
+ # 'maritime_bench'
288
301
  ],
289
302
  dataset_args={
290
303
  'mmlu': {
tests/perf/test_perf.py CHANGED
@@ -103,7 +103,7 @@ class TestPerf(unittest.TestCase):
103
103
  from evalscope.perf.arguments import Arguments
104
104
  task_cfg = Arguments(
105
105
  parallel=20,
106
- model='Qwen2.5-0.5B-Instruct',
106
+ model='Qwen3-1.7B',
107
107
  url='http://127.0.0.1:8801/v1/completions',
108
108
  api='openai',
109
109
  dataset='random',
@@ -112,11 +112,14 @@ class TestPerf(unittest.TestCase):
112
112
  prefix_length=0,
113
113
  min_prompt_length=1024,
114
114
  max_prompt_length=1024,
115
- number=40,
115
+ number=20,
116
116
  tokenizer_path='Qwen/Qwen2.5-0.5B-Instruct',
117
117
  seed=None,
118
+ extra_args={'ignore_eos': True}
118
119
  )
119
- run_perf_benchmark(task_cfg)
120
+ metrics_result, percentile_result = run_perf_benchmark(task_cfg)
121
+ print(metrics_result)
122
+ print(percentile_result)
120
123
 
121
124
 
122
125
  if __name__ == '__main__':