evalscope 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of evalscope might be problematic. Click here for more details.

Files changed (176) hide show
  1. evalscope/api/benchmark/__init__.py +8 -1
  2. evalscope/api/benchmark/adapters/__init__.py +1 -0
  3. evalscope/api/benchmark/adapters/default_data_adapter.py +12 -0
  4. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  5. evalscope/api/benchmark/benchmark.py +14 -0
  6. evalscope/api/dataset/dataset.py +21 -0
  7. evalscope/api/dataset/loader.py +6 -2
  8. evalscope/api/mixin/sandbox_mixin.py +32 -54
  9. evalscope/api/model/generate_config.py +6 -0
  10. evalscope/app/ui/multi_model.py +6 -1
  11. evalscope/app/ui/single_model.py +8 -2
  12. evalscope/app/utils/data_utils.py +3 -2
  13. evalscope/app/utils/visualization.py +2 -2
  14. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  15. evalscope/benchmarks/ai2d/ai2d_adapter.py +3 -2
  16. evalscope/benchmarks/bfcl/bfcl_adapter.py +11 -46
  17. evalscope/benchmarks/blink/__init__.py +0 -0
  18. evalscope/benchmarks/blink/blink_adapter.py +61 -0
  19. evalscope/benchmarks/chartqa/__init__.py +0 -0
  20. evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
  21. evalscope/benchmarks/chartqa/utils.py +38 -0
  22. evalscope/benchmarks/data_collection/data_collection_adapter.py +2 -1
  23. evalscope/benchmarks/docvqa/__init__.py +0 -0
  24. evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
  25. evalscope/benchmarks/general_arena/general_arena_adapter.py +1 -1
  26. evalscope/benchmarks/general_arena/utils.py +2 -1
  27. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
  28. evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
  29. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +23 -4
  30. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  31. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +158 -0
  32. evalscope/benchmarks/hle/hle_adapter.py +3 -2
  33. evalscope/benchmarks/humaneval/humaneval_adapter.py +2 -1
  34. evalscope/benchmarks/infovqa/__init__.py +0 -0
  35. evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
  36. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +3 -1
  37. evalscope/benchmarks/math_verse/__init__.py +0 -0
  38. evalscope/benchmarks/math_verse/math_verse_adapter.py +100 -0
  39. evalscope/benchmarks/math_vision/__init__.py +0 -0
  40. evalscope/benchmarks/math_vision/math_vision_adapter.py +111 -0
  41. evalscope/benchmarks/math_vista/math_vista_adapter.py +6 -26
  42. evalscope/benchmarks/mm_bench/mm_bench_adapter.py +2 -2
  43. evalscope/benchmarks/mmmu/mmmu_adapter.py +1 -1
  44. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +1 -1
  45. evalscope/benchmarks/ner/__init__.py +0 -0
  46. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  47. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  48. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  49. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  50. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  51. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  52. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  53. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  54. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  55. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  56. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  57. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  58. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  59. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  60. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  61. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  62. evalscope/benchmarks/ocr_bench/__init__.py +0 -0
  63. evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py +101 -0
  64. evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py +87 -0
  65. evalscope/benchmarks/ocr_bench_v2/TEDS_metric.py +963 -0
  66. evalscope/benchmarks/ocr_bench_v2/__init__.py +0 -0
  67. evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
  68. evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py +50 -0
  69. evalscope/benchmarks/ocr_bench_v2/parallel.py +46 -0
  70. evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py +0 -0
  71. evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt +26 -0
  72. evalscope/benchmarks/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
  73. evalscope/benchmarks/ocr_bench_v2/spotting_eval/script.py +481 -0
  74. evalscope/benchmarks/ocr_bench_v2/spotting_metric.py +179 -0
  75. evalscope/benchmarks/ocr_bench_v2/utils.py +433 -0
  76. evalscope/benchmarks/ocr_bench_v2/vqa_metric.py +254 -0
  77. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  78. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  79. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  80. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  81. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  82. evalscope/benchmarks/poly_math/__init__.py +0 -0
  83. evalscope/benchmarks/poly_math/poly_math_adapter.py +127 -0
  84. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  85. evalscope/benchmarks/pope/__init__.py +0 -0
  86. evalscope/benchmarks/pope/pope_adapter.py +111 -0
  87. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  88. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  89. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  90. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  91. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +1 -1
  92. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +1 -1
  93. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  94. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  95. evalscope/benchmarks/zerobench/__init__.py +0 -0
  96. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  97. evalscope/constants.py +4 -0
  98. evalscope/evaluator/evaluator.py +72 -79
  99. evalscope/metrics/math_parser.py +14 -0
  100. evalscope/metrics/metric.py +52 -1
  101. evalscope/metrics/metrics.py +16 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
  115. evalscope/models/utils/openai.py +4 -0
  116. evalscope/perf/arguments.py +24 -4
  117. evalscope/perf/benchmark.py +74 -89
  118. evalscope/perf/http_client.py +31 -16
  119. evalscope/perf/main.py +15 -2
  120. evalscope/perf/plugin/api/base.py +9 -7
  121. evalscope/perf/plugin/api/custom_api.py +13 -58
  122. evalscope/perf/plugin/api/default_api.py +179 -79
  123. evalscope/perf/plugin/api/openai_api.py +4 -3
  124. evalscope/perf/plugin/datasets/base.py +21 -0
  125. evalscope/perf/plugin/datasets/custom.py +2 -3
  126. evalscope/perf/plugin/datasets/line_by_line.py +2 -3
  127. evalscope/perf/plugin/datasets/longalpaca.py +2 -3
  128. evalscope/perf/plugin/datasets/openqa.py +2 -4
  129. evalscope/perf/plugin/datasets/random_dataset.py +1 -3
  130. evalscope/perf/utils/benchmark_util.py +36 -22
  131. evalscope/perf/utils/db_util.py +14 -19
  132. evalscope/perf/utils/local_server.py +0 -44
  133. evalscope/perf/utils/log_utils.py +21 -6
  134. evalscope/report/__init__.py +11 -2
  135. evalscope/report/combinator.py +52 -2
  136. evalscope/run.py +4 -0
  137. evalscope/utils/function_utils.py +195 -12
  138. evalscope/utils/io_utils.py +74 -0
  139. evalscope/utils/json_schema.py +8 -6
  140. evalscope/utils/logger.py +49 -17
  141. evalscope/utils/multi_choices.py +16 -1
  142. evalscope/utils/ner.py +377 -0
  143. evalscope/version.py +2 -2
  144. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/METADATA +239 -393
  145. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/RECORD +140 -98
  146. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/WHEEL +1 -1
  147. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/top_level.txt +0 -1
  148. tests/__init__.py +0 -1
  149. tests/benchmark/__init__.py +0 -1
  150. tests/benchmark/test_eval.py +0 -429
  151. tests/benchmark/test_image_edit.py +0 -65
  152. tests/benchmark/test_sandbox.py +0 -81
  153. tests/benchmark/test_t2i.py +0 -142
  154. tests/benchmark/test_vlm.py +0 -137
  155. tests/cli/__init__.py +0 -1
  156. tests/cli/test_all.py +0 -269
  157. tests/cli/test_collection.py +0 -99
  158. tests/cli/test_custom.py +0 -268
  159. tests/cli/test_reasoning.py +0 -81
  160. tests/common.py +0 -73
  161. tests/perf/__init__.py +0 -1
  162. tests/perf/test_perf.py +0 -206
  163. tests/rag/test_clip_benchmark.py +0 -87
  164. tests/rag/test_mteb.py +0 -213
  165. tests/rag/test_ragas.py +0 -128
  166. tests/swift/__init__.py +0 -1
  167. tests/swift/test_run_swift_eval.py +0 -146
  168. tests/swift/test_run_swift_vlm_eval.py +0 -128
  169. tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
  170. tests/test_run_all.py +0 -12
  171. tests/utils.py +0 -13
  172. tests/vlm/__init__.py +0 -1
  173. tests/vlm/test_vlmeval.py +0 -102
  174. {tests/rag → evalscope/benchmarks/aa_lcr}/__init__.py +0 -0
  175. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/entry_points.txt +0 -0
  176. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info/licenses}/LICENSE +0 -0
@@ -1,24 +1,25 @@
1
1
  evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
2
2
  evalscope/arguments.py,sha256=jKAF47PsqXRioU21gRHw9hxJnfR31z_X7c__glRY5ns,6257
3
3
  evalscope/config.py,sha256=S2N11-AxQkT7lVffpjXdtpT4QpnSP6th-c8I-501mwM,11507
4
- evalscope/constants.py,sha256=W3E4Jp-x6qxvPOYtU9bNlzlERFvSAA_3F007apIwUlU,3601
5
- evalscope/run.py,sha256=A9_7pR3FiA-It46A3Mqk7ce6fQy548p0ux2QUugj2hI,6531
4
+ evalscope/constants.py,sha256=Su_CoL5Gn8AV2lZN3vmTxpnTXuJ3Y3xz7SOzF8BcISI,3717
5
+ evalscope/run.py,sha256=dKFesxZZteOhscHif2A8xQHsJnG78D-m2gdfaWyMNC4,6742
6
6
  evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
7
- evalscope/version.py,sha256=H_zHGJkiB6equdW6Jo4F_hhdLYKZqriowav05O5_CeY,118
7
+ evalscope/version.py,sha256=-m3fRuGUlprLmY84Yfh4OA1j3cM9SSJUGypM9Z5Ltng,118
8
8
  evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
10
- evalscope/api/benchmark/__init__.py,sha256=9xcTxpcQ6HhZ0QDwEIZhAT5IjybzaJ60VGLcmaFE5dU,188
11
- evalscope/api/benchmark/benchmark.py,sha256=gqAM81SeGb_Q0rA6Q-LFpnNkOUiwOj43aRWECtCxAOE,10832
10
+ evalscope/api/benchmark/__init__.py,sha256=tp5ZjfopdknpePwZ-byZd8BkQs6fFzW0UoOp0NBEaks,229
11
+ evalscope/api/benchmark/benchmark.py,sha256=iH35ugrPfL9RHFbtJjs0AMFnQiYO9lUqSkRnRPUQsVc,11194
12
12
  evalscope/api/benchmark/meta.py,sha256=N4u8NQjkjIw-xaf6KFnb6C8JDKB0DLbsXyXblDqIpvE,4304
13
- evalscope/api/benchmark/adapters/__init__.py,sha256=uLt_GiU4s-_6Rjgmr4OUTtE7dvEX-ZIQ403fd6oNuxA,264
14
- evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=WS4Pm0pk51Se196Ho31FmOqGyOajTtUGbbjWD9U7UwU,28064
13
+ evalscope/api/benchmark/adapters/__init__.py,sha256=TEVqVR0hDuvH78jX-e028XJgXVcrnVuE8FRV-36Aspc,300
14
+ evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=RWDweSmXKGv5hPPjeV4VF76gbKqYJEsab_lQYGUM2PA,28785
15
15
  evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
16
16
  evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
17
+ evalscope/api/benchmark/adapters/ner_adapter.py,sha256=_rvfl8cNlvKoQkHqR2tC_K-xZaV0TsB_pIzI4sP_SM0,8906
17
18
  evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
18
19
  evalscope/api/benchmark/adapters/vision_language_adapter.py,sha256=5d7ITkeosikb7u0ag0WkMaZ0SAYGkR_wKM9NP495GKk,280
19
20
  evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
20
- evalscope/api/dataset/dataset.py,sha256=9bwSx89zgOOBRQkRPVv-B5Yi30A6J1MLtekQSqwsy9g,11328
21
- evalscope/api/dataset/loader.py,sha256=t7KLH5ltLUumhiPIyYJzk6zn2iKLx-D2gIIoMhKdnhc,9714
21
+ evalscope/api/dataset/dataset.py,sha256=y-1DvPxN1Gxf-oEnrUq0Dcs4-rUQkApXP_rVYwsixSM,12119
22
+ evalscope/api/dataset/loader.py,sha256=44wQ3aBbn4YJyRjEsA1Bpg1DZicdCUzVybPoba_JhzY,9797
22
23
  evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
23
24
  evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
24
25
  evalscope/api/evaluator/cache.py,sha256=a_M2ouUjtkMr5m3wRbmsE8ETP_aacxbm0d38yY5RljM,13244
@@ -35,9 +36,9 @@ evalscope/api/metric/metric.py,sha256=XkjBqpZbFYynhTIH8WawfPmItbDQ6jWufE_ox9zDPC
35
36
  evalscope/api/metric/scorer.py,sha256=dczSQwkRmPk1uvNCMGT5G6nYbwWTcpwsZtyYXWkrJII,3749
36
37
  evalscope/api/mixin/__init__.py,sha256=xBuoTuao5o_EFThgeeeWI87x64Q12aJttsaZc8gak_c,83
37
38
  evalscope/api/mixin/llm_judge_mixin.py,sha256=ECVDfxCeAEkymFssD7xKhIDcct2qgQTqGnbijXk9leE,5675
38
- evalscope/api/mixin/sandbox_mixin.py,sha256=uKqBtTtttKwrUArY-CTMDdFHjRBOR7Kl1sxaGHe-S2Q,7653
39
+ evalscope/api/mixin/sandbox_mixin.py,sha256=RbTpZXr6ohxgp1vU4YGMKmGKiIzVqQZ44quAHBX8zvs,6539
39
40
  evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
40
- evalscope/api/model/generate_config.py,sha256=wQeDknXb49yBKSRL9rlIyerPobGXqU-A4hL1vySNGPo,7656
41
+ evalscope/api/model/generate_config.py,sha256=xp8yDdDNomHpYaCNVDnZrZkQlfHpp9lrXKO7lqXEQHY,8017
41
42
  evalscope/api/model/model.py,sha256=c7YVbYYk47MHWwPjoB66xWjgmHdUGTOSOdtIsLcJfyc,12782
42
43
  evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
43
44
  evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
@@ -50,15 +51,15 @@ evalscope/app/arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,69
50
51
  evalscope/app/constants.py,sha256=oG6tZ618zJcCnwZ5THnYL0gWTPDb5XKrnmdrWxY3Z4Q,385
51
52
  evalscope/app/ui/__init__.py,sha256=IBxyQ2H-kSHoHJmXWDR8QMermvsMbiu673PQbXP_FnE,616
52
53
  evalscope/app/ui/app_ui.py,sha256=wLrQ4VM7BnzvaYmPAk8NH9t5BaWooHFJcgmAOOd2I1w,2032
53
- evalscope/app/ui/multi_model.py,sha256=fO8z-ZFucWtgaKmuQ50AkUp4BoYOFqOkxeTBUUAK0bM,15122
54
+ evalscope/app/ui/multi_model.py,sha256=mvMgpgiJGRrNRtReFcD_PiLatq-81zp65Vb3JYUP3PE,15356
54
55
  evalscope/app/ui/sidebar.py,sha256=JA0QbG2iPStK-lFy6x_AjOHlQdesmgXoS0OYJUJ_Wyg,1339
55
- evalscope/app/ui/single_model.py,sha256=1rgYrJOO75fJG2pa74tzEocO_91jXOAKFQAUViBcYFk,9459
56
+ evalscope/app/ui/single_model.py,sha256=zFt1uDYrcgNJ7e_YLigrs6IXT3jyGMVn-7rv4CHAZvE,9741
56
57
  evalscope/app/ui/visualization.py,sha256=jXFX_-7woQkcAiQkPAIRwVv1kdRdXonn9IvmB8yzPDU,1102
57
- evalscope/app/utils/data_utils.py,sha256=m7Z0Us_josUFseI8VJpIp8QaYeLnu91E2HCZ8WSB07E,7396
58
+ evalscope/app/utils/data_utils.py,sha256=GYOfkh0NoueeX3od-L852Q9C9SSkEFlW_40wjPa5b9w,7470
58
59
  evalscope/app/utils/env_utils.py,sha256=2pmz4uNun-XNP6TqM6Oe576XopweEClhBaIdWO--kd0,382
59
60
  evalscope/app/utils/localization.py,sha256=rWEviBmcnhIpAA-cG8djbbUA6p1Y358c0dxge5Pqi1U,6131
60
61
  evalscope/app/utils/text_utils.py,sha256=-K-hRPMZ29Yqjhzd-391gPaD4B4wUuIg71PfbLnGJ38,3754
61
- evalscope/app/utils/visualization.py,sha256=dwEXbGfY7vFysnL0HmrHS2BEWaJkg-dZ9ayDlRhdvv4,3559
62
+ evalscope/app/utils/visualization.py,sha256=lycwcr-kFT2FKVw6iWMh3iD_n4dqpWVzhXMLDnkN8QY,3563
62
63
  evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
64
  evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
64
65
  evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
@@ -108,8 +109,10 @@ evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KB
108
109
  evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
109
110
  evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
110
111
  evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
112
+ evalscope/benchmarks/aa_lcr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
+ evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py,sha256=7KZRdIhg733vBMBWngxTjtrZtl_DHjwMNLt9C2tN0_w,7483
111
114
  evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=3GBNV4cNv9bBLJRdG_uA9qNhuN6qAEutHl8d-rsFpFU,2018
115
+ evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
113
116
  evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
117
  evalscope/benchmarks/aime/aime24_adapter.py,sha256=HTlriHoHzlm1Rf3KAiGRLs8sx6Gyf6s7RGtOjk_hGS4,1767
115
118
  evalscope/benchmarks/aime/aime25_adapter.py,sha256=ZOE_6Zhg1MatWJSu2Zq372nKUODYtNFZimS1MJRFz5A,1591
@@ -152,10 +155,15 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
152
155
  evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
153
156
  evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
154
157
  evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=WzpL7XWDdx-EvbLluIOiMlADTO42CYs0IwQFvIfhTI0,18402
158
+ evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=GOHmMqP1jnXFU4lkke5dzCszyG9kHvBsDl-GhtRWlgg,17030
156
159
  evalscope/benchmarks/bfcl/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
160
+ evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
+ evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
157
162
  evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
158
163
  evalscope/benchmarks/ceval/ceval_adapter.py,sha256=4FLPgY-UtqINafnNxfOsE9AwS6GFXFCUGOBI-4EZUGk,8503
164
+ evalscope/benchmarks/chartqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
+ evalscope/benchmarks/chartqa/chartqa_adapter.py,sha256=DA1kthMUvn4_GUfdRfuR-au3RkhE3WKPnR_f8nlhd4c,2813
166
+ evalscope/benchmarks/chartqa/utils.py,sha256=Ta9ZUMpIqzrAszju7_WOMBAlilH1Tx6TCheVpjrZJJI,1672
159
167
  evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
168
  evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=OWzRlSGswV24V-heLqqo7GQzpJp01TZ0DhFHq0iUP9A,8238
161
169
  evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -163,10 +171,12 @@ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5ri
163
171
  evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
164
172
  evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=NOqckeyuabH_nwaxL5IWmH887UO5rvBKA2jx7qb9fNs,2226
165
173
  evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=eetF21dN55e0MNPxTaiDbkPZDidt4cX2decQjC_deJI,8676
174
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=PbV5S0rUVF0jgrBKNjuZh2oE1FAsbYnPymg5u7NBjqo,8712
167
175
  evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
176
  evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
169
177
  evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
178
+ evalscope/benchmarks/docvqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
+ evalscope/benchmarks/docvqa/docvqa_adapter.py,sha256=xGaayycILYoLd8r6wLLppDbU6Z1FdafbYFyjLHaftAA,2882
170
180
  evalscope/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
181
  evalscope/benchmarks/drop/drop_adapter.py,sha256=Jbbr5O_Y5LI_vT_RskRQVKxGkiIraX_uXP7fYaZ5eZs,9995
172
182
  evalscope/benchmarks/drop/utils.py,sha256=Z9PHrNnRfGqFHCLONg5SWKARp1eTJlHFc_bU46t_YrM,1344
@@ -174,26 +184,28 @@ evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
174
184
  evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyTT5r521676d_xJ6Vg,5514
175
185
  evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
176
186
  evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
177
- evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=DzJaokqZwR2L8HDiahss8EbQ3vcsMXkzkMghxU-uAOo,21639
178
- evalscope/benchmarks/general_arena/utils.py,sha256=zS4l1RKwvl0Z9Mk7kth9WVQGHTgE_aNDZa_XNy9tGyM,6874
187
+ evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=lb51HwfvKXJgJWdjYyyTRsT5pX876yEuWAz8G2oH6yM,21636
188
+ evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
179
189
  evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=7VKg_EzXkRvoWpR7h8qB4sVVb1eZHCGcPk-X_NMS5tE,2062
190
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=p7_C0CmKJthMY1Iri1SyNfssuYBws_dkhPMREu-uM94,2059
181
191
  evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
182
- evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=u29IsH5sgw-T0IezuI1jErGwykz-F7875AdEhdvS5rk,3522
192
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=_7Jk_h-qcaxWHgrULojNqXwZ8XgicmXhYT8bOKwnyAU,3519
183
193
  evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
194
  evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
185
195
  evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
186
196
  evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
187
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=W4vTXsC7iHN1AgvpaCf1Rj7y2O8QczIluucnpSC5aYo,2636
197
+ evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=59EaZI2veg89_yyJyJ8QxkDMrQPC4ZTj3YQOBZ2_Vbk,3844
198
+ evalscope/benchmarks/hallusion_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
+ evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py,sha256=cKnhPNtZTfu1zKlgeguH9qnMNjnDNPSUKLb9ZETDnqg,6518
188
200
  evalscope/benchmarks/healthbench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
201
  evalscope/benchmarks/healthbench/healthbench_adapter.py,sha256=1sL7i9yhORH4xiFWB9puPKWNZZFJGZFAlKdlzHp-fiw,13228
190
202
  evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGgVczAv-LPjA7Y,3683
191
203
  evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
192
204
  evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFxWOVKBUhk3Cc0EGzF5uYLK4,2041
193
205
  evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
- evalscope/benchmarks/hle/hle_adapter.py,sha256=4YVmETL9mEiLxF4vWRjePLyFaxelax6nOaqoAH5ZxmU,6389
206
+ evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
195
207
  evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
196
- evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=uLs3UHSALS3YHt0qzBismrIqdEUgbEalQbjC0CU7ym4,4085
208
+ evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=KArOIHNpjxD2ihH155Z0VxOzvlzKWqOwnqbp0J6aNzE,4375
197
209
  evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
198
210
  evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
211
  evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
@@ -206,12 +218,14 @@ evalscope/benchmarks/image_edit/gedit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
206
218
  evalscope/benchmarks/image_edit/gedit/gedit_adapter.py,sha256=a6hhRbnGCvMEMsbnSbczjXd4vHfMVEnFfP459FCF_Mc,5250
207
219
  evalscope/benchmarks/image_edit/gedit/utils.py,sha256=UN0z9Dafs8d8lEXqxin321d8smiS3H9p3gyLkZFPFNg,14735
208
220
  evalscope/benchmarks/image_edit/gedit/vie_prompts.py,sha256=qVXWQyVUwZxEasDjVmYBk30_JI4gnvHacMOmMsA4wcI,22056
221
+ evalscope/benchmarks/infovqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
+ evalscope/benchmarks/infovqa/infovqa_adapter.py,sha256=3m_EvfRZ5ItHkz-3mVlsF_NnPS7NH1-EXwUW-s4VMxA,2617
209
223
  evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
224
  evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1czqwnk8Zk-jRhBys,1304
211
225
  evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
226
  evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=maN8qHmDHJpexPeB0qwZoXJ5zrqPbJDYVRptqvXI9d4,6827
213
227
  evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
214
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=qnprJTv7zWA9aq6Lw4aDoall---kiivR0oDo3uSO2mI,6399
228
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=GDKmbvV2NJe-QQfy-C6nDJVpeiP6SVxjJzgTba_K4_w,6682
215
229
  evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
216
230
  evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
217
231
  evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
@@ -221,12 +235,16 @@ evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
221
235
  evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
222
236
  evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
237
  evalscope/benchmarks/math_500/math_500_adapter.py,sha256=hn7SQhoIHKuH-2A_nGUhQPRw2gl2G-kZldc9ueY0G3A,1802
238
+ evalscope/benchmarks/math_verse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
239
+ evalscope/benchmarks/math_verse/math_verse_adapter.py,sha256=lRSNE9C9Y_zd9WJJpzsq3KgqFN5YHJzmpfBbW-h8D4M,4153
240
+ evalscope/benchmarks/math_vision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
241
+ evalscope/benchmarks/math_vision/math_vision_adapter.py,sha256=oWwRZRX2ardjpR4_8ZzBRO58ki9rLtPYQvZ4Z3OoMfE,4475
224
242
  evalscope/benchmarks/math_vista/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
225
- evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=Mu9BpH0rDNM0yMrGws4SEOnXy2NTSIKwyLs5t4nAP-s,5842
243
+ evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=XXSHxWzCRPnRDCXEuY8--6o_j-gXyCb3S_yOShMzWfk,4928
226
244
  evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
227
245
  evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=jyT9_D4w8PTtLBN3Kn10_CnssH_mPuRNnn9rek_zUEs,1655
228
246
  evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
- evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=py0DakGQX1JE2rqYjYN9w_-H0DtQ-YqG5k2s_UzbxxU,4372
247
+ evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=ht2DVt_zEBJp4jvGy3myHHgdUUP9eff2O5BpIc9Fv74,4376
230
248
  evalscope/benchmarks/mm_star/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
249
  evalscope/benchmarks/mm_star/mm_star_adapter.py,sha256=oamLv6U2-JAK5mdVLkUgYxkOahxQkQYMRKAyu_xPAUE,2818
232
250
  evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -236,7 +254,7 @@ evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=GtIyUubUg6Q6Ydh1Adj0-32
236
254
  evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
237
255
  evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=m_37OIFrJB4ZIvtbDJ_m9P9mA2QtrNjGfbbVo15awJg,7402
238
256
  evalscope/benchmarks/mmmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
239
- evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=C7UM6HvomcA_Srf7771S0CaUvifBX63i161XaacraGQ,6038
257
+ evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=WrykWq8n61CVrQ4XQhI3iEySgErHdZyng3udOL-Pddk,6054
240
258
  evalscope/benchmarks/mmmu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
241
259
  evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py,sha256=banPS1nDt9bQ95urKbSZnR-hBTw23eL9MSrHt_0ZLp0,4725
242
260
  evalscope/benchmarks/multi_if/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -246,28 +264,74 @@ evalscope/benchmarks/multi_if/multi_if_adapter.py,sha256=I3_YPPUuRbrs9Gt3Qjhx9RM
246
264
  evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
247
265
  evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
248
266
  evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=GYaswWPwYI3aV5HSpcuBTgW9-HDtf2xzNZg0WrsI0Yo,17033
267
+ evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=aQw8Sss1-ZgQPWqwMITOpAtwzMoYWDGjLhUpZtkcrvY,17030
250
268
  evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
269
+ evalscope/benchmarks/ner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
+ evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py,sha256=sNL6D0uRlzdLSxUuKOcKzY4o82HQJeCiwBuUdfGVy_o,2012
271
+ evalscope/benchmarks/ner/conll2003_adapter.py,sha256=r_6UmR68ohFsn021zArkGRq0tRZSaIy9RNNJncag0i8,1970
272
+ evalscope/benchmarks/ner/copious_adapter.py,sha256=ufxsmTvEEayLaDJcUW5--oo6vkDY69W2yQ1fpD0E5lQ,3751
273
+ evalscope/benchmarks/ner/cross_ner_adapter.py,sha256=a37zVh_kfyOoHf-QgBlnVfee93RkEKqT-6BBwS1PWps,4916
274
+ evalscope/benchmarks/ner/genia_ner_adapter.py,sha256=WnuzyCQ0l2SU9ZvuDNOGeLubKZmJvxSnYPWQZ8TR8Yc,2457
275
+ evalscope/benchmarks/ner/harvey_ner_adapter.py,sha256=iUxnkg7yIQXAFU6lbrhAvJAqQqVysQUSkBeyNE7P6eM,2098
276
+ evalscope/benchmarks/ner/mit_movie_trivia_adapter.py,sha256=dAYHGX-eS7PVxcoT2nidMbpssG1yf2D5w55_mL5rqhw,3017
277
+ evalscope/benchmarks/ner/mit_restaurant_adapter.py,sha256=mIc_huJnksd3fZ8nIY7Uacs4x0r8W7pt5RjvEnTYZ00,2528
278
+ evalscope/benchmarks/ner/ontonotes5_adapter.py,sha256=oulC4XkVF42yjXWPuKg_zptLQiRItCmlZBlHN0shr6A,3546
279
+ evalscope/benchmarks/ner/wnut2017_adapter.py,sha256=uGrfp-4wYIcpEL9PqQx82uzCeWz6vIPKb7JlStTSE9M,2379
280
+ evalscope/benchmarks/ner/cross_ner_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
281
+ evalscope/benchmarks/ner/cross_ner_entities/ai.py,sha256=RcgzYCygBmyrSOLacxxUN4cUznBZ3NemwfSR4hYBVKs,2484
282
+ evalscope/benchmarks/ner/cross_ner_entities/literature.py,sha256=ETzhu4PmiKS88NRkKPh96J9KiXKFdeQk5s_LSNqbD-M,1874
283
+ evalscope/benchmarks/ner/cross_ner_entities/music.py,sha256=_aJyKo83pO-j_LtGwXgrg9p8H1sHqXGPNW-wv1EIfWc,1999
284
+ evalscope/benchmarks/ner/cross_ner_entities/politics.py,sha256=taAqCnGdxHZGHM7sV0KONim8GjqVBrpMME6CVHwfJMo,1635
285
+ evalscope/benchmarks/ner/cross_ner_entities/science.py,sha256=DVZrCuMQ6-sPvRNTfx8iF_x9LaEBZ4o_RIWZADYKYGE,2919
286
+ evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
287
+ evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
288
+ evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
289
+ evalscope/benchmarks/ocr_bench_v2/TEDS_metric.py,sha256=31bL0V32Fq7prF1WoVjXmrmMdhg0qNcoiOaKykKOrZM,36528
290
+ evalscope/benchmarks/ocr_bench_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
291
+ evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwSaOEPPuCaX3Z8BGoZVvcc6OWbeO9w,7976
292
+ evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
293
+ evalscope/benchmarks/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
294
+ evalscope/benchmarks/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
295
+ evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=hhF2MuPo5n6uM0OCgTHCNIgscNVhXRb3koqU73AErwY,15924
296
+ evalscope/benchmarks/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
297
+ evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
298
+ evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
299
+ evalscope/benchmarks/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py,sha256=qCuqDtsCfxAiQHYLNdHU7BQ9kLIZ9iyfmRxtIrGOBck,20349
300
+ evalscope/benchmarks/ocr_bench_v2/spotting_eval/script.py,sha256=7HzM1PEw8wNOhmQOsZe582Y2rr4u66Q3JKVvvMasntE,19565
251
301
  evalscope/benchmarks/olympiad_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
252
302
  evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuwCWVb4h1PIQKAIFqBehwRwO2WOD0KX_ik,6565
253
303
  evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
254
304
  evalscope/benchmarks/omni_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
255
305
  evalscope/benchmarks/omni_bench/omni_bench_adapter.py,sha256=IJkRSokQC6MF_pN46Yofr_NaZaNt1XZFX1PUBmX4-qA,3651
306
+ evalscope/benchmarks/omnidoc_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
+ evalscope/benchmarks/omnidoc_bench/end2end_eval.py,sha256=71IEdeDsldtoFmMb1c_Pyugv-Wx-WOVIvccRkPvsJdU,15916
308
+ evalscope/benchmarks/omnidoc_bench/metrics.py,sha256=DZfaL5BlDjnW60kRnnfmsMgldPOKX0MJ2tAdsBf4dI0,20620
309
+ evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py,sha256=JBw9nS0e-P07MnfE4XAfbmaw3f-1okkJiOYl8a4lYYA,6192
310
+ evalscope/benchmarks/omnidoc_bench/utils.py,sha256=Db6QeIq_bc6Dl5xdYel5G7tnWib9_vn_KFiKeFN37IA,74435
311
+ evalscope/benchmarks/poly_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
+ evalscope/benchmarks/poly_math/poly_math_adapter.py,sha256=CHTDWgyYYaHwjwtT8K2at65GsDdl972onG7NLHuh8Gk,5377
313
+ evalscope/benchmarks/poly_math/utils/instruction.py,sha256=v3E8TnoWlooL_Ms5CQySzMmdyPKHAO005tGtTWMviPo,6901
314
+ evalscope/benchmarks/pope/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
315
+ evalscope/benchmarks/pope/pope_adapter.py,sha256=444tJqHdnecfOmqANwXUN5J-rp9w0PyCG_TVUpDyMXQ,5009
256
316
  evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
257
317
  evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=XN3F6NH7mF4ibwGX5nI01sqEHz05UQFnBAyfAe14QYE,6174
258
318
  evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
259
319
  evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
260
320
  evalscope/benchmarks/real_world_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
261
321
  evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py,sha256=J2u0J9d31uvkoz9nBI9tCMqG27hmYwdLQPPef9jx_pg,2788
322
+ evalscope/benchmarks/seed_bench_2_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
323
+ evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py,sha256=ngUOFhP8YFOE8ximkMg5U6TGLZMIXPHJsVJUurvbzM8,3064
262
324
  evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
263
325
  evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=_duveAliSaPUqVSLQ2TtSv5sfwvFFy7t-MgIIokQ24s,9017
326
+ evalscope/benchmarks/simple_vqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
+ evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py,sha256=3ioSompYERllFE6yc3yZLl0NKWypRjg5d0uVf3b-4d0,9530
264
328
  evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
265
329
  evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
266
330
  evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
267
331
  evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
268
332
  evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
269
333
  evalscope/benchmarks/tau_bench/generation.py,sha256=d7J5xrxEI-0BYxdSuxdDavcR7f1ipBdpQsKZzwyzGds,5190
270
- evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=47wA0ia6gezA3nqvUpd4Pb8f5alCrBKEt7GOxJFupow,6464
334
+ evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=VMb63g1_d0lZRhsc2eZJQjrNLhCUF7wss6Lzt87LNNw,6461
271
335
  evalscope/benchmarks/text2image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
272
336
  evalscope/benchmarks/text2image/evalmuse_adapter.py,sha256=g-Wc1qTg-xWLTjiZPo8zmQud75ac-8mBpYRxOHfiO0g,3024
273
337
  evalscope/benchmarks/text2image/genai_bench_adapter.py,sha256=1GDB3gS9zwrfb9C83LQdQyN7bvvqeYuu5ulJ9Igmi2k,1876
@@ -275,15 +339,19 @@ evalscope/benchmarks/text2image/general_t2i_adapter.py,sha256=CHy9ufvrVHc_5WkGVR
275
339
  evalscope/benchmarks/text2image/hpdv2_adapter.py,sha256=8-vWCV21eo_e9EbxDB5mGw2cFzD4OUQPLB66FvlO9W4,1781
276
340
  evalscope/benchmarks/text2image/tifa_adapter.py,sha256=4CcprucAe25UpTZRV3Qgb-8jbeNHtXNRWHw8RiYvfJA,784
277
341
  evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
278
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=BHsesDDELEINdbWSR3WKCQGZ6MqWc2LiOZA3MbTp2_s,3805
342
+ evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=T3XtFLllrO5QOF13fU99LcigLKGqa0_VVi3C5mKPk84,3802
279
343
  evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
280
344
  evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
281
345
  evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
282
346
  evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
283
347
  evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
284
348
  evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=W7ESUAcLsHwbssiiSCQNUeQcqx6JEeW7FSQiBFycS24,3512
349
+ evalscope/benchmarks/visu_logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
350
+ evalscope/benchmarks/visu_logic/visu_logic_adapter.py,sha256=8dK8_HFxDhWTvCC8WTZjadChP6lNzgsFp_5qFSRGFoM,3277
285
351
  evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
286
352
  evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
353
+ evalscope/benchmarks/zerobench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
354
+ evalscope/benchmarks/zerobench/zerobench_adapter.py,sha256=pqnJEx4uOi3bxwYKqLxrxU5DX9p3F01N2itzbG_-VaU,2739
287
355
  evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
288
356
  evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
289
357
  evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
@@ -295,15 +363,15 @@ evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gB
295
363
  evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
296
364
  evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
297
365
  evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
298
- evalscope/evaluator/evaluator.py,sha256=o99m1CF7xuc3Qn2M25AhWulothZxICwZgZiWxSbynTc,14435
366
+ evalscope/evaluator/evaluator.py,sha256=pw-5uBYoMdOUtNv7CE9-ZP8IQrkTMwp-csMjb9hO_B0,13832
299
367
  evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
300
368
  evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
301
369
  evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
302
370
  evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
303
371
  evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
304
- evalscope/metrics/math_parser.py,sha256=BMfautQtNNiF9f2DIEfO6SXSn_GYhzaddAjGWG10MJA,17257
305
- evalscope/metrics/metric.py,sha256=CabKKEbw_DptyH1ZQju7WzjB47fWUKdOhFB1ROpUC-4,10871
306
- evalscope/metrics/metrics.py,sha256=VxAggzEfaLKxWcXyuve8QbEBwV2W71udVyt0gynzGec,14134
372
+ evalscope/metrics/math_parser.py,sha256=gJ1NR2Mcyzt9qMdR8I0-6U31Jzoe8a6yUuwvayYPi4c,17979
373
+ evalscope/metrics/metric.py,sha256=Hr1F_kuQfu3FNPwbCrJQA7VHb83-VPoDj7I9uX7d8U0,12840
374
+ evalscope/metrics/metrics.py,sha256=Y7TQ6MYaGE32EntTz-18CmQqYMpo1rQSvUiSwzBgpaQ,14599
307
375
  evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
308
376
  evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
309
377
  evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=T91PgJfi1As7BR7I-Hq6rLlvHAtMB9JpBw9gMTH8VlE,12114
@@ -381,9 +449,9 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sh
381
449
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
382
450
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=Ns7oM4KpKxWZTo8Lefe4EDFw-jzp5633zAArcWjoVZA,9772
383
451
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=KIF5tsiE7a5dbDfa-IKwzuzMUpuEAQPrm1nWFFtAeoI,20032
384
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=uhaehowhTqRhQtq_dVCgF-9Iu4yU19AMxx2sJimYwlA,52711
452
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=85ZvV2gKSnsbP5941PeJ-JJ4t8_lOYQe1EOxrHlIbNI,52728
385
453
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=o5ykt3Q_WQlNmyxjQaS2-KPLGq1xqLZixNYam_Bs6NA,18701
386
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=aBKdQQS7cHMPgYqIknCdHCZ7j2_QLACPn_jU_njiMIs,46840
454
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=NPDpIRxjiroafZk5Z2uA9bC8Bi-yXY7um5HXxThF7N0,46857
387
455
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
388
456
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
389
457
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
@@ -403,7 +471,7 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/bl
403
471
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=TOAI-KaUrtKjR1GNU_WwNXNpb9gGT-KX2FYe3muv_e0,4275
404
472
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
405
473
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=S68U0DxWYGDmreRbH5yLDHBNN9PsczY9H0Uik0hO-ds,13872
406
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=i1XlJe_PTSmiPkZKIhUXC_lc0-z2ewNYo4W1DvZQxjY,36678
474
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=zv_WyHi67hvgHQ4DkZ8a4UoPcgrADKayqVtiIq-p3V4,36695
407
475
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=p67DDiFS-676z0z8jPj6NwXwNjEsqTXaXCh3g2UiDno,840
408
476
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
409
477
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
@@ -415,41 +483,41 @@ evalscope/models/model_apis.py,sha256=ZkZ_nfbeAFJnCndRvRIRLcbmJFTMhGRBi-WfMu0uZK
415
483
  evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
416
484
  evalscope/models/openai_compatible.py,sha256=2uK78nDhWwgph7hcIiMc3NHRbIwvswRDM9o9ENahj4k,4659
417
485
  evalscope/models/text2image_model.py,sha256=Sdiyw6vewjVTiXK8RFEh1pohOhDge80EoIWYpnLjr5Y,3929
418
- evalscope/models/utils/openai.py,sha256=0DzuvTQYFEqcTp6sVtB2VZY7xeyWcOS0I6votqWegUg,28130
486
+ evalscope/models/utils/openai.py,sha256=qoq9xXP1NrwBfnIS0SqzK9gl8tvbDpNmJP5n17pKyqw,28292
419
487
  evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
420
- evalscope/perf/arguments.py,sha256=FmwVE4gC09B8nLd0sdczeEA9b5ztv4kwhOvLuby4wI8,11695
421
- evalscope/perf/benchmark.py,sha256=nSJr8lQvHDYiG33tNhkYaVOYONjhJ2wUb1x5RlUiXRY,7968
422
- evalscope/perf/http_client.py,sha256=4Ov1Cwi7gMgO05ZmazwyfYjUGAQNGWn7nbfl1ljRNh4,4610
423
- evalscope/perf/main.py,sha256=WZbBgFhIj9KqxzC7_NZxDlou019_EXatsHRt5vqDhFg,3439
488
+ evalscope/perf/arguments.py,sha256=JHB-JIEHq5p3zoHeKn6dkelGq0JrMVMRne-wbXK2yhg,12892
489
+ evalscope/perf/benchmark.py,sha256=Uc6BJJGYTsAnfFljPy0WJIXcapHOIwvym3o0yPRTVqU,6964
490
+ evalscope/perf/http_client.py,sha256=8xJFYja8FoQA0MDTj2NcxPkAmji4n81fsaw1gRuL1sA,5152
491
+ evalscope/perf/main.py,sha256=eEL0qUdNPMyHr3ZTixTfZxKN4IIw3gz3sw8sq3S_vs4,4015
424
492
  evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
425
493
  evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
426
494
  evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
427
- evalscope/perf/plugin/api/base.py,sha256=RRZVk9MFuBwb9PFPTklFhQ_RTihg_E7W_LR26-ldPIA,2782
428
- evalscope/perf/plugin/api/custom_api.py,sha256=VYJO2lUt9EKdWz6zeYCfvdI0MqfcsIgcKvxqvY5C-3k,10376
495
+ evalscope/perf/plugin/api/base.py,sha256=LLBDKOWUXYbLLLTtO86X1Y4Erbp5egs2WCXGj4my754,2822
496
+ evalscope/perf/plugin/api/custom_api.py,sha256=HHvhNlqNQr43GhIC61yoa54QCEAy4MRMmJ0kBy-rnsQ,8305
429
497
  evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
430
- evalscope/perf/plugin/api/default_api.py,sha256=qvMIjbe_rM13cDHcFCwjtCsjc11qE80Yg7LypaSNTXc,4251
431
- evalscope/perf/plugin/api/openai_api.py,sha256=a6w4C_voza61trHskHaWNPFr2x2zhRVwIXdiNnMH81E,10570
498
+ evalscope/perf/plugin/api/default_api.py,sha256=5hXjFN9bSIWUinrdFp9Cs1-jeKuNvRdBvDUX6VhSeZI,8991
499
+ evalscope/perf/plugin/api/openai_api.py,sha256=UVo9tAnqZbVNEQwAT0wOZb1Abbf-yQmr3iDKHwXDoI8,10628
432
500
  evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
433
- evalscope/perf/plugin/datasets/base.py,sha256=-3Ihnp2hYvZyPnP8Gh2Pu8ovlLNFHyZnNgRu3WHG4d0,2714
434
- evalscope/perf/plugin/datasets/custom.py,sha256=yoRHTvTGAglaZ-mmRkPjYNMG7uZYuT1_KrBxnl2i0qg,1385
501
+ evalscope/perf/plugin/datasets/base.py,sha256=PFBMdo3H_Hx2jOXNrMb97DvJ5gJg6QajSYymCgTXKmo,3629
502
+ evalscope/perf/plugin/datasets/custom.py,sha256=kCofjHfcihPcsc1XwyLxn9QG9E88eZ5qAQW7nW6ID0c,1311
435
503
  evalscope/perf/plugin/datasets/flickr8k.py,sha256=nhHiGNhXX-2c17NQ5q5Q7FgV2hB8XVeeAP8dKkboyHE,1033
436
504
  evalscope/perf/plugin/datasets/kontext_bench.py,sha256=cN70hiBX1940IWvNWZG9YGE4vO1yj41Bo7bqmOWusoQ,1081
437
- evalscope/perf/plugin/datasets/line_by_line.py,sha256=F4ppdjKKLzFNf_16h6S-6nAU4lOfOFI2-tPgIeZDTMA,996
438
- evalscope/perf/plugin/datasets/longalpaca.py,sha256=JjPGYP8NdPmP48wff2fL5IZQfajXL5qhZBvKmZxtfW4,1336
439
- evalscope/perf/plugin/datasets/openqa.py,sha256=5PqqiIvNTLlRrPb8PWqMGQyWRb6LuIqipYn67-xd-dY,1519
440
- evalscope/perf/plugin/datasets/random_dataset.py,sha256=NNAXvgFPkLDOSpYNex1DyE4X-ELtQRm13_oBooO30j8,3514
505
+ evalscope/perf/plugin/datasets/line_by_line.py,sha256=L3lj9evcr3q-Mcemyuy2WauBB5c6O-ttnIVw1t4UJUE,922
506
+ evalscope/perf/plugin/datasets/longalpaca.py,sha256=abFLvrRZFsno9IUr_bpvhMWHL9X2sahlIpGLUb-5BxA,1262
507
+ evalscope/perf/plugin/datasets/openqa.py,sha256=UlbHhzGoQTBXa4foEFhRTZX6v7So6pR-ExFhU2ws8YM,1427
508
+ evalscope/perf/plugin/datasets/random_dataset.py,sha256=GPuC5ovi3BW84RCiGSDd2cBZ3jRmFrtMRsxEocc1ud8,3347
441
509
  evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=e6exWQnupWkTDNwt2MmEK-hccuxEDmWLJRMM70onKi0,3230
442
510
  evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
443
511
  evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
512
  evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
445
- evalscope/perf/utils/benchmark_util.py,sha256=A5d--rCElabDOl6Aaxqnu0fNR5c763YZwKIHBSeTK00,7294
446
- evalscope/perf/utils/db_util.py,sha256=HAISq6M7xCD2gjUEqqfbK3FjBxA-tvr_n-751tU9ypo,11634
513
+ evalscope/perf/utils/benchmark_util.py,sha256=Uf4vUAsfgAZs2qsyv9cRY_i87QNEHl17XMhGgXq7wFw,8048
514
+ evalscope/perf/utils/db_util.py,sha256=lr26ah_KRznBBu_ssxXki_PgtELk5bUJV2JaM4LaeNI,11534
447
515
  evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
448
- evalscope/perf/utils/local_server.py,sha256=_lSPlNEnOmPA_DtREgPS_vj2w_7D8PPSpypXbb0YfJM,4880
449
- evalscope/perf/utils/log_utils.py,sha256=NWSK_ITG4yoVx5GMLbIRGDoXSs90s7X3mftdm37Os2U,1666
516
+ evalscope/perf/utils/local_server.py,sha256=dMoX8p6aCQq1JnoXxcyWknadLdBwpfQhvKwk5fn6G4Q,3727
517
+ evalscope/perf/utils/log_utils.py,sha256=YY8mnpJoHMlP6jtmEq7QujyuxhSUF1vqLk8TpBAkbY0,2162
450
518
  evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
451
- evalscope/report/__init__.py,sha256=DTigCg9fkU_zGNDqIaZy3CWYbrlvODvCxCTVqSx6ano,875
452
- evalscope/report/combinator.py,sha256=Xzlhs7kwfI6cgs7rngxhvsur0bCJkrM0tAy6isq2VME,3235
519
+ evalscope/report/__init__.py,sha256=Za-5AljVqZ7N-ap44MHUPtlaVkLCzpmst-n7SPkXPVE,1110
520
+ evalscope/report/combinator.py,sha256=F7KOClXVh56-XEw3Sb5uxwA6L8ZlH_P4-MOlm3Yp_Cg,5020
453
521
  evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
454
522
  evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
455
523
  evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -491,44 +559,18 @@ evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2
491
559
  evalscope/utils/argument_utils.py,sha256=D7qOH85wf7LKh_cJ2X51OEaL7CMaddydmHZkfoYpvLk,1952
492
560
  evalscope/utils/chat_service.py,sha256=sSki2pKGQP3UjcIf_lbO06afI-vsaUAqglwX__wUDEw,8766
493
561
  evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
494
- evalscope/utils/function_utils.py,sha256=E-AIzx_PKrZDGl1cBvlvqNvMa8yM2WUJ2wh73PNBXrQ,1887
562
+ evalscope/utils/function_utils.py,sha256=-WiBBrFaMzfgH7H2qOg3ciZ-BGyUGlsPUF-2nnU2KLw,9599
495
563
  evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
496
- evalscope/utils/io_utils.py,sha256=79F0p7dFxA84tIVSL_C4piJgeQQtVUfb2R_Xcd8v_cE,11615
497
- evalscope/utils/json_schema.py,sha256=ZExvQA-SI6SxWBx_hCmuQ2RRqwGKuywy4sTotvd2hH0,8288
498
- evalscope/utils/logger.py,sha256=roFk4Su4aJwsF0s-uYc5-tABnghwYPX3gpkA5QUGzK8,5675
564
+ evalscope/utils/io_utils.py,sha256=BRBdPi_BejTRbevvbTWz6kHf33v6i9bOQUMam2yxL5Y,14070
565
+ evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
566
+ evalscope/utils/logger.py,sha256=Zv4lb9gXx5R0t_wnQ4rIIbRg0oC04qsr2z6cUoRM47Y,6771
499
567
  evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
500
- evalscope/utils/multi_choices.py,sha256=OxBER7amWpoRY0Z-o39rDmCNK6wpr1HQm9mMHpWLgp0,9524
568
+ evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
569
+ evalscope/utils/ner.py,sha256=gxvUURZVLJqZUrIqCy892rAAJ2ydYiGG5ZKPW_mpHsM,14148
501
570
  evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
502
- tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
503
- tests/common.py,sha256=BB136KcGaEfdWqMwApa48K0CTSGmOCUZ0FYDqpfYnAA,2423
504
- tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
505
- tests/utils.py,sha256=Fgm0CU6ilZjCGOfOMJH-Trxy0UIAGbhvy0Ijy_zDGUk,323
506
- tests/benchmark/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
507
- tests/benchmark/test_eval.py,sha256=vSAvhiCKxHpjHdGhZn8l0qzPSiG1ZZafz_M06B_a8_Y,13827
508
- tests/benchmark/test_image_edit.py,sha256=z3z7psMRFynpVgUAFoH--ieeGXzb9cHkrq3tT_sCZo8,2165
509
- tests/benchmark/test_sandbox.py,sha256=bHyX8ammdn7EsEbN80cIzDNhQZlJD3Ssoj9l4efF7rI,2968
510
- tests/benchmark/test_t2i.py,sha256=fciaGsOrkOpT4WQlsnmjrqw6qolCzI0DGyWQAJkM-Es,4513
511
- tests/benchmark/test_vlm.py,sha256=gn0ledf_yPY1IhCyCtiqT_dTVPUVZ3NVPr9yzsC_UZQ,4501
512
- tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
513
- tests/cli/test_all.py,sha256=1omOXC1lBphBLm0hTf5HNstlF_bwi16dYyr00gvaCTM,7301
514
- tests/cli/test_collection.py,sha256=lGz3YUS_0gM6_HjQLe26OfBAkHOPOEDWMO-UyP58GN8,4455
515
- tests/cli/test_custom.py,sha256=9z_N7Re712xI62TqVSTBdzB_iFFEUb55wcWIcGvJb84,9254
516
- tests/cli/test_reasoning.py,sha256=rU181LLoKbFCpNPFCIZULxEgsJ2PYswel2pP2EsjEmo,2696
517
- tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
518
- tests/perf/test_perf.py,sha256=ugYNEyU32ctryPFa_6fr8aQYxfHJMymdKnKKEHM9Ajc,6174
519
- tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
520
- tests/rag/test_clip_benchmark.py,sha256=qpSLgmHMGcYTnxP7AI__y-ii5_tu_fCSht6p3TBetkA,2650
521
- tests/rag/test_mteb.py,sha256=fdNQIyUEzE7puPCKw5QhCHTEu7hz-ieHeq1xCWGh6IM,7246
522
- tests/rag/test_ragas.py,sha256=5qozXvPFIb67T-igJv87ijlOgkPnqgkkBVXu6Ht4D0A,4554
523
- tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
524
- tests/swift/test_run_swift_eval.py,sha256=YbIhYNoI4kAB-ox-OXAKUifLIXTFqP-xGZicrAgK_V0,5784
525
- tests/swift/test_run_swift_vlm_eval.py,sha256=RwrKkc1WHEZxetM11cGL81G4faKCn7SYn4VlwL03atI,4934
526
- tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=UAUtOCQ72xbm8s-sov3cBEpYVDy189wpB-qOL3KoU7M,6053
527
- tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
528
- tests/vlm/test_vlmeval.py,sha256=EDQRkYfSyOICUwo_tm3p-puaE_xdFmqOPkrt5etxsqM,3307
529
- evalscope-1.0.2.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
530
- evalscope-1.0.2.dist-info/METADATA,sha256=vZciS7qNosSJOdwyRSxsCyVqvw8hyqKS84yKjlbxwzw,40305
531
- evalscope-1.0.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
532
- evalscope-1.0.2.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
533
- evalscope-1.0.2.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
534
- evalscope-1.0.2.dist-info/RECORD,,
571
+ evalscope-1.1.1.dist-info/licenses/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
572
+ evalscope-1.1.1.dist-info/METADATA,sha256=vQzNoEz383srIkNFekFt_e9wzWQeWDMytu4aIaVTaYU,34332
573
+ evalscope-1.1.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
574
+ evalscope-1.1.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
575
+ evalscope-1.1.1.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
576
+ evalscope-1.1.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Copyright (c) Alibaba, Inc. and its affiliates.
@@ -1 +0,0 @@
1
- # Copyright (c) Alibaba, Inc. and its affiliates.