evalscope 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of evalscope might be problematic. Click here for more details.

Files changed (100) hide show
  1. evalscope/api/benchmark/__init__.py +8 -1
  2. evalscope/api/benchmark/adapters/__init__.py +1 -0
  3. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  4. evalscope/api/benchmark/benchmark.py +14 -0
  5. evalscope/api/dataset/dataset.py +21 -0
  6. evalscope/api/dataset/loader.py +6 -2
  7. evalscope/api/mixin/sandbox_mixin.py +32 -54
  8. evalscope/api/model/generate_config.py +6 -0
  9. evalscope/benchmarks/aa_lcr/__init__.py +0 -0
  10. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  11. evalscope/benchmarks/bfcl/bfcl_adapter.py +1 -1
  12. evalscope/benchmarks/data_collection/data_collection_adapter.py +2 -1
  13. evalscope/benchmarks/general_arena/general_arena_adapter.py +1 -1
  14. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
  15. evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
  16. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +23 -4
  17. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  18. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +158 -0
  19. evalscope/benchmarks/humaneval/humaneval_adapter.py +2 -1
  20. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +3 -1
  21. evalscope/benchmarks/math_verse/__init__.py +0 -0
  22. evalscope/benchmarks/math_verse/math_verse_adapter.py +100 -0
  23. evalscope/benchmarks/math_vision/__init__.py +0 -0
  24. evalscope/benchmarks/math_vision/math_vision_adapter.py +111 -0
  25. evalscope/benchmarks/math_vista/math_vista_adapter.py +6 -26
  26. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +1 -1
  27. evalscope/benchmarks/ner/__init__.py +0 -0
  28. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  29. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  30. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  31. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  32. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  33. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  34. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  35. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  36. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  37. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  38. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  39. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  40. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  41. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  42. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  43. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  44. evalscope/benchmarks/ocr_bench_v2/utils.py +1 -0
  45. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  46. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  47. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  48. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  49. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  50. evalscope/benchmarks/poly_math/__init__.py +0 -0
  51. evalscope/benchmarks/poly_math/poly_math_adapter.py +127 -0
  52. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  53. evalscope/benchmarks/pope/__init__.py +0 -0
  54. evalscope/benchmarks/pope/pope_adapter.py +111 -0
  55. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  56. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  57. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  58. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  59. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +1 -1
  60. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +1 -1
  61. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  62. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  63. evalscope/benchmarks/zerobench/__init__.py +0 -0
  64. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  65. evalscope/constants.py +4 -0
  66. evalscope/evaluator/evaluator.py +72 -79
  67. evalscope/metrics/math_parser.py +14 -0
  68. evalscope/metrics/metric.py +1 -1
  69. evalscope/models/utils/openai.py +4 -0
  70. evalscope/perf/arguments.py +24 -4
  71. evalscope/perf/benchmark.py +74 -89
  72. evalscope/perf/http_client.py +31 -16
  73. evalscope/perf/main.py +15 -2
  74. evalscope/perf/plugin/api/base.py +9 -7
  75. evalscope/perf/plugin/api/custom_api.py +13 -58
  76. evalscope/perf/plugin/api/default_api.py +179 -79
  77. evalscope/perf/plugin/api/openai_api.py +4 -3
  78. evalscope/perf/plugin/datasets/base.py +21 -0
  79. evalscope/perf/plugin/datasets/custom.py +2 -3
  80. evalscope/perf/plugin/datasets/line_by_line.py +2 -3
  81. evalscope/perf/plugin/datasets/longalpaca.py +2 -3
  82. evalscope/perf/plugin/datasets/openqa.py +2 -4
  83. evalscope/perf/plugin/datasets/random_dataset.py +1 -3
  84. evalscope/perf/utils/benchmark_util.py +36 -22
  85. evalscope/perf/utils/db_util.py +14 -19
  86. evalscope/perf/utils/local_server.py +0 -44
  87. evalscope/perf/utils/log_utils.py +21 -6
  88. evalscope/report/__init__.py +2 -1
  89. evalscope/run.py +4 -0
  90. evalscope/utils/function_utils.py +195 -12
  91. evalscope/utils/io_utils.py +74 -0
  92. evalscope/utils/logger.py +49 -17
  93. evalscope/utils/ner.py +377 -0
  94. evalscope/version.py +2 -2
  95. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/METADATA +235 -363
  96. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/RECORD +100 -55
  97. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/WHEEL +1 -1
  98. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/entry_points.txt +0 -0
  99. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info/licenses}/LICENSE +0 -0
  100. {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/top_level.txt +0 -0
@@ -1,24 +1,25 @@
1
1
  evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
2
2
  evalscope/arguments.py,sha256=jKAF47PsqXRioU21gRHw9hxJnfR31z_X7c__glRY5ns,6257
3
3
  evalscope/config.py,sha256=S2N11-AxQkT7lVffpjXdtpT4QpnSP6th-c8I-501mwM,11507
4
- evalscope/constants.py,sha256=W3E4Jp-x6qxvPOYtU9bNlzlERFvSAA_3F007apIwUlU,3601
5
- evalscope/run.py,sha256=A9_7pR3FiA-It46A3Mqk7ce6fQy548p0ux2QUugj2hI,6531
4
+ evalscope/constants.py,sha256=Su_CoL5Gn8AV2lZN3vmTxpnTXuJ3Y3xz7SOzF8BcISI,3717
5
+ evalscope/run.py,sha256=dKFesxZZteOhscHif2A8xQHsJnG78D-m2gdfaWyMNC4,6742
6
6
  evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
7
- evalscope/version.py,sha256=hqGJMtjd3F6yPJucqhuYtXuGYSumthFmroHsUTY761Y,118
7
+ evalscope/version.py,sha256=-m3fRuGUlprLmY84Yfh4OA1j3cM9SSJUGypM9Z5Ltng,118
8
8
  evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
10
- evalscope/api/benchmark/__init__.py,sha256=9xcTxpcQ6HhZ0QDwEIZhAT5IjybzaJ60VGLcmaFE5dU,188
11
- evalscope/api/benchmark/benchmark.py,sha256=gqAM81SeGb_Q0rA6Q-LFpnNkOUiwOj43aRWECtCxAOE,10832
10
+ evalscope/api/benchmark/__init__.py,sha256=tp5ZjfopdknpePwZ-byZd8BkQs6fFzW0UoOp0NBEaks,229
11
+ evalscope/api/benchmark/benchmark.py,sha256=iH35ugrPfL9RHFbtJjs0AMFnQiYO9lUqSkRnRPUQsVc,11194
12
12
  evalscope/api/benchmark/meta.py,sha256=N4u8NQjkjIw-xaf6KFnb6C8JDKB0DLbsXyXblDqIpvE,4304
13
- evalscope/api/benchmark/adapters/__init__.py,sha256=uLt_GiU4s-_6Rjgmr4OUTtE7dvEX-ZIQ403fd6oNuxA,264
13
+ evalscope/api/benchmark/adapters/__init__.py,sha256=TEVqVR0hDuvH78jX-e028XJgXVcrnVuE8FRV-36Aspc,300
14
14
  evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=RWDweSmXKGv5hPPjeV4VF76gbKqYJEsab_lQYGUM2PA,28785
15
15
  evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
16
16
  evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
17
+ evalscope/api/benchmark/adapters/ner_adapter.py,sha256=_rvfl8cNlvKoQkHqR2tC_K-xZaV0TsB_pIzI4sP_SM0,8906
17
18
  evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
18
19
  evalscope/api/benchmark/adapters/vision_language_adapter.py,sha256=5d7ITkeosikb7u0ag0WkMaZ0SAYGkR_wKM9NP495GKk,280
19
20
  evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
20
- evalscope/api/dataset/dataset.py,sha256=9bwSx89zgOOBRQkRPVv-B5Yi30A6J1MLtekQSqwsy9g,11328
21
- evalscope/api/dataset/loader.py,sha256=t7KLH5ltLUumhiPIyYJzk6zn2iKLx-D2gIIoMhKdnhc,9714
21
+ evalscope/api/dataset/dataset.py,sha256=y-1DvPxN1Gxf-oEnrUq0Dcs4-rUQkApXP_rVYwsixSM,12119
22
+ evalscope/api/dataset/loader.py,sha256=44wQ3aBbn4YJyRjEsA1Bpg1DZicdCUzVybPoba_JhzY,9797
22
23
  evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
23
24
  evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
24
25
  evalscope/api/evaluator/cache.py,sha256=a_M2ouUjtkMr5m3wRbmsE8ETP_aacxbm0d38yY5RljM,13244
@@ -35,9 +36,9 @@ evalscope/api/metric/metric.py,sha256=XkjBqpZbFYynhTIH8WawfPmItbDQ6jWufE_ox9zDPC
35
36
  evalscope/api/metric/scorer.py,sha256=dczSQwkRmPk1uvNCMGT5G6nYbwWTcpwsZtyYXWkrJII,3749
36
37
  evalscope/api/mixin/__init__.py,sha256=xBuoTuao5o_EFThgeeeWI87x64Q12aJttsaZc8gak_c,83
37
38
  evalscope/api/mixin/llm_judge_mixin.py,sha256=ECVDfxCeAEkymFssD7xKhIDcct2qgQTqGnbijXk9leE,5675
38
- evalscope/api/mixin/sandbox_mixin.py,sha256=uKqBtTtttKwrUArY-CTMDdFHjRBOR7Kl1sxaGHe-S2Q,7653
39
+ evalscope/api/mixin/sandbox_mixin.py,sha256=RbTpZXr6ohxgp1vU4YGMKmGKiIzVqQZ44quAHBX8zvs,6539
39
40
  evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
40
- evalscope/api/model/generate_config.py,sha256=wQeDknXb49yBKSRL9rlIyerPobGXqU-A4hL1vySNGPo,7656
41
+ evalscope/api/model/generate_config.py,sha256=xp8yDdDNomHpYaCNVDnZrZkQlfHpp9lrXKO7lqXEQHY,8017
41
42
  evalscope/api/model/model.py,sha256=c7YVbYYk47MHWwPjoB66xWjgmHdUGTOSOdtIsLcJfyc,12782
42
43
  evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
43
44
  evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
@@ -108,6 +109,8 @@ evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KB
108
109
  evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
109
110
  evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
110
111
  evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
112
+ evalscope/benchmarks/aa_lcr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
+ evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py,sha256=7KZRdIhg733vBMBWngxTjtrZtl_DHjwMNLt9C2tN0_w,7483
111
114
  evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
115
  evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
113
116
  evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -152,7 +155,7 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
152
155
  evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
153
156
  evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
154
157
  evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=ZmwGylqXCAcpJ8glQmj7HkDa8OqE9KODiHvWelTGLIo,17033
158
+ evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=GOHmMqP1jnXFU4lkke5dzCszyG9kHvBsDl-GhtRWlgg,17030
156
159
  evalscope/benchmarks/bfcl/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
157
160
  evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
161
  evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
@@ -168,7 +171,7 @@ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5ri
168
171
  evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
169
172
  evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=NOqckeyuabH_nwaxL5IWmH887UO5rvBKA2jx7qb9fNs,2226
170
173
  evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=eetF21dN55e0MNPxTaiDbkPZDidt4cX2decQjC_deJI,8676
174
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=PbV5S0rUVF0jgrBKNjuZh2oE1FAsbYnPymg5u7NBjqo,8712
172
175
  evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
176
  evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
174
177
  evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
@@ -181,17 +184,19 @@ evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
181
184
  evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyTT5r521676d_xJ6Vg,5514
182
185
  evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
183
186
  evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
- evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=DzJaokqZwR2L8HDiahss8EbQ3vcsMXkzkMghxU-uAOo,21639
187
+ evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=lb51HwfvKXJgJWdjYyyTRsT5pX876yEuWAz8G2oH6yM,21636
185
188
  evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
186
189
  evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
187
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=7VKg_EzXkRvoWpR7h8qB4sVVb1eZHCGcPk-X_NMS5tE,2062
190
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=p7_C0CmKJthMY1Iri1SyNfssuYBws_dkhPMREu-uM94,2059
188
191
  evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
189
- evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=u29IsH5sgw-T0IezuI1jErGwykz-F7875AdEhdvS5rk,3522
192
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=_7Jk_h-qcaxWHgrULojNqXwZ8XgicmXhYT8bOKwnyAU,3519
190
193
  evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
191
194
  evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
192
195
  evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
193
196
  evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
194
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=W4vTXsC7iHN1AgvpaCf1Rj7y2O8QczIluucnpSC5aYo,2636
197
+ evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=59EaZI2veg89_yyJyJ8QxkDMrQPC4ZTj3YQOBZ2_Vbk,3844
198
+ evalscope/benchmarks/hallusion_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
+ evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py,sha256=cKnhPNtZTfu1zKlgeguH9qnMNjnDNPSUKLb9ZETDnqg,6518
195
200
  evalscope/benchmarks/healthbench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
201
  evalscope/benchmarks/healthbench/healthbench_adapter.py,sha256=1sL7i9yhORH4xiFWB9puPKWNZZFJGZFAlKdlzHp-fiw,13228
197
202
  evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGgVczAv-LPjA7Y,3683
@@ -200,7 +205,7 @@ evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFx
200
205
  evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
201
206
  evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
202
207
  evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
203
- evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=uLs3UHSALS3YHt0qzBismrIqdEUgbEalQbjC0CU7ym4,4085
208
+ evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=KArOIHNpjxD2ihH155Z0VxOzvlzKWqOwnqbp0J6aNzE,4375
204
209
  evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
205
210
  evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
206
211
  evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
@@ -220,7 +225,7 @@ evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1cz
220
225
  evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
226
  evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=maN8qHmDHJpexPeB0qwZoXJ5zrqPbJDYVRptqvXI9d4,6827
222
227
  evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
223
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=qnprJTv7zWA9aq6Lw4aDoall---kiivR0oDo3uSO2mI,6399
228
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=GDKmbvV2NJe-QQfy-C6nDJVpeiP6SVxjJzgTba_K4_w,6682
224
229
  evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
225
230
  evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
226
231
  evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
@@ -230,8 +235,12 @@ evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
230
235
  evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
231
236
  evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
232
237
  evalscope/benchmarks/math_500/math_500_adapter.py,sha256=hn7SQhoIHKuH-2A_nGUhQPRw2gl2G-kZldc9ueY0G3A,1802
238
+ evalscope/benchmarks/math_verse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
239
+ evalscope/benchmarks/math_verse/math_verse_adapter.py,sha256=lRSNE9C9Y_zd9WJJpzsq3KgqFN5YHJzmpfBbW-h8D4M,4153
240
+ evalscope/benchmarks/math_vision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
241
+ evalscope/benchmarks/math_vision/math_vision_adapter.py,sha256=oWwRZRX2ardjpR4_8ZzBRO58ki9rLtPYQvZ4Z3OoMfE,4475
233
242
  evalscope/benchmarks/math_vista/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
234
- evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=Mu9BpH0rDNM0yMrGws4SEOnXy2NTSIKwyLs5t4nAP-s,5842
243
+ evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=XXSHxWzCRPnRDCXEuY8--6o_j-gXyCb3S_yOShMzWfk,4928
235
244
  evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
236
245
  evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=jyT9_D4w8PTtLBN3Kn10_CnssH_mPuRNnn9rek_zUEs,1655
237
246
  evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -255,8 +264,25 @@ evalscope/benchmarks/multi_if/multi_if_adapter.py,sha256=I3_YPPUuRbrs9Gt3Qjhx9RM
255
264
  evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
256
265
  evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
257
266
  evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
258
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=GYaswWPwYI3aV5HSpcuBTgW9-HDtf2xzNZg0WrsI0Yo,17033
267
+ evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=aQw8Sss1-ZgQPWqwMITOpAtwzMoYWDGjLhUpZtkcrvY,17030
259
268
  evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
269
+ evalscope/benchmarks/ner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
+ evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py,sha256=sNL6D0uRlzdLSxUuKOcKzY4o82HQJeCiwBuUdfGVy_o,2012
271
+ evalscope/benchmarks/ner/conll2003_adapter.py,sha256=r_6UmR68ohFsn021zArkGRq0tRZSaIy9RNNJncag0i8,1970
272
+ evalscope/benchmarks/ner/copious_adapter.py,sha256=ufxsmTvEEayLaDJcUW5--oo6vkDY69W2yQ1fpD0E5lQ,3751
273
+ evalscope/benchmarks/ner/cross_ner_adapter.py,sha256=a37zVh_kfyOoHf-QgBlnVfee93RkEKqT-6BBwS1PWps,4916
274
+ evalscope/benchmarks/ner/genia_ner_adapter.py,sha256=WnuzyCQ0l2SU9ZvuDNOGeLubKZmJvxSnYPWQZ8TR8Yc,2457
275
+ evalscope/benchmarks/ner/harvey_ner_adapter.py,sha256=iUxnkg7yIQXAFU6lbrhAvJAqQqVysQUSkBeyNE7P6eM,2098
276
+ evalscope/benchmarks/ner/mit_movie_trivia_adapter.py,sha256=dAYHGX-eS7PVxcoT2nidMbpssG1yf2D5w55_mL5rqhw,3017
277
+ evalscope/benchmarks/ner/mit_restaurant_adapter.py,sha256=mIc_huJnksd3fZ8nIY7Uacs4x0r8W7pt5RjvEnTYZ00,2528
278
+ evalscope/benchmarks/ner/ontonotes5_adapter.py,sha256=oulC4XkVF42yjXWPuKg_zptLQiRItCmlZBlHN0shr6A,3546
279
+ evalscope/benchmarks/ner/wnut2017_adapter.py,sha256=uGrfp-4wYIcpEL9PqQx82uzCeWz6vIPKb7JlStTSE9M,2379
280
+ evalscope/benchmarks/ner/cross_ner_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
281
+ evalscope/benchmarks/ner/cross_ner_entities/ai.py,sha256=RcgzYCygBmyrSOLacxxUN4cUznBZ3NemwfSR4hYBVKs,2484
282
+ evalscope/benchmarks/ner/cross_ner_entities/literature.py,sha256=ETzhu4PmiKS88NRkKPh96J9KiXKFdeQk5s_LSNqbD-M,1874
283
+ evalscope/benchmarks/ner/cross_ner_entities/music.py,sha256=_aJyKo83pO-j_LtGwXgrg9p8H1sHqXGPNW-wv1EIfWc,1999
284
+ evalscope/benchmarks/ner/cross_ner_entities/politics.py,sha256=taAqCnGdxHZGHM7sV0KONim8GjqVBrpMME6CVHwfJMo,1635
285
+ evalscope/benchmarks/ner/cross_ner_entities/science.py,sha256=DVZrCuMQ6-sPvRNTfx8iF_x9LaEBZ4o_RIWZADYKYGE,2919
260
286
  evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
261
287
  evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
262
288
  evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
@@ -266,7 +292,7 @@ evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwS
266
292
  evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
267
293
  evalscope/benchmarks/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
268
294
  evalscope/benchmarks/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
269
- evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=z9DSh2m1yvM3vsvxvqdHuPgRFxgdmEnzuNIuO7PAV3s,15914
295
+ evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=hhF2MuPo5n6uM0OCgTHCNIgscNVhXRb3koqU73AErwY,15924
270
296
  evalscope/benchmarks/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
271
297
  evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
272
298
  evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
@@ -277,21 +303,35 @@ evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuw
277
303
  evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
278
304
  evalscope/benchmarks/omni_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
279
305
  evalscope/benchmarks/omni_bench/omni_bench_adapter.py,sha256=IJkRSokQC6MF_pN46Yofr_NaZaNt1XZFX1PUBmX4-qA,3651
306
+ evalscope/benchmarks/omnidoc_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
+ evalscope/benchmarks/omnidoc_bench/end2end_eval.py,sha256=71IEdeDsldtoFmMb1c_Pyugv-Wx-WOVIvccRkPvsJdU,15916
308
+ evalscope/benchmarks/omnidoc_bench/metrics.py,sha256=DZfaL5BlDjnW60kRnnfmsMgldPOKX0MJ2tAdsBf4dI0,20620
309
+ evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py,sha256=JBw9nS0e-P07MnfE4XAfbmaw3f-1okkJiOYl8a4lYYA,6192
310
+ evalscope/benchmarks/omnidoc_bench/utils.py,sha256=Db6QeIq_bc6Dl5xdYel5G7tnWib9_vn_KFiKeFN37IA,74435
311
+ evalscope/benchmarks/poly_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
+ evalscope/benchmarks/poly_math/poly_math_adapter.py,sha256=CHTDWgyYYaHwjwtT8K2at65GsDdl972onG7NLHuh8Gk,5377
313
+ evalscope/benchmarks/poly_math/utils/instruction.py,sha256=v3E8TnoWlooL_Ms5CQySzMmdyPKHAO005tGtTWMviPo,6901
314
+ evalscope/benchmarks/pope/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
315
+ evalscope/benchmarks/pope/pope_adapter.py,sha256=444tJqHdnecfOmqANwXUN5J-rp9w0PyCG_TVUpDyMXQ,5009
280
316
  evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
281
317
  evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=XN3F6NH7mF4ibwGX5nI01sqEHz05UQFnBAyfAe14QYE,6174
282
318
  evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
283
319
  evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
284
320
  evalscope/benchmarks/real_world_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
321
  evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py,sha256=J2u0J9d31uvkoz9nBI9tCMqG27hmYwdLQPPef9jx_pg,2788
322
+ evalscope/benchmarks/seed_bench_2_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
323
+ evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py,sha256=ngUOFhP8YFOE8ximkMg5U6TGLZMIXPHJsVJUurvbzM8,3064
286
324
  evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
287
325
  evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=_duveAliSaPUqVSLQ2TtSv5sfwvFFy7t-MgIIokQ24s,9017
326
+ evalscope/benchmarks/simple_vqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
+ evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py,sha256=3ioSompYERllFE6yc3yZLl0NKWypRjg5d0uVf3b-4d0,9530
288
328
  evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
289
329
  evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
290
330
  evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
291
331
  evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
292
332
  evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
293
333
  evalscope/benchmarks/tau_bench/generation.py,sha256=d7J5xrxEI-0BYxdSuxdDavcR7f1ipBdpQsKZzwyzGds,5190
294
- evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=47wA0ia6gezA3nqvUpd4Pb8f5alCrBKEt7GOxJFupow,6464
334
+ evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=VMb63g1_d0lZRhsc2eZJQjrNLhCUF7wss6Lzt87LNNw,6461
295
335
  evalscope/benchmarks/text2image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
296
336
  evalscope/benchmarks/text2image/evalmuse_adapter.py,sha256=g-Wc1qTg-xWLTjiZPo8zmQud75ac-8mBpYRxOHfiO0g,3024
297
337
  evalscope/benchmarks/text2image/genai_bench_adapter.py,sha256=1GDB3gS9zwrfb9C83LQdQyN7bvvqeYuu5ulJ9Igmi2k,1876
@@ -299,15 +339,19 @@ evalscope/benchmarks/text2image/general_t2i_adapter.py,sha256=CHy9ufvrVHc_5WkGVR
299
339
  evalscope/benchmarks/text2image/hpdv2_adapter.py,sha256=8-vWCV21eo_e9EbxDB5mGw2cFzD4OUQPLB66FvlO9W4,1781
300
340
  evalscope/benchmarks/text2image/tifa_adapter.py,sha256=4CcprucAe25UpTZRV3Qgb-8jbeNHtXNRWHw8RiYvfJA,784
301
341
  evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
302
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=BHsesDDELEINdbWSR3WKCQGZ6MqWc2LiOZA3MbTp2_s,3805
342
+ evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=T3XtFLllrO5QOF13fU99LcigLKGqa0_VVi3C5mKPk84,3802
303
343
  evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
304
344
  evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
305
345
  evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
306
346
  evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
307
347
  evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
308
348
  evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=W7ESUAcLsHwbssiiSCQNUeQcqx6JEeW7FSQiBFycS24,3512
349
+ evalscope/benchmarks/visu_logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
350
+ evalscope/benchmarks/visu_logic/visu_logic_adapter.py,sha256=8dK8_HFxDhWTvCC8WTZjadChP6lNzgsFp_5qFSRGFoM,3277
309
351
  evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
310
352
  evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
353
+ evalscope/benchmarks/zerobench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
354
+ evalscope/benchmarks/zerobench/zerobench_adapter.py,sha256=pqnJEx4uOi3bxwYKqLxrxU5DX9p3F01N2itzbG_-VaU,2739
311
355
  evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
312
356
  evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
313
357
  evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
@@ -319,14 +363,14 @@ evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gB
319
363
  evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
320
364
  evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
321
365
  evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
322
- evalscope/evaluator/evaluator.py,sha256=o99m1CF7xuc3Qn2M25AhWulothZxICwZgZiWxSbynTc,14435
366
+ evalscope/evaluator/evaluator.py,sha256=pw-5uBYoMdOUtNv7CE9-ZP8IQrkTMwp-csMjb9hO_B0,13832
323
367
  evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
324
368
  evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
325
369
  evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
326
370
  evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
327
371
  evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
328
- evalscope/metrics/math_parser.py,sha256=BMfautQtNNiF9f2DIEfO6SXSn_GYhzaddAjGWG10MJA,17257
329
- evalscope/metrics/metric.py,sha256=KNp_DNi9Ntq4my5G7La7AlP2Vj1p6hIgOheAh-4go5Q,12861
372
+ evalscope/metrics/math_parser.py,sha256=gJ1NR2Mcyzt9qMdR8I0-6U31Jzoe8a6yUuwvayYPi4c,17979
373
+ evalscope/metrics/metric.py,sha256=Hr1F_kuQfu3FNPwbCrJQA7VHb83-VPoDj7I9uX7d8U0,12840
330
374
  evalscope/metrics/metrics.py,sha256=Y7TQ6MYaGE32EntTz-18CmQqYMpo1rQSvUiSwzBgpaQ,14599
331
375
  evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
332
376
  evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
@@ -439,40 +483,40 @@ evalscope/models/model_apis.py,sha256=ZkZ_nfbeAFJnCndRvRIRLcbmJFTMhGRBi-WfMu0uZK
439
483
  evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
440
484
  evalscope/models/openai_compatible.py,sha256=2uK78nDhWwgph7hcIiMc3NHRbIwvswRDM9o9ENahj4k,4659
441
485
  evalscope/models/text2image_model.py,sha256=Sdiyw6vewjVTiXK8RFEh1pohOhDge80EoIWYpnLjr5Y,3929
442
- evalscope/models/utils/openai.py,sha256=0DzuvTQYFEqcTp6sVtB2VZY7xeyWcOS0I6votqWegUg,28130
486
+ evalscope/models/utils/openai.py,sha256=qoq9xXP1NrwBfnIS0SqzK9gl8tvbDpNmJP5n17pKyqw,28292
443
487
  evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
- evalscope/perf/arguments.py,sha256=FmwVE4gC09B8nLd0sdczeEA9b5ztv4kwhOvLuby4wI8,11695
445
- evalscope/perf/benchmark.py,sha256=nSJr8lQvHDYiG33tNhkYaVOYONjhJ2wUb1x5RlUiXRY,7968
446
- evalscope/perf/http_client.py,sha256=4Ov1Cwi7gMgO05ZmazwyfYjUGAQNGWn7nbfl1ljRNh4,4610
447
- evalscope/perf/main.py,sha256=WZbBgFhIj9KqxzC7_NZxDlou019_EXatsHRt5vqDhFg,3439
488
+ evalscope/perf/arguments.py,sha256=JHB-JIEHq5p3zoHeKn6dkelGq0JrMVMRne-wbXK2yhg,12892
489
+ evalscope/perf/benchmark.py,sha256=Uc6BJJGYTsAnfFljPy0WJIXcapHOIwvym3o0yPRTVqU,6964
490
+ evalscope/perf/http_client.py,sha256=8xJFYja8FoQA0MDTj2NcxPkAmji4n81fsaw1gRuL1sA,5152
491
+ evalscope/perf/main.py,sha256=eEL0qUdNPMyHr3ZTixTfZxKN4IIw3gz3sw8sq3S_vs4,4015
448
492
  evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
449
493
  evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
450
494
  evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
451
- evalscope/perf/plugin/api/base.py,sha256=RRZVk9MFuBwb9PFPTklFhQ_RTihg_E7W_LR26-ldPIA,2782
452
- evalscope/perf/plugin/api/custom_api.py,sha256=VYJO2lUt9EKdWz6zeYCfvdI0MqfcsIgcKvxqvY5C-3k,10376
495
+ evalscope/perf/plugin/api/base.py,sha256=LLBDKOWUXYbLLLTtO86X1Y4Erbp5egs2WCXGj4my754,2822
496
+ evalscope/perf/plugin/api/custom_api.py,sha256=HHvhNlqNQr43GhIC61yoa54QCEAy4MRMmJ0kBy-rnsQ,8305
453
497
  evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
454
- evalscope/perf/plugin/api/default_api.py,sha256=qvMIjbe_rM13cDHcFCwjtCsjc11qE80Yg7LypaSNTXc,4251
455
- evalscope/perf/plugin/api/openai_api.py,sha256=a6w4C_voza61trHskHaWNPFr2x2zhRVwIXdiNnMH81E,10570
498
+ evalscope/perf/plugin/api/default_api.py,sha256=5hXjFN9bSIWUinrdFp9Cs1-jeKuNvRdBvDUX6VhSeZI,8991
499
+ evalscope/perf/plugin/api/openai_api.py,sha256=UVo9tAnqZbVNEQwAT0wOZb1Abbf-yQmr3iDKHwXDoI8,10628
456
500
  evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
457
- evalscope/perf/plugin/datasets/base.py,sha256=-3Ihnp2hYvZyPnP8Gh2Pu8ovlLNFHyZnNgRu3WHG4d0,2714
458
- evalscope/perf/plugin/datasets/custom.py,sha256=yoRHTvTGAglaZ-mmRkPjYNMG7uZYuT1_KrBxnl2i0qg,1385
501
+ evalscope/perf/plugin/datasets/base.py,sha256=PFBMdo3H_Hx2jOXNrMb97DvJ5gJg6QajSYymCgTXKmo,3629
502
+ evalscope/perf/plugin/datasets/custom.py,sha256=kCofjHfcihPcsc1XwyLxn9QG9E88eZ5qAQW7nW6ID0c,1311
459
503
  evalscope/perf/plugin/datasets/flickr8k.py,sha256=nhHiGNhXX-2c17NQ5q5Q7FgV2hB8XVeeAP8dKkboyHE,1033
460
504
  evalscope/perf/plugin/datasets/kontext_bench.py,sha256=cN70hiBX1940IWvNWZG9YGE4vO1yj41Bo7bqmOWusoQ,1081
461
- evalscope/perf/plugin/datasets/line_by_line.py,sha256=F4ppdjKKLzFNf_16h6S-6nAU4lOfOFI2-tPgIeZDTMA,996
462
- evalscope/perf/plugin/datasets/longalpaca.py,sha256=JjPGYP8NdPmP48wff2fL5IZQfajXL5qhZBvKmZxtfW4,1336
463
- evalscope/perf/plugin/datasets/openqa.py,sha256=5PqqiIvNTLlRrPb8PWqMGQyWRb6LuIqipYn67-xd-dY,1519
464
- evalscope/perf/plugin/datasets/random_dataset.py,sha256=NNAXvgFPkLDOSpYNex1DyE4X-ELtQRm13_oBooO30j8,3514
505
+ evalscope/perf/plugin/datasets/line_by_line.py,sha256=L3lj9evcr3q-Mcemyuy2WauBB5c6O-ttnIVw1t4UJUE,922
506
+ evalscope/perf/plugin/datasets/longalpaca.py,sha256=abFLvrRZFsno9IUr_bpvhMWHL9X2sahlIpGLUb-5BxA,1262
507
+ evalscope/perf/plugin/datasets/openqa.py,sha256=UlbHhzGoQTBXa4foEFhRTZX6v7So6pR-ExFhU2ws8YM,1427
508
+ evalscope/perf/plugin/datasets/random_dataset.py,sha256=GPuC5ovi3BW84RCiGSDd2cBZ3jRmFrtMRsxEocc1ud8,3347
465
509
  evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=e6exWQnupWkTDNwt2MmEK-hccuxEDmWLJRMM70onKi0,3230
466
510
  evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
467
511
  evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
468
512
  evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
469
- evalscope/perf/utils/benchmark_util.py,sha256=A5d--rCElabDOl6Aaxqnu0fNR5c763YZwKIHBSeTK00,7294
470
- evalscope/perf/utils/db_util.py,sha256=HAISq6M7xCD2gjUEqqfbK3FjBxA-tvr_n-751tU9ypo,11634
513
+ evalscope/perf/utils/benchmark_util.py,sha256=Uf4vUAsfgAZs2qsyv9cRY_i87QNEHl17XMhGgXq7wFw,8048
514
+ evalscope/perf/utils/db_util.py,sha256=lr26ah_KRznBBu_ssxXki_PgtELk5bUJV2JaM4LaeNI,11534
471
515
  evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
472
- evalscope/perf/utils/local_server.py,sha256=_lSPlNEnOmPA_DtREgPS_vj2w_7D8PPSpypXbb0YfJM,4880
473
- evalscope/perf/utils/log_utils.py,sha256=NWSK_ITG4yoVx5GMLbIRGDoXSs90s7X3mftdm37Os2U,1666
516
+ evalscope/perf/utils/local_server.py,sha256=dMoX8p6aCQq1JnoXxcyWknadLdBwpfQhvKwk5fn6G4Q,3727
517
+ evalscope/perf/utils/log_utils.py,sha256=YY8mnpJoHMlP6jtmEq7QujyuxhSUF1vqLk8TpBAkbY0,2162
474
518
  evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
475
- evalscope/report/__init__.py,sha256=xS6eeTgsPdIlIOhzUn-ND77uV34vMVug4PmXHmYAxwM,1080
519
+ evalscope/report/__init__.py,sha256=Za-5AljVqZ7N-ap44MHUPtlaVkLCzpmst-n7SPkXPVE,1110
476
520
  evalscope/report/combinator.py,sha256=F7KOClXVh56-XEw3Sb5uxwA6L8ZlH_P4-MOlm3Yp_Cg,5020
477
521
  evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
478
522
  evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
@@ -515,17 +559,18 @@ evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2
515
559
  evalscope/utils/argument_utils.py,sha256=D7qOH85wf7LKh_cJ2X51OEaL7CMaddydmHZkfoYpvLk,1952
516
560
  evalscope/utils/chat_service.py,sha256=sSki2pKGQP3UjcIf_lbO06afI-vsaUAqglwX__wUDEw,8766
517
561
  evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
518
- evalscope/utils/function_utils.py,sha256=E-AIzx_PKrZDGl1cBvlvqNvMa8yM2WUJ2wh73PNBXrQ,1887
562
+ evalscope/utils/function_utils.py,sha256=-WiBBrFaMzfgH7H2qOg3ciZ-BGyUGlsPUF-2nnU2KLw,9599
519
563
  evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
520
- evalscope/utils/io_utils.py,sha256=79F0p7dFxA84tIVSL_C4piJgeQQtVUfb2R_Xcd8v_cE,11615
564
+ evalscope/utils/io_utils.py,sha256=BRBdPi_BejTRbevvbTWz6kHf33v6i9bOQUMam2yxL5Y,14070
521
565
  evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
522
- evalscope/utils/logger.py,sha256=roFk4Su4aJwsF0s-uYc5-tABnghwYPX3gpkA5QUGzK8,5675
566
+ evalscope/utils/logger.py,sha256=Zv4lb9gXx5R0t_wnQ4rIIbRg0oC04qsr2z6cUoRM47Y,6771
523
567
  evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
524
568
  evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
569
+ evalscope/utils/ner.py,sha256=gxvUURZVLJqZUrIqCy892rAAJ2ydYiGG5ZKPW_mpHsM,14148
525
570
  evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
526
- evalscope-1.1.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
527
- evalscope-1.1.0.dist-info/METADATA,sha256=pap4NeCTqw7bec2KqYboFj25zabm1m5rwoiqukX8EO4,39544
528
- evalscope-1.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
529
- evalscope-1.1.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
530
- evalscope-1.1.0.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
531
- evalscope-1.1.0.dist-info/RECORD,,
571
+ evalscope-1.1.1.dist-info/licenses/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
572
+ evalscope-1.1.1.dist-info/METADATA,sha256=vQzNoEz383srIkNFekFt_e9wzWQeWDMytu4aIaVTaYU,34332
573
+ evalscope-1.1.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
574
+ evalscope-1.1.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
575
+ evalscope-1.1.1.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
576
+ evalscope-1.1.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5