evalscope 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of evalscope might be problematic. Click here for more details.
- evalscope/api/benchmark/__init__.py +8 -1
- evalscope/api/benchmark/adapters/__init__.py +1 -0
- evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
- evalscope/api/benchmark/benchmark.py +14 -0
- evalscope/api/dataset/dataset.py +21 -0
- evalscope/api/dataset/loader.py +6 -2
- evalscope/api/mixin/sandbox_mixin.py +32 -54
- evalscope/api/model/generate_config.py +6 -0
- evalscope/benchmarks/aa_lcr/__init__.py +0 -0
- evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
- evalscope/benchmarks/bfcl/bfcl_adapter.py +1 -1
- evalscope/benchmarks/data_collection/data_collection_adapter.py +2 -1
- evalscope/benchmarks/general_arena/general_arena_adapter.py +1 -1
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
- evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +23 -4
- evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
- evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +158 -0
- evalscope/benchmarks/humaneval/humaneval_adapter.py +2 -1
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +3 -1
- evalscope/benchmarks/math_verse/__init__.py +0 -0
- evalscope/benchmarks/math_verse/math_verse_adapter.py +100 -0
- evalscope/benchmarks/math_vision/__init__.py +0 -0
- evalscope/benchmarks/math_vision/math_vision_adapter.py +111 -0
- evalscope/benchmarks/math_vista/math_vista_adapter.py +6 -26
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +1 -1
- evalscope/benchmarks/ner/__init__.py +0 -0
- evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
- evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
- evalscope/benchmarks/ner/copious_adapter.py +85 -0
- evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
- evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
- evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
- evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
- evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
- evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
- evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
- evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
- evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
- evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
- evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
- evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
- evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
- evalscope/benchmarks/ocr_bench_v2/utils.py +1 -0
- evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
- evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
- evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
- evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
- evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
- evalscope/benchmarks/poly_math/__init__.py +0 -0
- evalscope/benchmarks/poly_math/poly_math_adapter.py +127 -0
- evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
- evalscope/benchmarks/pope/__init__.py +0 -0
- evalscope/benchmarks/pope/pope_adapter.py +111 -0
- evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
- evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
- evalscope/benchmarks/simple_vqa/__init__.py +0 -0
- evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
- evalscope/benchmarks/tau_bench/tau_bench_adapter.py +1 -1
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py +1 -1
- evalscope/benchmarks/visu_logic/__init__.py +0 -0
- evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
- evalscope/benchmarks/zerobench/__init__.py +0 -0
- evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
- evalscope/constants.py +4 -0
- evalscope/evaluator/evaluator.py +72 -79
- evalscope/metrics/math_parser.py +14 -0
- evalscope/metrics/metric.py +1 -1
- evalscope/models/utils/openai.py +4 -0
- evalscope/perf/arguments.py +24 -4
- evalscope/perf/benchmark.py +74 -89
- evalscope/perf/http_client.py +31 -16
- evalscope/perf/main.py +15 -2
- evalscope/perf/plugin/api/base.py +9 -7
- evalscope/perf/plugin/api/custom_api.py +13 -58
- evalscope/perf/plugin/api/default_api.py +179 -79
- evalscope/perf/plugin/api/openai_api.py +4 -3
- evalscope/perf/plugin/datasets/base.py +21 -0
- evalscope/perf/plugin/datasets/custom.py +2 -3
- evalscope/perf/plugin/datasets/line_by_line.py +2 -3
- evalscope/perf/plugin/datasets/longalpaca.py +2 -3
- evalscope/perf/plugin/datasets/openqa.py +2 -4
- evalscope/perf/plugin/datasets/random_dataset.py +1 -3
- evalscope/perf/utils/benchmark_util.py +36 -22
- evalscope/perf/utils/db_util.py +14 -19
- evalscope/perf/utils/local_server.py +0 -44
- evalscope/perf/utils/log_utils.py +21 -6
- evalscope/report/__init__.py +2 -1
- evalscope/run.py +4 -0
- evalscope/utils/function_utils.py +195 -12
- evalscope/utils/io_utils.py +74 -0
- evalscope/utils/logger.py +49 -17
- evalscope/utils/ner.py +377 -0
- evalscope/version.py +2 -2
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/METADATA +235 -363
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/RECORD +100 -55
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/WHEEL +1 -1
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/entry_points.txt +0 -0
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info/licenses}/LICENSE +0 -0
- {evalscope-1.1.0.dist-info → evalscope-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -1,24 +1,25 @@
|
|
|
1
1
|
evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
|
|
2
2
|
evalscope/arguments.py,sha256=jKAF47PsqXRioU21gRHw9hxJnfR31z_X7c__glRY5ns,6257
|
|
3
3
|
evalscope/config.py,sha256=S2N11-AxQkT7lVffpjXdtpT4QpnSP6th-c8I-501mwM,11507
|
|
4
|
-
evalscope/constants.py,sha256=
|
|
5
|
-
evalscope/run.py,sha256=
|
|
4
|
+
evalscope/constants.py,sha256=Su_CoL5Gn8AV2lZN3vmTxpnTXuJ3Y3xz7SOzF8BcISI,3717
|
|
5
|
+
evalscope/run.py,sha256=dKFesxZZteOhscHif2A8xQHsJnG78D-m2gdfaWyMNC4,6742
|
|
6
6
|
evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
|
|
7
|
-
evalscope/version.py,sha256
|
|
7
|
+
evalscope/version.py,sha256=-m3fRuGUlprLmY84Yfh4OA1j3cM9SSJUGypM9Z5Ltng,118
|
|
8
8
|
evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
|
|
10
|
-
evalscope/api/benchmark/__init__.py,sha256=
|
|
11
|
-
evalscope/api/benchmark/benchmark.py,sha256=
|
|
10
|
+
evalscope/api/benchmark/__init__.py,sha256=tp5ZjfopdknpePwZ-byZd8BkQs6fFzW0UoOp0NBEaks,229
|
|
11
|
+
evalscope/api/benchmark/benchmark.py,sha256=iH35ugrPfL9RHFbtJjs0AMFnQiYO9lUqSkRnRPUQsVc,11194
|
|
12
12
|
evalscope/api/benchmark/meta.py,sha256=N4u8NQjkjIw-xaf6KFnb6C8JDKB0DLbsXyXblDqIpvE,4304
|
|
13
|
-
evalscope/api/benchmark/adapters/__init__.py,sha256=
|
|
13
|
+
evalscope/api/benchmark/adapters/__init__.py,sha256=TEVqVR0hDuvH78jX-e028XJgXVcrnVuE8FRV-36Aspc,300
|
|
14
14
|
evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=RWDweSmXKGv5hPPjeV4VF76gbKqYJEsab_lQYGUM2PA,28785
|
|
15
15
|
evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
|
|
16
16
|
evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
|
|
17
|
+
evalscope/api/benchmark/adapters/ner_adapter.py,sha256=_rvfl8cNlvKoQkHqR2tC_K-xZaV0TsB_pIzI4sP_SM0,8906
|
|
17
18
|
evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
|
|
18
19
|
evalscope/api/benchmark/adapters/vision_language_adapter.py,sha256=5d7ITkeosikb7u0ag0WkMaZ0SAYGkR_wKM9NP495GKk,280
|
|
19
20
|
evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
|
|
20
|
-
evalscope/api/dataset/dataset.py,sha256=
|
|
21
|
-
evalscope/api/dataset/loader.py,sha256=
|
|
21
|
+
evalscope/api/dataset/dataset.py,sha256=y-1DvPxN1Gxf-oEnrUq0Dcs4-rUQkApXP_rVYwsixSM,12119
|
|
22
|
+
evalscope/api/dataset/loader.py,sha256=44wQ3aBbn4YJyRjEsA1Bpg1DZicdCUzVybPoba_JhzY,9797
|
|
22
23
|
evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
|
|
23
24
|
evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
|
|
24
25
|
evalscope/api/evaluator/cache.py,sha256=a_M2ouUjtkMr5m3wRbmsE8ETP_aacxbm0d38yY5RljM,13244
|
|
@@ -35,9 +36,9 @@ evalscope/api/metric/metric.py,sha256=XkjBqpZbFYynhTIH8WawfPmItbDQ6jWufE_ox9zDPC
|
|
|
35
36
|
evalscope/api/metric/scorer.py,sha256=dczSQwkRmPk1uvNCMGT5G6nYbwWTcpwsZtyYXWkrJII,3749
|
|
36
37
|
evalscope/api/mixin/__init__.py,sha256=xBuoTuao5o_EFThgeeeWI87x64Q12aJttsaZc8gak_c,83
|
|
37
38
|
evalscope/api/mixin/llm_judge_mixin.py,sha256=ECVDfxCeAEkymFssD7xKhIDcct2qgQTqGnbijXk9leE,5675
|
|
38
|
-
evalscope/api/mixin/sandbox_mixin.py,sha256=
|
|
39
|
+
evalscope/api/mixin/sandbox_mixin.py,sha256=RbTpZXr6ohxgp1vU4YGMKmGKiIzVqQZ44quAHBX8zvs,6539
|
|
39
40
|
evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
|
|
40
|
-
evalscope/api/model/generate_config.py,sha256=
|
|
41
|
+
evalscope/api/model/generate_config.py,sha256=xp8yDdDNomHpYaCNVDnZrZkQlfHpp9lrXKO7lqXEQHY,8017
|
|
41
42
|
evalscope/api/model/model.py,sha256=c7YVbYYk47MHWwPjoB66xWjgmHdUGTOSOdtIsLcJfyc,12782
|
|
42
43
|
evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
|
|
43
44
|
evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
|
|
@@ -108,6 +109,8 @@ evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KB
|
|
|
108
109
|
evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
|
|
109
110
|
evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
|
|
110
111
|
evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
|
|
112
|
+
evalscope/benchmarks/aa_lcr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
|
+
evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py,sha256=7KZRdIhg733vBMBWngxTjtrZtl_DHjwMNLt9C2tN0_w,7483
|
|
111
114
|
evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
115
|
evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
|
|
113
116
|
evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -152,7 +155,7 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
|
|
|
152
155
|
evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
|
|
153
156
|
evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
|
|
154
157
|
evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
|
-
evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=
|
|
158
|
+
evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=GOHmMqP1jnXFU4lkke5dzCszyG9kHvBsDl-GhtRWlgg,17030
|
|
156
159
|
evalscope/benchmarks/bfcl/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
|
|
157
160
|
evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
161
|
evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
|
|
@@ -168,7 +171,7 @@ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5ri
|
|
|
168
171
|
evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
169
172
|
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=NOqckeyuabH_nwaxL5IWmH887UO5rvBKA2jx7qb9fNs,2226
|
|
170
173
|
evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
-
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=
|
|
174
|
+
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=PbV5S0rUVF0jgrBKNjuZh2oE1FAsbYnPymg5u7NBjqo,8712
|
|
172
175
|
evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
176
|
evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
|
|
174
177
|
evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
|
|
@@ -181,17 +184,19 @@ evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
|
181
184
|
evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyTT5r521676d_xJ6Vg,5514
|
|
182
185
|
evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
|
|
183
186
|
evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
|
-
evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=
|
|
187
|
+
evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=lb51HwfvKXJgJWdjYyyTRsT5pX876yEuWAz8G2oH6yM,21636
|
|
185
188
|
evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
|
|
186
189
|
evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
|
-
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=
|
|
190
|
+
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=p7_C0CmKJthMY1Iri1SyNfssuYBws_dkhPMREu-uM94,2059
|
|
188
191
|
evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
189
|
-
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=
|
|
192
|
+
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=_7Jk_h-qcaxWHgrULojNqXwZ8XgicmXhYT8bOKwnyAU,3519
|
|
190
193
|
evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
194
|
evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
|
|
192
195
|
evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
|
|
193
196
|
evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
194
|
-
evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=
|
|
197
|
+
evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=59EaZI2veg89_yyJyJ8QxkDMrQPC4ZTj3YQOBZ2_Vbk,3844
|
|
198
|
+
evalscope/benchmarks/hallusion_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
|
+
evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py,sha256=cKnhPNtZTfu1zKlgeguH9qnMNjnDNPSUKLb9ZETDnqg,6518
|
|
195
200
|
evalscope/benchmarks/healthbench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
196
201
|
evalscope/benchmarks/healthbench/healthbench_adapter.py,sha256=1sL7i9yhORH4xiFWB9puPKWNZZFJGZFAlKdlzHp-fiw,13228
|
|
197
202
|
evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGgVczAv-LPjA7Y,3683
|
|
@@ -200,7 +205,7 @@ evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFx
|
|
|
200
205
|
evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
201
206
|
evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
|
|
202
207
|
evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
203
|
-
evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=
|
|
208
|
+
evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=KArOIHNpjxD2ihH155Z0VxOzvlzKWqOwnqbp0J6aNzE,4375
|
|
204
209
|
evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
|
|
205
210
|
evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
206
211
|
evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
|
|
@@ -220,7 +225,7 @@ evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1cz
|
|
|
220
225
|
evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
226
|
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=maN8qHmDHJpexPeB0qwZoXJ5zrqPbJDYVRptqvXI9d4,6827
|
|
222
227
|
evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
|
|
223
|
-
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=
|
|
228
|
+
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=GDKmbvV2NJe-QQfy-C6nDJVpeiP6SVxjJzgTba_K4_w,6682
|
|
224
229
|
evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
|
|
225
230
|
evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
|
|
226
231
|
evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
|
|
@@ -230,8 +235,12 @@ evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
230
235
|
evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
|
|
231
236
|
evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
232
237
|
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=hn7SQhoIHKuH-2A_nGUhQPRw2gl2G-kZldc9ueY0G3A,1802
|
|
238
|
+
evalscope/benchmarks/math_verse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
239
|
+
evalscope/benchmarks/math_verse/math_verse_adapter.py,sha256=lRSNE9C9Y_zd9WJJpzsq3KgqFN5YHJzmpfBbW-h8D4M,4153
|
|
240
|
+
evalscope/benchmarks/math_vision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
241
|
+
evalscope/benchmarks/math_vision/math_vision_adapter.py,sha256=oWwRZRX2ardjpR4_8ZzBRO58ki9rLtPYQvZ4Z3OoMfE,4475
|
|
233
242
|
evalscope/benchmarks/math_vista/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
|
-
evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=
|
|
243
|
+
evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=XXSHxWzCRPnRDCXEuY8--6o_j-gXyCb3S_yOShMzWfk,4928
|
|
235
244
|
evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
236
245
|
evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=jyT9_D4w8PTtLBN3Kn10_CnssH_mPuRNnn9rek_zUEs,1655
|
|
237
246
|
evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -255,8 +264,25 @@ evalscope/benchmarks/multi_if/multi_if_adapter.py,sha256=I3_YPPUuRbrs9Gt3Qjhx9RM
|
|
|
255
264
|
evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
256
265
|
evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
|
|
257
266
|
evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
258
|
-
evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=
|
|
267
|
+
evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=aQw8Sss1-ZgQPWqwMITOpAtwzMoYWDGjLhUpZtkcrvY,17030
|
|
259
268
|
evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
|
|
269
|
+
evalscope/benchmarks/ner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
|
+
evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py,sha256=sNL6D0uRlzdLSxUuKOcKzY4o82HQJeCiwBuUdfGVy_o,2012
|
|
271
|
+
evalscope/benchmarks/ner/conll2003_adapter.py,sha256=r_6UmR68ohFsn021zArkGRq0tRZSaIy9RNNJncag0i8,1970
|
|
272
|
+
evalscope/benchmarks/ner/copious_adapter.py,sha256=ufxsmTvEEayLaDJcUW5--oo6vkDY69W2yQ1fpD0E5lQ,3751
|
|
273
|
+
evalscope/benchmarks/ner/cross_ner_adapter.py,sha256=a37zVh_kfyOoHf-QgBlnVfee93RkEKqT-6BBwS1PWps,4916
|
|
274
|
+
evalscope/benchmarks/ner/genia_ner_adapter.py,sha256=WnuzyCQ0l2SU9ZvuDNOGeLubKZmJvxSnYPWQZ8TR8Yc,2457
|
|
275
|
+
evalscope/benchmarks/ner/harvey_ner_adapter.py,sha256=iUxnkg7yIQXAFU6lbrhAvJAqQqVysQUSkBeyNE7P6eM,2098
|
|
276
|
+
evalscope/benchmarks/ner/mit_movie_trivia_adapter.py,sha256=dAYHGX-eS7PVxcoT2nidMbpssG1yf2D5w55_mL5rqhw,3017
|
|
277
|
+
evalscope/benchmarks/ner/mit_restaurant_adapter.py,sha256=mIc_huJnksd3fZ8nIY7Uacs4x0r8W7pt5RjvEnTYZ00,2528
|
|
278
|
+
evalscope/benchmarks/ner/ontonotes5_adapter.py,sha256=oulC4XkVF42yjXWPuKg_zptLQiRItCmlZBlHN0shr6A,3546
|
|
279
|
+
evalscope/benchmarks/ner/wnut2017_adapter.py,sha256=uGrfp-4wYIcpEL9PqQx82uzCeWz6vIPKb7JlStTSE9M,2379
|
|
280
|
+
evalscope/benchmarks/ner/cross_ner_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
281
|
+
evalscope/benchmarks/ner/cross_ner_entities/ai.py,sha256=RcgzYCygBmyrSOLacxxUN4cUznBZ3NemwfSR4hYBVKs,2484
|
|
282
|
+
evalscope/benchmarks/ner/cross_ner_entities/literature.py,sha256=ETzhu4PmiKS88NRkKPh96J9KiXKFdeQk5s_LSNqbD-M,1874
|
|
283
|
+
evalscope/benchmarks/ner/cross_ner_entities/music.py,sha256=_aJyKo83pO-j_LtGwXgrg9p8H1sHqXGPNW-wv1EIfWc,1999
|
|
284
|
+
evalscope/benchmarks/ner/cross_ner_entities/politics.py,sha256=taAqCnGdxHZGHM7sV0KONim8GjqVBrpMME6CVHwfJMo,1635
|
|
285
|
+
evalscope/benchmarks/ner/cross_ner_entities/science.py,sha256=DVZrCuMQ6-sPvRNTfx8iF_x9LaEBZ4o_RIWZADYKYGE,2919
|
|
260
286
|
evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
261
287
|
evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
|
|
262
288
|
evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
|
|
@@ -266,7 +292,7 @@ evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwS
|
|
|
266
292
|
evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
|
|
267
293
|
evalscope/benchmarks/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
|
|
268
294
|
evalscope/benchmarks/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
|
|
269
|
-
evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=
|
|
295
|
+
evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=hhF2MuPo5n6uM0OCgTHCNIgscNVhXRb3koqU73AErwY,15924
|
|
270
296
|
evalscope/benchmarks/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
|
|
271
297
|
evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
272
298
|
evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
|
|
@@ -277,21 +303,35 @@ evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuw
|
|
|
277
303
|
evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
|
|
278
304
|
evalscope/benchmarks/omni_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
279
305
|
evalscope/benchmarks/omni_bench/omni_bench_adapter.py,sha256=IJkRSokQC6MF_pN46Yofr_NaZaNt1XZFX1PUBmX4-qA,3651
|
|
306
|
+
evalscope/benchmarks/omnidoc_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
307
|
+
evalscope/benchmarks/omnidoc_bench/end2end_eval.py,sha256=71IEdeDsldtoFmMb1c_Pyugv-Wx-WOVIvccRkPvsJdU,15916
|
|
308
|
+
evalscope/benchmarks/omnidoc_bench/metrics.py,sha256=DZfaL5BlDjnW60kRnnfmsMgldPOKX0MJ2tAdsBf4dI0,20620
|
|
309
|
+
evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py,sha256=JBw9nS0e-P07MnfE4XAfbmaw3f-1okkJiOYl8a4lYYA,6192
|
|
310
|
+
evalscope/benchmarks/omnidoc_bench/utils.py,sha256=Db6QeIq_bc6Dl5xdYel5G7tnWib9_vn_KFiKeFN37IA,74435
|
|
311
|
+
evalscope/benchmarks/poly_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
312
|
+
evalscope/benchmarks/poly_math/poly_math_adapter.py,sha256=CHTDWgyYYaHwjwtT8K2at65GsDdl972onG7NLHuh8Gk,5377
|
|
313
|
+
evalscope/benchmarks/poly_math/utils/instruction.py,sha256=v3E8TnoWlooL_Ms5CQySzMmdyPKHAO005tGtTWMviPo,6901
|
|
314
|
+
evalscope/benchmarks/pope/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
315
|
+
evalscope/benchmarks/pope/pope_adapter.py,sha256=444tJqHdnecfOmqANwXUN5J-rp9w0PyCG_TVUpDyMXQ,5009
|
|
280
316
|
evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
281
317
|
evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=XN3F6NH7mF4ibwGX5nI01sqEHz05UQFnBAyfAe14QYE,6174
|
|
282
318
|
evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
283
319
|
evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
|
|
284
320
|
evalscope/benchmarks/real_world_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
285
321
|
evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py,sha256=J2u0J9d31uvkoz9nBI9tCMqG27hmYwdLQPPef9jx_pg,2788
|
|
322
|
+
evalscope/benchmarks/seed_bench_2_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
323
|
+
evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py,sha256=ngUOFhP8YFOE8ximkMg5U6TGLZMIXPHJsVJUurvbzM8,3064
|
|
286
324
|
evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
287
325
|
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=_duveAliSaPUqVSLQ2TtSv5sfwvFFy7t-MgIIokQ24s,9017
|
|
326
|
+
evalscope/benchmarks/simple_vqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
327
|
+
evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py,sha256=3ioSompYERllFE6yc3yZLl0NKWypRjg5d0uVf3b-4d0,9530
|
|
288
328
|
evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
289
329
|
evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
|
|
290
330
|
evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
|
|
291
331
|
evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
|
|
292
332
|
evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
293
333
|
evalscope/benchmarks/tau_bench/generation.py,sha256=d7J5xrxEI-0BYxdSuxdDavcR7f1ipBdpQsKZzwyzGds,5190
|
|
294
|
-
evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=
|
|
334
|
+
evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=VMb63g1_d0lZRhsc2eZJQjrNLhCUF7wss6Lzt87LNNw,6461
|
|
295
335
|
evalscope/benchmarks/text2image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
296
336
|
evalscope/benchmarks/text2image/evalmuse_adapter.py,sha256=g-Wc1qTg-xWLTjiZPo8zmQud75ac-8mBpYRxOHfiO0g,3024
|
|
297
337
|
evalscope/benchmarks/text2image/genai_bench_adapter.py,sha256=1GDB3gS9zwrfb9C83LQdQyN7bvvqeYuu5ulJ9Igmi2k,1876
|
|
@@ -299,15 +339,19 @@ evalscope/benchmarks/text2image/general_t2i_adapter.py,sha256=CHy9ufvrVHc_5WkGVR
|
|
|
299
339
|
evalscope/benchmarks/text2image/hpdv2_adapter.py,sha256=8-vWCV21eo_e9EbxDB5mGw2cFzD4OUQPLB66FvlO9W4,1781
|
|
300
340
|
evalscope/benchmarks/text2image/tifa_adapter.py,sha256=4CcprucAe25UpTZRV3Qgb-8jbeNHtXNRWHw8RiYvfJA,784
|
|
301
341
|
evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
302
|
-
evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=
|
|
342
|
+
evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=T3XtFLllrO5QOF13fU99LcigLKGqa0_VVi3C5mKPk84,3802
|
|
303
343
|
evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
|
|
304
344
|
evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
305
345
|
evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
|
|
306
346
|
evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
|
|
307
347
|
evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
308
348
|
evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=W7ESUAcLsHwbssiiSCQNUeQcqx6JEeW7FSQiBFycS24,3512
|
|
349
|
+
evalscope/benchmarks/visu_logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
350
|
+
evalscope/benchmarks/visu_logic/visu_logic_adapter.py,sha256=8dK8_HFxDhWTvCC8WTZjadChP6lNzgsFp_5qFSRGFoM,3277
|
|
309
351
|
evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
310
352
|
evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
|
|
353
|
+
evalscope/benchmarks/zerobench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
354
|
+
evalscope/benchmarks/zerobench/zerobench_adapter.py,sha256=pqnJEx4uOi3bxwYKqLxrxU5DX9p3F01N2itzbG_-VaU,2739
|
|
311
355
|
evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
312
356
|
evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
|
|
313
357
|
evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
|
|
@@ -319,14 +363,14 @@ evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gB
|
|
|
319
363
|
evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
|
|
320
364
|
evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
|
|
321
365
|
evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
|
|
322
|
-
evalscope/evaluator/evaluator.py,sha256=
|
|
366
|
+
evalscope/evaluator/evaluator.py,sha256=pw-5uBYoMdOUtNv7CE9-ZP8IQrkTMwp-csMjb9hO_B0,13832
|
|
323
367
|
evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
|
|
324
368
|
evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
|
|
325
369
|
evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
|
|
326
370
|
evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
|
|
327
371
|
evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
|
|
328
|
-
evalscope/metrics/math_parser.py,sha256=
|
|
329
|
-
evalscope/metrics/metric.py,sha256=
|
|
372
|
+
evalscope/metrics/math_parser.py,sha256=gJ1NR2Mcyzt9qMdR8I0-6U31Jzoe8a6yUuwvayYPi4c,17979
|
|
373
|
+
evalscope/metrics/metric.py,sha256=Hr1F_kuQfu3FNPwbCrJQA7VHb83-VPoDj7I9uX7d8U0,12840
|
|
330
374
|
evalscope/metrics/metrics.py,sha256=Y7TQ6MYaGE32EntTz-18CmQqYMpo1rQSvUiSwzBgpaQ,14599
|
|
331
375
|
evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
|
|
332
376
|
evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
|
|
@@ -439,40 +483,40 @@ evalscope/models/model_apis.py,sha256=ZkZ_nfbeAFJnCndRvRIRLcbmJFTMhGRBi-WfMu0uZK
|
|
|
439
483
|
evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
|
|
440
484
|
evalscope/models/openai_compatible.py,sha256=2uK78nDhWwgph7hcIiMc3NHRbIwvswRDM9o9ENahj4k,4659
|
|
441
485
|
evalscope/models/text2image_model.py,sha256=Sdiyw6vewjVTiXK8RFEh1pohOhDge80EoIWYpnLjr5Y,3929
|
|
442
|
-
evalscope/models/utils/openai.py,sha256=
|
|
486
|
+
evalscope/models/utils/openai.py,sha256=qoq9xXP1NrwBfnIS0SqzK9gl8tvbDpNmJP5n17pKyqw,28292
|
|
443
487
|
evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
444
|
-
evalscope/perf/arguments.py,sha256=
|
|
445
|
-
evalscope/perf/benchmark.py,sha256=
|
|
446
|
-
evalscope/perf/http_client.py,sha256=
|
|
447
|
-
evalscope/perf/main.py,sha256=
|
|
488
|
+
evalscope/perf/arguments.py,sha256=JHB-JIEHq5p3zoHeKn6dkelGq0JrMVMRne-wbXK2yhg,12892
|
|
489
|
+
evalscope/perf/benchmark.py,sha256=Uc6BJJGYTsAnfFljPy0WJIXcapHOIwvym3o0yPRTVqU,6964
|
|
490
|
+
evalscope/perf/http_client.py,sha256=8xJFYja8FoQA0MDTj2NcxPkAmji4n81fsaw1gRuL1sA,5152
|
|
491
|
+
evalscope/perf/main.py,sha256=eEL0qUdNPMyHr3ZTixTfZxKN4IIw3gz3sw8sq3S_vs4,4015
|
|
448
492
|
evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
|
|
449
493
|
evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
|
|
450
494
|
evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
|
|
451
|
-
evalscope/perf/plugin/api/base.py,sha256=
|
|
452
|
-
evalscope/perf/plugin/api/custom_api.py,sha256=
|
|
495
|
+
evalscope/perf/plugin/api/base.py,sha256=LLBDKOWUXYbLLLTtO86X1Y4Erbp5egs2WCXGj4my754,2822
|
|
496
|
+
evalscope/perf/plugin/api/custom_api.py,sha256=HHvhNlqNQr43GhIC61yoa54QCEAy4MRMmJ0kBy-rnsQ,8305
|
|
453
497
|
evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
|
|
454
|
-
evalscope/perf/plugin/api/default_api.py,sha256=
|
|
455
|
-
evalscope/perf/plugin/api/openai_api.py,sha256=
|
|
498
|
+
evalscope/perf/plugin/api/default_api.py,sha256=5hXjFN9bSIWUinrdFp9Cs1-jeKuNvRdBvDUX6VhSeZI,8991
|
|
499
|
+
evalscope/perf/plugin/api/openai_api.py,sha256=UVo9tAnqZbVNEQwAT0wOZb1Abbf-yQmr3iDKHwXDoI8,10628
|
|
456
500
|
evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
|
|
457
|
-
evalscope/perf/plugin/datasets/base.py,sha256
|
|
458
|
-
evalscope/perf/plugin/datasets/custom.py,sha256=
|
|
501
|
+
evalscope/perf/plugin/datasets/base.py,sha256=PFBMdo3H_Hx2jOXNrMb97DvJ5gJg6QajSYymCgTXKmo,3629
|
|
502
|
+
evalscope/perf/plugin/datasets/custom.py,sha256=kCofjHfcihPcsc1XwyLxn9QG9E88eZ5qAQW7nW6ID0c,1311
|
|
459
503
|
evalscope/perf/plugin/datasets/flickr8k.py,sha256=nhHiGNhXX-2c17NQ5q5Q7FgV2hB8XVeeAP8dKkboyHE,1033
|
|
460
504
|
evalscope/perf/plugin/datasets/kontext_bench.py,sha256=cN70hiBX1940IWvNWZG9YGE4vO1yj41Bo7bqmOWusoQ,1081
|
|
461
|
-
evalscope/perf/plugin/datasets/line_by_line.py,sha256=
|
|
462
|
-
evalscope/perf/plugin/datasets/longalpaca.py,sha256=
|
|
463
|
-
evalscope/perf/plugin/datasets/openqa.py,sha256=
|
|
464
|
-
evalscope/perf/plugin/datasets/random_dataset.py,sha256=
|
|
505
|
+
evalscope/perf/plugin/datasets/line_by_line.py,sha256=L3lj9evcr3q-Mcemyuy2WauBB5c6O-ttnIVw1t4UJUE,922
|
|
506
|
+
evalscope/perf/plugin/datasets/longalpaca.py,sha256=abFLvrRZFsno9IUr_bpvhMWHL9X2sahlIpGLUb-5BxA,1262
|
|
507
|
+
evalscope/perf/plugin/datasets/openqa.py,sha256=UlbHhzGoQTBXa4foEFhRTZX6v7So6pR-ExFhU2ws8YM,1427
|
|
508
|
+
evalscope/perf/plugin/datasets/random_dataset.py,sha256=GPuC5ovi3BW84RCiGSDd2cBZ3jRmFrtMRsxEocc1ud8,3347
|
|
465
509
|
evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=e6exWQnupWkTDNwt2MmEK-hccuxEDmWLJRMM70onKi0,3230
|
|
466
510
|
evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
|
|
467
511
|
evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
468
512
|
evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
|
|
469
|
-
evalscope/perf/utils/benchmark_util.py,sha256=
|
|
470
|
-
evalscope/perf/utils/db_util.py,sha256=
|
|
513
|
+
evalscope/perf/utils/benchmark_util.py,sha256=Uf4vUAsfgAZs2qsyv9cRY_i87QNEHl17XMhGgXq7wFw,8048
|
|
514
|
+
evalscope/perf/utils/db_util.py,sha256=lr26ah_KRznBBu_ssxXki_PgtELk5bUJV2JaM4LaeNI,11534
|
|
471
515
|
evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
|
|
472
|
-
evalscope/perf/utils/local_server.py,sha256=
|
|
473
|
-
evalscope/perf/utils/log_utils.py,sha256=
|
|
516
|
+
evalscope/perf/utils/local_server.py,sha256=dMoX8p6aCQq1JnoXxcyWknadLdBwpfQhvKwk5fn6G4Q,3727
|
|
517
|
+
evalscope/perf/utils/log_utils.py,sha256=YY8mnpJoHMlP6jtmEq7QujyuxhSUF1vqLk8TpBAkbY0,2162
|
|
474
518
|
evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
|
|
475
|
-
evalscope/report/__init__.py,sha256=
|
|
519
|
+
evalscope/report/__init__.py,sha256=Za-5AljVqZ7N-ap44MHUPtlaVkLCzpmst-n7SPkXPVE,1110
|
|
476
520
|
evalscope/report/combinator.py,sha256=F7KOClXVh56-XEw3Sb5uxwA6L8ZlH_P4-MOlm3Yp_Cg,5020
|
|
477
521
|
evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
|
|
478
522
|
evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
|
|
@@ -515,17 +559,18 @@ evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2
|
|
|
515
559
|
evalscope/utils/argument_utils.py,sha256=D7qOH85wf7LKh_cJ2X51OEaL7CMaddydmHZkfoYpvLk,1952
|
|
516
560
|
evalscope/utils/chat_service.py,sha256=sSki2pKGQP3UjcIf_lbO06afI-vsaUAqglwX__wUDEw,8766
|
|
517
561
|
evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
|
|
518
|
-
evalscope/utils/function_utils.py,sha256
|
|
562
|
+
evalscope/utils/function_utils.py,sha256=-WiBBrFaMzfgH7H2qOg3ciZ-BGyUGlsPUF-2nnU2KLw,9599
|
|
519
563
|
evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
|
|
520
|
-
evalscope/utils/io_utils.py,sha256=
|
|
564
|
+
evalscope/utils/io_utils.py,sha256=BRBdPi_BejTRbevvbTWz6kHf33v6i9bOQUMam2yxL5Y,14070
|
|
521
565
|
evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
|
|
522
|
-
evalscope/utils/logger.py,sha256=
|
|
566
|
+
evalscope/utils/logger.py,sha256=Zv4lb9gXx5R0t_wnQ4rIIbRg0oC04qsr2z6cUoRM47Y,6771
|
|
523
567
|
evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
|
|
524
568
|
evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
|
|
569
|
+
evalscope/utils/ner.py,sha256=gxvUURZVLJqZUrIqCy892rAAJ2ydYiGG5ZKPW_mpHsM,14148
|
|
525
570
|
evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
|
|
526
|
-
evalscope-1.1.
|
|
527
|
-
evalscope-1.1.
|
|
528
|
-
evalscope-1.1.
|
|
529
|
-
evalscope-1.1.
|
|
530
|
-
evalscope-1.1.
|
|
531
|
-
evalscope-1.1.
|
|
571
|
+
evalscope-1.1.1.dist-info/licenses/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
|
|
572
|
+
evalscope-1.1.1.dist-info/METADATA,sha256=vQzNoEz383srIkNFekFt_e9wzWQeWDMytu4aIaVTaYU,34332
|
|
573
|
+
evalscope-1.1.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
574
|
+
evalscope-1.1.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
|
|
575
|
+
evalscope-1.1.1.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
|
|
576
|
+
evalscope-1.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|