evalscope 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalscope/api/benchmark/__init__.py +9 -1
- evalscope/api/benchmark/adapters/__init__.py +4 -0
- evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
- evalscope/api/benchmark/adapters/default_data_adapter.py +75 -4
- evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
- evalscope/api/benchmark/adapters/multi_choice_adapter.py +5 -2
- evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
- evalscope/api/benchmark/adapters/text2image_adapter.py +12 -10
- evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
- evalscope/api/benchmark/benchmark.py +85 -2
- evalscope/api/benchmark/meta.py +10 -1
- evalscope/api/dataset/dataset.py +27 -6
- evalscope/api/dataset/loader.py +8 -3
- evalscope/api/evaluator/cache.py +31 -4
- evalscope/api/evaluator/evaluator.py +5 -0
- evalscope/api/evaluator/state.py +17 -1
- evalscope/api/messages/__init__.py +1 -0
- evalscope/api/messages/chat_message.py +52 -2
- evalscope/api/metric/__init__.py +1 -1
- evalscope/api/metric/metric.py +6 -1
- evalscope/api/metric/scorer.py +15 -7
- evalscope/api/mixin/__init__.py +1 -1
- evalscope/api/mixin/llm_judge_mixin.py +2 -0
- evalscope/api/mixin/sandbox_mixin.py +182 -0
- evalscope/api/model/generate_config.py +10 -6
- evalscope/api/model/model.py +5 -2
- evalscope/api/tool/tool_info.py +1 -1
- evalscope/app/app.py +3 -0
- evalscope/app/ui/multi_model.py +6 -1
- evalscope/app/ui/single_model.py +11 -5
- evalscope/app/utils/data_utils.py +8 -7
- evalscope/app/utils/env_utils.py +12 -0
- evalscope/app/utils/text_utils.py +14 -12
- evalscope/app/utils/visualization.py +2 -2
- evalscope/arguments.py +8 -4
- evalscope/backend/opencompass/backend_manager.py +0 -2
- evalscope/backend/rag_eval/utils/embedding.py +9 -1
- evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
- evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
- evalscope/benchmarks/aime/aime24_adapter.py +5 -0
- evalscope/benchmarks/aime/aime25_adapter.py +136 -1
- evalscope/benchmarks/aime/grader.py +307 -0
- evalscope/benchmarks/aime/math_normalize.py +189 -0
- evalscope/benchmarks/amc/amc_adapter.py +51 -0
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -0
- evalscope/benchmarks/bbh/bbh_adapter.py +43 -17
- evalscope/benchmarks/bfcl/{bfcl_adapter.py → v3/bfcl_v3_adapter.py} +131 -19
- evalscope/benchmarks/bfcl/{generation.py → v3/generation.py} +9 -9
- evalscope/benchmarks/bfcl/v3/utils.py +23 -0
- evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
- evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
- evalscope/benchmarks/bfcl/v4/utils.py +410 -0
- evalscope/benchmarks/biomix_qa/__init__.py +0 -0
- evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
- evalscope/benchmarks/blink/__init__.py +0 -0
- evalscope/benchmarks/blink/blink_adapter.py +61 -0
- evalscope/benchmarks/ceval/ceval_adapter.py +1 -2
- evalscope/benchmarks/chartqa/__init__.py +0 -0
- evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
- evalscope/benchmarks/chartqa/utils.py +38 -0
- evalscope/benchmarks/coin_flip/__init__.py +0 -0
- evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
- evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
- evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
- evalscope/benchmarks/competition_math/competition_math_adapter.py +5 -0
- evalscope/benchmarks/data_collection/data_collection_adapter.py +24 -19
- evalscope/benchmarks/docvqa/__init__.py +0 -0
- evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
- evalscope/benchmarks/drivelology/__init__.py +0 -0
- evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
- evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
- evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
- evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
- evalscope/benchmarks/drop/drop_adapter.py +15 -44
- evalscope/benchmarks/drop/utils.py +97 -0
- evalscope/benchmarks/frames/frames_adapter.py +2 -1
- evalscope/benchmarks/general_arena/general_arena_adapter.py +7 -2
- evalscope/benchmarks/general_arena/utils.py +2 -1
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
- evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +25 -9
- evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
- evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
- evalscope/benchmarks/halu_eval/__init__.py +0 -0
- evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
- evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
- evalscope/benchmarks/healthbench/__init__.py +0 -0
- evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
- evalscope/benchmarks/healthbench/utils.py +102 -0
- evalscope/benchmarks/hle/hle_adapter.py +3 -2
- evalscope/benchmarks/humaneval/humaneval_adapter.py +24 -52
- evalscope/benchmarks/humaneval/utils.py +235 -0
- evalscope/benchmarks/ifeval/instructions_util.py +2 -3
- evalscope/benchmarks/image_edit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
- evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
- evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
- evalscope/benchmarks/infovqa/__init__.py +0 -0
- evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +13 -6
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +66 -54
- evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
- evalscope/benchmarks/logi_qa/__int__.py +0 -0
- evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
- evalscope/benchmarks/math_500/math_500_adapter.py +5 -1
- evalscope/benchmarks/math_qa/__init__.py +0 -0
- evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
- evalscope/benchmarks/math_verse/__init__.py +0 -0
- evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
- evalscope/benchmarks/math_vision/__init__.py +0 -0
- evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
- evalscope/benchmarks/math_vista/__init__.py +0 -0
- evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
- evalscope/benchmarks/med_mcqa/__init__.py +0 -0
- evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
- evalscope/benchmarks/minerva_math/__init__.py +0 -0
- evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
- evalscope/benchmarks/mm_bench/__init__.py +0 -0
- evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
- evalscope/benchmarks/mm_star/__init__.py +0 -0
- evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +1 -1
- evalscope/benchmarks/mmmu/__init__.py +0 -0
- evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
- evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
- evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
- evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
- evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
- evalscope/benchmarks/multi_if/__init__.py +0 -0
- evalscope/benchmarks/multi_if/ifeval.py +3354 -0
- evalscope/benchmarks/multi_if/metrics.py +120 -0
- evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
- evalscope/benchmarks/music_trivia/__init__.py +0 -0
- evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +7 -6
- evalscope/benchmarks/ner/__init__.py +0 -0
- evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
- evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
- evalscope/benchmarks/ner/copious_adapter.py +85 -0
- evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
- evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
- evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
- evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
- evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
- evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
- evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
- evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
- evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
- evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
- evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
- evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
- evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
- evalscope/benchmarks/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
- evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
- evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
- evalscope/benchmarks/olympiad_bench/utils.py +565 -0
- evalscope/benchmarks/omni_bench/__init__.py +0 -0
- evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
- evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
- evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
- evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
- evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
- evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
- evalscope/benchmarks/piqa/__init__.py +0 -0
- evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
- evalscope/benchmarks/poly_math/__init__.py +0 -0
- evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
- evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
- evalscope/benchmarks/pope/__init__.py +0 -0
- evalscope/benchmarks/pope/pope_adapter.py +112 -0
- evalscope/benchmarks/process_bench/process_bench_adapter.py +1 -0
- evalscope/benchmarks/pumed_qa/__init__.py +0 -0
- evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
- evalscope/benchmarks/qasc/__init__.py +0 -0
- evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
- evalscope/benchmarks/real_world_qa/__init__.py +0 -0
- evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
- evalscope/benchmarks/sciq/__init__.py +0 -0
- evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
- evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
- evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -1
- evalscope/benchmarks/simple_vqa/__init__.py +0 -0
- evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
- evalscope/benchmarks/siqa/__init__.py +0 -0
- evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
- evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
- evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
- evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/{generation.py → tau_bench/generation.py} +1 -1
- evalscope/benchmarks/tau_bench/{tau_bench_adapter.py → tau_bench/tau_bench_adapter.py} +29 -29
- evalscope/benchmarks/text2image/__init__.py +0 -0
- evalscope/benchmarks/{aigc/t2i → text2image}/evalmuse_adapter.py +3 -1
- evalscope/benchmarks/{aigc/t2i → text2image}/genai_bench_adapter.py +2 -2
- evalscope/benchmarks/{aigc/t2i → text2image}/general_t2i_adapter.py +1 -1
- evalscope/benchmarks/{aigc/t2i → text2image}/hpdv2_adapter.py +7 -2
- evalscope/benchmarks/{aigc/t2i → text2image}/tifa_adapter.py +1 -0
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py +3 -3
- evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +1 -2
- evalscope/benchmarks/visu_logic/__init__.py +0 -0
- evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
- evalscope/benchmarks/wmt/__init__.py +0 -0
- evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
- evalscope/benchmarks/zerobench/__init__.py +0 -0
- evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
- evalscope/cli/start_app.py +7 -1
- evalscope/cli/start_perf.py +7 -1
- evalscope/config.py +103 -18
- evalscope/constants.py +18 -0
- evalscope/evaluator/evaluator.py +138 -82
- evalscope/metrics/bert_score/__init__.py +0 -0
- evalscope/metrics/bert_score/scorer.py +338 -0
- evalscope/metrics/bert_score/utils.py +697 -0
- evalscope/metrics/llm_judge.py +19 -7
- evalscope/metrics/math_parser.py +14 -0
- evalscope/metrics/metric.py +317 -13
- evalscope/metrics/metrics.py +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
- evalscope/models/image_edit_model.py +125 -0
- evalscope/models/model_apis.py +22 -0
- evalscope/models/openai_compatible.py +21 -0
- evalscope/models/text2image_model.py +2 -2
- evalscope/models/utils/openai.py +16 -6
- evalscope/perf/arguments.py +26 -4
- evalscope/perf/benchmark.py +76 -89
- evalscope/perf/http_client.py +31 -16
- evalscope/perf/main.py +15 -2
- evalscope/perf/plugin/api/base.py +9 -7
- evalscope/perf/plugin/api/custom_api.py +13 -58
- evalscope/perf/plugin/api/default_api.py +188 -79
- evalscope/perf/plugin/api/openai_api.py +85 -20
- evalscope/perf/plugin/datasets/base.py +21 -0
- evalscope/perf/plugin/datasets/custom.py +2 -3
- evalscope/perf/plugin/datasets/flickr8k.py +2 -2
- evalscope/perf/plugin/datasets/kontext_bench.py +2 -2
- evalscope/perf/plugin/datasets/line_by_line.py +2 -3
- evalscope/perf/plugin/datasets/longalpaca.py +2 -3
- evalscope/perf/plugin/datasets/openqa.py +2 -4
- evalscope/perf/plugin/datasets/random_dataset.py +1 -3
- evalscope/perf/plugin/datasets/random_vl_dataset.py +2 -2
- evalscope/perf/utils/benchmark_util.py +43 -27
- evalscope/perf/utils/db_util.py +14 -19
- evalscope/perf/utils/local_server.py +3 -44
- evalscope/perf/utils/log_utils.py +21 -6
- evalscope/report/__init__.py +13 -3
- evalscope/report/combinator.py +91 -20
- evalscope/report/generator.py +8 -87
- evalscope/report/report.py +8 -4
- evalscope/run.py +13 -5
- evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -4
- evalscope/utils/argument_utils.py +1 -1
- evalscope/utils/chat_service.py +1 -1
- evalscope/utils/function_utils.py +249 -12
- evalscope/utils/import_utils.py +73 -1
- evalscope/utils/io_utils.py +132 -7
- evalscope/utils/json_schema.py +25 -2
- evalscope/utils/logger.py +69 -18
- evalscope/utils/model_utils.py +4 -3
- evalscope/utils/multi_choices.py +39 -7
- evalscope/utils/ner.py +377 -0
- evalscope/version.py +2 -2
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/METADATA +252 -408
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/RECORD +290 -154
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
- evalscope/api/mixin/dataset_mixin.py +0 -105
- evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +0 -44
- tests/__init__.py +0 -1
- tests/aigc/__init__.py +0 -1
- tests/aigc/test_t2i.py +0 -142
- tests/benchmark/__init__.py +0 -1
- tests/benchmark/test_eval.py +0 -386
- tests/cli/__init__.py +0 -1
- tests/cli/test_all.py +0 -229
- tests/cli/test_collection.py +0 -96
- tests/cli/test_custom.py +0 -268
- tests/perf/__init__.py +0 -1
- tests/perf/test_perf.py +0 -176
- tests/rag/test_clip_benchmark.py +0 -90
- tests/rag/test_mteb.py +0 -213
- tests/rag/test_ragas.py +0 -128
- tests/swift/__init__.py +0 -1
- tests/swift/test_run_swift_eval.py +0 -146
- tests/swift/test_run_swift_vlm_eval.py +0 -128
- tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
- tests/test_run_all.py +0 -12
- tests/utils.py +0 -13
- tests/vlm/__init__.py +0 -1
- tests/vlm/test_vlmeval.py +0 -102
- /evalscope/benchmarks/{aigc → aa_lcr}/__init__.py +0 -0
- /evalscope/benchmarks/{aigc/i2i → ai2d}/__init__.py +0 -0
- /evalscope/benchmarks/{aigc/t2i → amc}/__init__.py +0 -0
- {tests/rag → evalscope/benchmarks/bfcl/v3}/__init__.py +0 -0
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,66 +1,71 @@
|
|
|
1
1
|
evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
|
|
2
|
-
evalscope/arguments.py,sha256=
|
|
3
|
-
evalscope/config.py,sha256=
|
|
4
|
-
evalscope/constants.py,sha256=
|
|
5
|
-
evalscope/run.py,sha256=
|
|
2
|
+
evalscope/arguments.py,sha256=jKAF47PsqXRioU21gRHw9hxJnfR31z_X7c__glRY5ns,6257
|
|
3
|
+
evalscope/config.py,sha256=74sX1TH0OC6kSw9yedySfbcywVV6pXxna8DH-0_-hDA,11637
|
|
4
|
+
evalscope/constants.py,sha256=BRjknIG0NCuUK-040ZmrUsf7WRIHfObJgJ5ilJfPhAc,3791
|
|
5
|
+
evalscope/run.py,sha256=dKFesxZZteOhscHif2A8xQHsJnG78D-m2gdfaWyMNC4,6742
|
|
6
6
|
evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
|
|
7
|
-
evalscope/version.py,sha256=
|
|
7
|
+
evalscope/version.py,sha256=qGtSETKmFUoaalidgoIBbZIYjbYTgLAapOQYohFtYxg,118
|
|
8
8
|
evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
|
|
10
|
-
evalscope/api/benchmark/__init__.py,sha256=
|
|
11
|
-
evalscope/api/benchmark/benchmark.py,sha256=
|
|
12
|
-
evalscope/api/benchmark/meta.py,sha256
|
|
13
|
-
evalscope/api/benchmark/adapters/__init__.py,sha256=
|
|
14
|
-
evalscope/api/benchmark/adapters/
|
|
15
|
-
evalscope/api/benchmark/adapters/
|
|
16
|
-
evalscope/api/benchmark/adapters/
|
|
10
|
+
evalscope/api/benchmark/__init__.py,sha256=AFP3T_Pml-8LtT1ZaxVsrX_BCPXMw5ue7cfiBII-GrE,247
|
|
11
|
+
evalscope/api/benchmark/benchmark.py,sha256=pmb6Y6JRYDtkzBi5HTh2M64dkSabuXcemVg-WY7HdHo,11644
|
|
12
|
+
evalscope/api/benchmark/meta.py,sha256=SQ6b6wNSHN6BUCmr5Bdi3C-dFkgdCzZ_xnIjLmj3-y4,4302
|
|
13
|
+
evalscope/api/benchmark/adapters/__init__.py,sha256=XXER_DdSiz70t-I3w4CuMSeyxiOIRGzG8v3ZROs-o_g,340
|
|
14
|
+
evalscope/api/benchmark/adapters/agent_adapter.py,sha256=ecuvOayj9kRKuLaflbBDnFV3vibFxKkQw7a3fOvsYwI,234
|
|
15
|
+
evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=xGz92QS-ADKrMColnG1jHbtvEKYwzodTIX6YEJhoDaM,31171
|
|
16
|
+
evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
|
|
17
|
+
evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
|
|
18
|
+
evalscope/api/benchmark/adapters/ner_adapter.py,sha256=_rvfl8cNlvKoQkHqR2tC_K-xZaV0TsB_pIzI4sP_SM0,8906
|
|
19
|
+
evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
|
|
20
|
+
evalscope/api/benchmark/adapters/vision_language_adapter.py,sha256=5d7ITkeosikb7u0ag0WkMaZ0SAYGkR_wKM9NP495GKk,280
|
|
17
21
|
evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
|
|
18
|
-
evalscope/api/dataset/dataset.py,sha256=
|
|
19
|
-
evalscope/api/dataset/loader.py,sha256=
|
|
22
|
+
evalscope/api/dataset/dataset.py,sha256=y-1DvPxN1Gxf-oEnrUq0Dcs4-rUQkApXP_rVYwsixSM,12119
|
|
23
|
+
evalscope/api/dataset/loader.py,sha256=44wQ3aBbn4YJyRjEsA1Bpg1DZicdCUzVybPoba_JhzY,9797
|
|
20
24
|
evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
|
|
21
25
|
evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
|
|
22
|
-
evalscope/api/evaluator/cache.py,sha256=
|
|
23
|
-
evalscope/api/evaluator/evaluator.py,sha256=
|
|
24
|
-
evalscope/api/evaluator/state.py,sha256=
|
|
26
|
+
evalscope/api/evaluator/cache.py,sha256=xzQvLd2EzZOrWcHAauT-hdeRCkx6BqNIJ2rxvrMFMak,13370
|
|
27
|
+
evalscope/api/evaluator/evaluator.py,sha256=xMF4w2qiQ7NNgOhSKs9Vd4VZ33SCDwTTJ82lDhaj1FQ,1734
|
|
28
|
+
evalscope/api/evaluator/state.py,sha256=Elz2cmbvOOqvOaEOAMatxgk4BdjqDZB3XKTaL4iqJLI,9039
|
|
25
29
|
evalscope/api/filter/__init__.py,sha256=5eWKjT-dAiz8nE0S6WnU6plqjXZHYn7CJOgFiHSoovM,66
|
|
26
30
|
evalscope/api/filter/filter.py,sha256=fsPddaHE5wwFIXgUWITFqlYXqdh6vx3QqcEf3rSXKVI,2068
|
|
27
|
-
evalscope/api/messages/__init__.py,sha256=
|
|
28
|
-
evalscope/api/messages/chat_message.py,sha256=
|
|
31
|
+
evalscope/api/messages/__init__.py,sha256=UKZ9VVCt7NPrcZXv_1e8MZ8mOWu0eLRvMIXykpJPZ9I,378
|
|
32
|
+
evalscope/api/messages/chat_message.py,sha256=D88TklSAWOaG21EBDVDoRPwzVCqzEGbVW4sA8Af4axc,10053
|
|
29
33
|
evalscope/api/messages/content.py,sha256=gUBUeK60BUhkwoulyzKL6q0iMt3VLlah9onLG1XVrWY,2772
|
|
30
34
|
evalscope/api/messages/utils.py,sha256=uqlEbYEoUKpXLW8tQtP-cY5Miq7W0Xl6a98j55u6m6E,1266
|
|
31
|
-
evalscope/api/metric/__init__.py,sha256=
|
|
32
|
-
evalscope/api/metric/metric.py,sha256=
|
|
33
|
-
evalscope/api/metric/scorer.py,sha256=
|
|
34
|
-
evalscope/api/mixin/__init__.py,sha256=
|
|
35
|
-
evalscope/api/mixin/
|
|
36
|
-
evalscope/api/mixin/
|
|
35
|
+
evalscope/api/metric/__init__.py,sha256=dVKKjUMwda_p6T3MR0Hz9NwSzXM7WVwzrjEy_RE2xyM,123
|
|
36
|
+
evalscope/api/metric/metric.py,sha256=DWMxAmAu8aisad81FpubQCkdfDLOiBaQ3NIgfhDp9y0,1702
|
|
37
|
+
evalscope/api/metric/scorer.py,sha256=dczSQwkRmPk1uvNCMGT5G6nYbwWTcpwsZtyYXWkrJII,3749
|
|
38
|
+
evalscope/api/mixin/__init__.py,sha256=xBuoTuao5o_EFThgeeeWI87x64Q12aJttsaZc8gak_c,83
|
|
39
|
+
evalscope/api/mixin/llm_judge_mixin.py,sha256=ECVDfxCeAEkymFssD7xKhIDcct2qgQTqGnbijXk9leE,5675
|
|
40
|
+
evalscope/api/mixin/sandbox_mixin.py,sha256=RbTpZXr6ohxgp1vU4YGMKmGKiIzVqQZ44quAHBX8zvs,6539
|
|
37
41
|
evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
|
|
38
|
-
evalscope/api/model/generate_config.py,sha256=
|
|
39
|
-
evalscope/api/model/model.py,sha256=
|
|
42
|
+
evalscope/api/model/generate_config.py,sha256=W5Yg8EyEMumIfpTGQMlZQ3D0p282pVIlhXGPj8sVQuA,8218
|
|
43
|
+
evalscope/api/model/model.py,sha256=c7YVbYYk47MHWwPjoB66xWjgmHdUGTOSOdtIsLcJfyc,12782
|
|
40
44
|
evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
|
|
41
45
|
evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
|
|
42
46
|
evalscope/api/tool/tool_call.py,sha256=WqMnw69L_yhQWycENZ7azPRhxRidhmrMcYAy7UTIqvg,2836
|
|
43
|
-
evalscope/api/tool/tool_info.py,sha256=
|
|
47
|
+
evalscope/api/tool/tool_info.py,sha256=FQOBqxKZ6Qb4f40iRH1mLg64cEhu1_-9Rn-f5iUrD2w,5733
|
|
44
48
|
evalscope/api/tool/utils.py,sha256=IWFzM6WspzBmNPicXn6b7KS6Y-1I-ErsK9fua4cb53Y,2324
|
|
45
49
|
evalscope/app/__init__.py,sha256=HWLXld_JXcBDsdL4L_4E8JsKyuBwwPUSwlejKnZ3HKc,579
|
|
46
|
-
evalscope/app/app.py,sha256=
|
|
50
|
+
evalscope/app/app.py,sha256=EaBWorA87ZmyIHovIE3styHWEVFsu_F70pTmP4-5zTQ,836
|
|
47
51
|
evalscope/app/arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
|
|
48
52
|
evalscope/app/constants.py,sha256=oG6tZ618zJcCnwZ5THnYL0gWTPDb5XKrnmdrWxY3Z4Q,385
|
|
49
53
|
evalscope/app/ui/__init__.py,sha256=IBxyQ2H-kSHoHJmXWDR8QMermvsMbiu673PQbXP_FnE,616
|
|
50
54
|
evalscope/app/ui/app_ui.py,sha256=wLrQ4VM7BnzvaYmPAk8NH9t5BaWooHFJcgmAOOd2I1w,2032
|
|
51
|
-
evalscope/app/ui/multi_model.py,sha256=
|
|
55
|
+
evalscope/app/ui/multi_model.py,sha256=mvMgpgiJGRrNRtReFcD_PiLatq-81zp65Vb3JYUP3PE,15356
|
|
52
56
|
evalscope/app/ui/sidebar.py,sha256=JA0QbG2iPStK-lFy6x_AjOHlQdesmgXoS0OYJUJ_Wyg,1339
|
|
53
|
-
evalscope/app/ui/single_model.py,sha256=
|
|
57
|
+
evalscope/app/ui/single_model.py,sha256=zFt1uDYrcgNJ7e_YLigrs6IXT3jyGMVn-7rv4CHAZvE,9741
|
|
54
58
|
evalscope/app/ui/visualization.py,sha256=jXFX_-7woQkcAiQkPAIRwVv1kdRdXonn9IvmB8yzPDU,1102
|
|
55
|
-
evalscope/app/utils/data_utils.py,sha256=
|
|
59
|
+
evalscope/app/utils/data_utils.py,sha256=GYOfkh0NoueeX3od-L852Q9C9SSkEFlW_40wjPa5b9w,7470
|
|
60
|
+
evalscope/app/utils/env_utils.py,sha256=2pmz4uNun-XNP6TqM6Oe576XopweEClhBaIdWO--kd0,382
|
|
56
61
|
evalscope/app/utils/localization.py,sha256=rWEviBmcnhIpAA-cG8djbbUA6p1Y358c0dxge5Pqi1U,6131
|
|
57
|
-
evalscope/app/utils/text_utils.py,sha256
|
|
58
|
-
evalscope/app/utils/visualization.py,sha256=
|
|
62
|
+
evalscope/app/utils/text_utils.py,sha256=-K-hRPMZ29Yqjhzd-391gPaD4B4wUuIg71PfbLnGJ38,3754
|
|
63
|
+
evalscope/app/utils/visualization.py,sha256=lycwcr-kFT2FKVw6iWMh3iD_n4dqpWVzhXMLDnkN8QY,3563
|
|
59
64
|
evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
65
|
evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
|
|
61
66
|
evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
|
|
62
67
|
evalscope/backend/opencompass/api_meta_template.py,sha256=OGH0lGJmBFKHs-6u6RPCov13_ArO63E6pV-aX1WVljU,1707
|
|
63
|
-
evalscope/backend/opencompass/backend_manager.py,sha256=
|
|
68
|
+
evalscope/backend/opencompass/backend_manager.py,sha256=q_5ABnnJb14T2L2bKY2y-ErJ9K4_65Rpl0a-h3hZ4TM,10337
|
|
64
69
|
evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
65
70
|
evalscope/backend/opencompass/tasks/eval_api.py,sha256=ZaGdUbEOtAW5VX3ZXmpHIttg_QrID34EnBTylD3uvos,1152
|
|
66
71
|
evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=JHSq4EnPJgv4sRJJplLH80EqE3ghtkn2k8HnV6DaDew,5406
|
|
@@ -99,33 +104,32 @@ evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=XMWW8ucN7ojR
|
|
|
99
104
|
evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=dZAjsfiR839INO3nbb9psLn-eL4sZOzpU6JMdtJUXtw,1895
|
|
100
105
|
evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
106
|
evalscope/backend/rag_eval/utils/clip.py,sha256=GLHhPCac2AH35AvRLvVqePA1gIMAewHTFmCJCDZzvqU,5015
|
|
102
|
-
evalscope/backend/rag_eval/utils/embedding.py,sha256=
|
|
107
|
+
evalscope/backend/rag_eval/utils/embedding.py,sha256=nuwBsiXPAwZisEmg3V4fWekd2tqp5mWRVb_fxNB1zTg,9867
|
|
103
108
|
evalscope/backend/rag_eval/utils/llm.py,sha256=1OH-985iIDtCOlCtzGmHu6GT_l1vJe7Iv-WyltQbcSc,2451
|
|
104
109
|
evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
|
|
105
110
|
evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
|
|
106
111
|
evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
|
|
107
112
|
evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
|
|
108
|
-
evalscope/benchmarks/
|
|
109
|
-
evalscope/benchmarks/
|
|
110
|
-
evalscope/benchmarks/
|
|
111
|
-
evalscope/benchmarks/
|
|
112
|
-
evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=3kRMglG82RXRiA-Hucj7o_O4hrrDaqJxExbmyohANQE,2898
|
|
113
|
-
evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=CkJFoQJzF5tR46hr0X0Wu1VJ57uBr28BiUr3WT-5X2c,1840
|
|
114
|
-
evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=nOZ8Lk_sRNiPK-d4a6hdmZ8mM40uIvpu5vlLF8Mb44s,1341
|
|
115
|
-
evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=Pr2_YW31-DIiklSkR5bGuwEBQWyBQleRiRAR7L7MoH4,1460
|
|
116
|
-
evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=OuOO-txcE5ZQHRZj78XGUOBfxJoPZpL3K0k_P9X4kL4,752
|
|
113
|
+
evalscope/benchmarks/aa_lcr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
+
evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py,sha256=7KZRdIhg733vBMBWngxTjtrZtl_DHjwMNLt9C2tN0_w,7483
|
|
115
|
+
evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
|
+
evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
|
|
117
117
|
evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
evalscope/benchmarks/aime/aime24_adapter.py,sha256=
|
|
119
|
-
evalscope/benchmarks/aime/aime25_adapter.py,sha256=
|
|
118
|
+
evalscope/benchmarks/aime/aime24_adapter.py,sha256=UGS0DhfylTbhyOfnOGKwDiXW0lMd47EPeMtY-WNPht0,1935
|
|
119
|
+
evalscope/benchmarks/aime/aime25_adapter.py,sha256=W2Jf68G8-QSgbZxgPJvCBq1VbQ-wRbeH1u9Qb2WNZkA,5157
|
|
120
|
+
evalscope/benchmarks/aime/grader.py,sha256=7qi3aFY6F-o70H3zRH_QHrXmPQz0euAhJaw_IATiw8k,9259
|
|
121
|
+
evalscope/benchmarks/aime/math_normalize.py,sha256=--ax2mPVb2jXtfk8__K0OYKit3HiDqKOFEcOSSX9SA4,5830
|
|
120
122
|
evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
121
123
|
evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=sjaWcK8WH1XY0kzm5eHsq_7J62EJocAf4gRV_UB8ZBE,4971
|
|
124
|
+
evalscope/benchmarks/amc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
+
evalscope/benchmarks/amc/amc_adapter.py,sha256=ame7mUbcXx1gvIVaqdv0HyBNZEaUYn3Amy06mO_sMos,1586
|
|
122
126
|
evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
123
127
|
evalscope/benchmarks/arc/arc_adapter.py,sha256=GASZmoJ-PpzBG70cBdABZA5uVqoyosjV-jf9WShK7L8,1622
|
|
124
128
|
evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
-
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=
|
|
129
|
+
evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=1XNzbu12FIupWgVlruaOQZ4TGj_Tkg8xgYaqQ4q3H0M,7302
|
|
126
130
|
evalscope/benchmarks/arena_hard/utils.py,sha256=23xCd7_ksrM4xMJBp7N2ZwpUpq1zpoQFjLm1oBcdgQY,5559
|
|
127
131
|
evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
128
|
-
evalscope/benchmarks/bbh/bbh_adapter.py,sha256=
|
|
132
|
+
evalscope/benchmarks/bbh/bbh_adapter.py,sha256=lRI-DfdFkyg4ylW4d-6CUfiNqlF7K_IoTjzJz3jYTUs,6346
|
|
129
133
|
evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt,sha256=xnzlaIRyeGlogG49v8nt4vpJO40J06ev4yc8cv0VSRY,1781
|
|
130
134
|
evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt,sha256=sfo-2iOeVzB0OGgd7NSQFELTGDTsr2DQ3u-g0ivI-sM,3653
|
|
131
135
|
evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt,sha256=UJBsc3Mwz8TZngdWH_NFlhhNbLhNHK6FvW9FHcS8H5g,1167
|
|
@@ -154,120 +158,273 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
|
|
|
154
158
|
evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
|
|
155
159
|
evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
|
|
156
160
|
evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
157
|
-
evalscope/benchmarks/bfcl/
|
|
158
|
-
evalscope/benchmarks/bfcl/
|
|
161
|
+
evalscope/benchmarks/bfcl/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
|
+
evalscope/benchmarks/bfcl/v3/bfcl_v3_adapter.py,sha256=ILOSPo9GR2BQAz-RrzptKGvCNs9HeW2YRUa0e-r1hPU,17509
|
|
163
|
+
evalscope/benchmarks/bfcl/v3/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
|
|
164
|
+
evalscope/benchmarks/bfcl/v3/utils.py,sha256=X1nfKmXp_dKUoYb4BzNN0-EwArE0Ppfi6m0mYB7ccLc,859
|
|
165
|
+
evalscope/benchmarks/bfcl/v4/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
|
+
evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py,sha256=hJWOT7WtgxuwKT_AmtAF3h25JnvYYXbDR1WWnyQOE9w,8974
|
|
167
|
+
evalscope/benchmarks/bfcl/v4/utils.py,sha256=bQInR19wJFPIOiRGjrJc3bGWWkJbL7zHwj3RdSavB5Q,15142
|
|
168
|
+
evalscope/benchmarks/biomix_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
|
+
evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py,sha256=rHid3tPEfauB_Q5pF3mMoyuyV01SHyBJEXm-7A2HV24,1218
|
|
170
|
+
evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
+
evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
|
|
159
172
|
evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
160
|
-
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=
|
|
173
|
+
evalscope/benchmarks/ceval/ceval_adapter.py,sha256=4FLPgY-UtqINafnNxfOsE9AwS6GFXFCUGOBI-4EZUGk,8503
|
|
174
|
+
evalscope/benchmarks/chartqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
175
|
+
evalscope/benchmarks/chartqa/chartqa_adapter.py,sha256=DA1kthMUvn4_GUfdRfuR-au3RkhE3WKPnR_f8nlhd4c,2813
|
|
176
|
+
evalscope/benchmarks/chartqa/utils.py,sha256=Ta9ZUMpIqzrAszju7_WOMBAlilH1Tx6TCheVpjrZJJI,1672
|
|
161
177
|
evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
178
|
evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=OWzRlSGswV24V-heLqqo7GQzpJp01TZ0DhFHq0iUP9A,8238
|
|
163
179
|
evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
164
180
|
evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5rinorwN5FRYaFFUI,5377
|
|
181
|
+
evalscope/benchmarks/coin_flip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
+
evalscope/benchmarks/coin_flip/coin_flip_adapter.py,sha256=qHg0kN4SX5cT_3cyFg0wfN69ldIEivyZTTX1A6j7LD8,4687
|
|
183
|
+
evalscope/benchmarks/commonsense_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
|
+
evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py,sha256=Y1c13U5D4x9oNTQ5F3ve2_3Ia1fkQXiqcf3ESODT4HQ,1109
|
|
165
185
|
evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
166
|
-
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=
|
|
186
|
+
evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=0zOsMwl1mNGDzOEQqsISa6GcwliPtWz0EBEHm3TR-AI,2394
|
|
167
187
|
evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
-
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=
|
|
188
|
+
evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=PbV5S0rUVF0jgrBKNjuZh2oE1FAsbYnPymg5u7NBjqo,8712
|
|
169
189
|
evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
190
|
evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
|
|
171
191
|
evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
|
|
192
|
+
evalscope/benchmarks/docvqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
193
|
+
evalscope/benchmarks/docvqa/docvqa_adapter.py,sha256=xGaayycILYoLd8r6wLLppDbU6Z1FdafbYFyjLHaftAA,2882
|
|
194
|
+
evalscope/benchmarks/drivelology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
195
|
+
evalscope/benchmarks/drivelology/drivelology_binary_adapter.py,sha256=_wqS0h4qQBeGBx9W_KbtO_tfzpxXsM-jejI0TK_i0io,7005
|
|
196
|
+
evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py,sha256=fnPPFma-fTRe5B3n_1ObN5wS_jY1QvCA9mcovAMR4ss,11735
|
|
197
|
+
evalscope/benchmarks/drivelology/drivelology_selection_adapter.py,sha256=dUTs0Dqc-54haam478Y0UkiFoDH7YgKfQE-5vxk99NU,1655
|
|
198
|
+
evalscope/benchmarks/drivelology/drivelology_writing_adapter.py,sha256=6-DZxTlDGIJ8iM-egu29RlpGApNjUouv6jD11PVXU3U,8026
|
|
172
199
|
evalscope/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
|
-
evalscope/benchmarks/drop/drop_adapter.py,sha256=
|
|
174
|
-
evalscope/benchmarks/drop/utils.py,sha256=
|
|
200
|
+
evalscope/benchmarks/drop/drop_adapter.py,sha256=Vl6IgFtK3zIUujlrjcCI7oVLlvpaRanDcDDSTWNbDfU,8851
|
|
201
|
+
evalscope/benchmarks/drop/utils.py,sha256=zdT31cqVp6gzIcOxsxsqfTn97SZnTuM3vuvLls5VJWY,4878
|
|
175
202
|
evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
-
evalscope/benchmarks/frames/frames_adapter.py,sha256=
|
|
203
|
+
evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyTT5r521676d_xJ6Vg,5514
|
|
177
204
|
evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
|
|
178
205
|
evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
179
|
-
evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=
|
|
180
|
-
evalscope/benchmarks/general_arena/utils.py,sha256=
|
|
206
|
+
evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=0qydHMwVaD7bzyK2Mccv9m4JnOfSy2vE8g1YPOaWSg0,21663
|
|
207
|
+
evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
|
|
181
208
|
evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
-
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=
|
|
209
|
+
evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=p7_C0CmKJthMY1Iri1SyNfssuYBws_dkhPMREu-uM94,2059
|
|
183
210
|
evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
184
|
-
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=
|
|
211
|
+
evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=_7Jk_h-qcaxWHgrULojNqXwZ8XgicmXhYT8bOKwnyAU,3519
|
|
185
212
|
evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
186
213
|
evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
|
|
187
214
|
evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
|
|
188
215
|
evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
189
|
-
evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=
|
|
216
|
+
evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=s0ytoqydH3FEG0KALrIlYXOLBKSrC7ikh0r8_v2dKGM,3579
|
|
217
|
+
evalscope/benchmarks/hallusion_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
218
|
+
evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py,sha256=LOnO1mvUJxU87-bZBC8qYtwlmFn3So2Yo9I3CkDjtIg,6544
|
|
219
|
+
evalscope/benchmarks/halu_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
220
|
+
evalscope/benchmarks/halu_eval/halu_eval_adapter.py,sha256=4bdzOEyOjxbTng9U94Yscknc7eeJSFyVf_ifZtTqYnM,5332
|
|
221
|
+
evalscope/benchmarks/halu_eval/halu_eval_instructions.py,sha256=z0_1rx3PqQHbheiUpUAdp4aUP6oBMMAEAIvDmWND07w,9770
|
|
222
|
+
evalscope/benchmarks/healthbench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
223
|
+
evalscope/benchmarks/healthbench/healthbench_adapter.py,sha256=gm5LOR5J1E3eXQ9aWF-rif2_l7Khx9UwS1Dfg-oEx8E,13242
|
|
224
|
+
evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGgVczAv-LPjA7Y,3683
|
|
190
225
|
evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
191
226
|
evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFxWOVKBUhk3Cc0EGzF5uYLK4,2041
|
|
192
227
|
evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
193
|
-
evalscope/benchmarks/hle/hle_adapter.py,sha256=
|
|
228
|
+
evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
|
|
194
229
|
evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
195
|
-
evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=
|
|
230
|
+
evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=KhJ91yqr964FccHIDE9A_nTldfhhLTzVB4Cpv3RDN5I,3933
|
|
231
|
+
evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
|
|
196
232
|
evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
233
|
evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
|
|
198
234
|
evalscope/benchmarks/ifeval/instructions.py,sha256=HXnn1JgU3dpYltqIovFAn02DxkYOGw337kLMlOfJxJE,56048
|
|
199
235
|
evalscope/benchmarks/ifeval/instructions_registry.py,sha256=3UXzVLgKwk_cf-2aG2tozjqYgvqm5Mj3ZRRb8rI-ucU,7262
|
|
200
|
-
evalscope/benchmarks/ifeval/instructions_util.py,sha256=
|
|
236
|
+
evalscope/benchmarks/ifeval/instructions_util.py,sha256=Zl9Q6xwtZtIkXLoVwz7oifSEyvbDGETljKHgc4tk6TM,25730
|
|
201
237
|
evalscope/benchmarks/ifeval/utils.py,sha256=MQt-b4K6uqU9H5TAM6Gxyz46r6XRBOgDsgdnwB0veg0,4470
|
|
238
|
+
evalscope/benchmarks/image_edit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
239
|
+
evalscope/benchmarks/image_edit/gedit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
240
|
+
evalscope/benchmarks/image_edit/gedit/gedit_adapter.py,sha256=a6hhRbnGCvMEMsbnSbczjXd4vHfMVEnFfP459FCF_Mc,5250
|
|
241
|
+
evalscope/benchmarks/image_edit/gedit/utils.py,sha256=UN0z9Dafs8d8lEXqxin321d8smiS3H9p3gyLkZFPFNg,14735
|
|
242
|
+
evalscope/benchmarks/image_edit/gedit/vie_prompts.py,sha256=qVXWQyVUwZxEasDjVmYBk30_JI4gnvHacMOmMsA4wcI,22056
|
|
243
|
+
evalscope/benchmarks/infovqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
244
|
+
evalscope/benchmarks/infovqa/infovqa_adapter.py,sha256=3m_EvfRZ5ItHkz-3mVlsF_NnPS7NH1-EXwUW-s4VMxA,2617
|
|
202
245
|
evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
246
|
evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1czqwnk8Zk-jRhBys,1304
|
|
204
247
|
evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
-
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=
|
|
248
|
+
evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=maN8qHmDHJpexPeB0qwZoXJ5zrqPbJDYVRptqvXI9d4,6827
|
|
206
249
|
evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
|
|
207
|
-
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=
|
|
250
|
+
evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=iJzmge-txK86lPn9mQbVlA7CPWXR4MpVdEDCfckkNRU,6236
|
|
208
251
|
evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
|
|
209
252
|
evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
|
|
210
253
|
evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
|
|
254
|
+
evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py,sha256=7DDx46EwtoR776vWjofJl1zaYCLdmeq8cF3fhDGdZgA,7424
|
|
211
255
|
evalscope/benchmarks/live_code_bench/testing_util.py,sha256=TuoOTciC-hz3FTeDzsQB_THH3Be9UOP2XMrax-4sXkM,17282
|
|
256
|
+
evalscope/benchmarks/logi_qa/__int__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
257
|
+
evalscope/benchmarks/logi_qa/logi_qa_adapter.py,sha256=LmmG1gI8AOCpV-35_WdiZ_9Ges-pTufWEzbgrOUW3Go,1271
|
|
212
258
|
evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
213
259
|
evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
|
|
214
260
|
evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
215
|
-
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=
|
|
261
|
+
evalscope/benchmarks/math_500/math_500_adapter.py,sha256=vPWqytzbJayhuJjh9Wv9gq44tkwrDehMmtPFx7QXYvs,1970
|
|
262
|
+
evalscope/benchmarks/math_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
263
|
+
evalscope/benchmarks/math_qa/math_qa_adapter.py,sha256=R0u0tiknCtxzMPEksstCDPT_M71pL2SxoF_KAEMxf7A,1170
|
|
264
|
+
evalscope/benchmarks/math_verse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
265
|
+
evalscope/benchmarks/math_verse/math_verse_adapter.py,sha256=Z4b2e3vCnllbhb457xJ6HJ6urfqfW3mHD9ZSumoEjQA,4321
|
|
266
|
+
evalscope/benchmarks/math_vision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
267
|
+
evalscope/benchmarks/math_vision/math_vision_adapter.py,sha256=LEM5Zs8cqQpqeNaRW0CzHRPPMGu4NHrkjl-EgP4RHaU,4643
|
|
268
|
+
evalscope/benchmarks/math_vista/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
269
|
+
evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=yHz8kVPp0fkfL6n3lcPkdhOFrXsR2mOEA1oUoTFiwJs,5096
|
|
270
|
+
evalscope/benchmarks/med_mcqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
271
|
+
evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py,sha256=oR2463a1kLAfBZOPA5gLOp2C6qx1cu2vKCutXZsoQys,1090
|
|
272
|
+
evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
273
|
+
evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=YaMT--hbOmlNlK5Q7iQ7c5XWVhLf3isVipeexOvrUOE,1823
|
|
274
|
+
evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
275
|
+
evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=ht2DVt_zEBJp4jvGy3myHHgdUUP9eff2O5BpIc9Fv74,4376
|
|
276
|
+
evalscope/benchmarks/mm_star/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
277
|
+
evalscope/benchmarks/mm_star/mm_star_adapter.py,sha256=oamLv6U2-JAK5mdVLkUgYxkOahxQkQYMRKAyu_xPAUE,2818
|
|
216
278
|
evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
217
279
|
evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=2NT3QbfPzajUTFZ0tBCl6PRrtFtAr5jPZNQRW2Idlno,5947
|
|
218
280
|
evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
219
|
-
evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=
|
|
281
|
+
evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=LTNNWREOc4HQ-A1_x5lItdZbzEvUCy77zkp7ZAh0hlY,3890
|
|
220
282
|
evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
283
|
evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=m_37OIFrJB4ZIvtbDJ_m9P9mA2QtrNjGfbbVo15awJg,7402
|
|
284
|
+
evalscope/benchmarks/mmmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
285
|
+
evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=WrykWq8n61CVrQ4XQhI3iEySgErHdZyng3udOL-Pddk,6054
|
|
286
|
+
evalscope/benchmarks/mmmu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
287
|
+
evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py,sha256=banPS1nDt9bQ95urKbSZnR-hBTw23eL9MSrHt_0ZLp0,4725
|
|
288
|
+
evalscope/benchmarks/mri_mcqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
289
|
+
evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py,sha256=OJnJDo_yLZihYXeAIyPQo9fMAgcAfSqEPpfhHpxvtXY,1095
|
|
290
|
+
evalscope/benchmarks/multi_if/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
291
|
+
evalscope/benchmarks/multi_if/ifeval.py,sha256=7y2rnJ4q1_DVA7I9mUnF7TBpu7Kez0X_Xhl-AJInzWk,87949
|
|
292
|
+
evalscope/benchmarks/multi_if/metrics.py,sha256=LWnhQw25cRNMReJ_xJ7Fx7WYHcT9i2FG1FUjYOuQDrI,4291
|
|
293
|
+
evalscope/benchmarks/multi_if/multi_if_adapter.py,sha256=I3_YPPUuRbrs9Gt3Qjhx9RM5Vu2gDFnheDcGu-oe840,5924
|
|
294
|
+
evalscope/benchmarks/music_trivia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
295
|
+
evalscope/benchmarks/music_trivia/music_trivia_adapter.py,sha256=zxJuNfCEQ2yU6OivAzrdhVSGcwPuu9dygho4VzELyZg,1281
|
|
222
296
|
evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
223
297
|
evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
|
|
224
298
|
evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
225
|
-
evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=
|
|
299
|
+
evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=aQw8Sss1-ZgQPWqwMITOpAtwzMoYWDGjLhUpZtkcrvY,17030
|
|
226
300
|
evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
|
|
301
|
+
evalscope/benchmarks/ner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
302
|
+
evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py,sha256=zA4K2LuEtu5MV7yJ8ClJ8kttFqcyO82KBpt6SornYWs,2012
|
|
303
|
+
evalscope/benchmarks/ner/conll2003_adapter.py,sha256=r_6UmR68ohFsn021zArkGRq0tRZSaIy9RNNJncag0i8,1970
|
|
304
|
+
evalscope/benchmarks/ner/copious_adapter.py,sha256=ufxsmTvEEayLaDJcUW5--oo6vkDY69W2yQ1fpD0E5lQ,3751
|
|
305
|
+
evalscope/benchmarks/ner/cross_ner_adapter.py,sha256=8UHFvZxKEghk30JZgvWbYFXnRBna5PHwI8_WZXpmCfg,4916
|
|
306
|
+
evalscope/benchmarks/ner/genia_ner_adapter.py,sha256=WHTSRj8PFvm19F1iqnujZ4qySIQ0rV7tiI-3HxuV75s,2457
|
|
307
|
+
evalscope/benchmarks/ner/harvey_ner_adapter.py,sha256=UVqpPNbTeWo_UgJm4f8xxOq1umXyOWb4pCz7s1ZxJpg,2098
|
|
308
|
+
evalscope/benchmarks/ner/mit_movie_trivia_adapter.py,sha256=jOldqrPdrFDSvs8ajidwK-c4zktclKu7KgLfHXBH-nk,3017
|
|
309
|
+
evalscope/benchmarks/ner/mit_restaurant_adapter.py,sha256=wUJBLGH992pbeKhb-e8ywC0XVvTGMiAPKjUxrpg9Iqo,2528
|
|
310
|
+
evalscope/benchmarks/ner/ontonotes5_adapter.py,sha256=oulC4XkVF42yjXWPuKg_zptLQiRItCmlZBlHN0shr6A,3546
|
|
311
|
+
evalscope/benchmarks/ner/wnut2017_adapter.py,sha256=uGrfp-4wYIcpEL9PqQx82uzCeWz6vIPKb7JlStTSE9M,2379
|
|
312
|
+
evalscope/benchmarks/ner/cross_ner_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
313
|
+
evalscope/benchmarks/ner/cross_ner_entities/ai.py,sha256=RcgzYCygBmyrSOLacxxUN4cUznBZ3NemwfSR4hYBVKs,2484
|
|
314
|
+
evalscope/benchmarks/ner/cross_ner_entities/literature.py,sha256=ETzhu4PmiKS88NRkKPh96J9KiXKFdeQk5s_LSNqbD-M,1874
|
|
315
|
+
evalscope/benchmarks/ner/cross_ner_entities/music.py,sha256=_aJyKo83pO-j_LtGwXgrg9p8H1sHqXGPNW-wv1EIfWc,1999
|
|
316
|
+
evalscope/benchmarks/ner/cross_ner_entities/politics.py,sha256=taAqCnGdxHZGHM7sV0KONim8GjqVBrpMME6CVHwfJMo,1635
|
|
317
|
+
evalscope/benchmarks/ner/cross_ner_entities/science.py,sha256=DVZrCuMQ6-sPvRNTfx8iF_x9LaEBZ4o_RIWZADYKYGE,2919
|
|
318
|
+
evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
319
|
+
evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
320
|
+
evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
|
|
321
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
|
|
322
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py,sha256=31bL0V32Fq7prF1WoVjXmrmMdhg0qNcoiOaKykKOrZM,36528
|
|
323
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
324
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwSaOEPPuCaX3Z8BGoZVvcc6OWbeO9w,7976
|
|
325
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
|
|
326
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
|
|
327
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
|
|
328
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py,sha256=hhF2MuPo5n6uM0OCgTHCNIgscNVhXRb3koqU73AErwY,15924
|
|
329
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
|
|
330
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
331
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
|
|
332
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py,sha256=qCuqDtsCfxAiQHYLNdHU7BQ9kLIZ9iyfmRxtIrGOBck,20349
|
|
333
|
+
evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py,sha256=7HzM1PEw8wNOhmQOsZe582Y2rr4u66Q3JKVvvMasntE,19565
|
|
334
|
+
evalscope/benchmarks/olympiad_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
335
|
+
evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuwCWVb4h1PIQKAIFqBehwRwO2WOD0KX_ik,6565
|
|
336
|
+
evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
|
|
337
|
+
evalscope/benchmarks/omni_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
338
|
+
evalscope/benchmarks/omni_bench/omni_bench_adapter.py,sha256=IJkRSokQC6MF_pN46Yofr_NaZaNt1XZFX1PUBmX4-qA,3651
|
|
339
|
+
evalscope/benchmarks/omnidoc_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
340
|
+
evalscope/benchmarks/omnidoc_bench/end2end_eval.py,sha256=71IEdeDsldtoFmMb1c_Pyugv-Wx-WOVIvccRkPvsJdU,15916
|
|
341
|
+
evalscope/benchmarks/omnidoc_bench/metrics.py,sha256=DZfaL5BlDjnW60kRnnfmsMgldPOKX0MJ2tAdsBf4dI0,20620
|
|
342
|
+
evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py,sha256=YpXl-HUiD-VjtwtWHG4KSUw6GAYIeKnpgqEXsweWnKY,6164
|
|
343
|
+
evalscope/benchmarks/omnidoc_bench/utils.py,sha256=Db6QeIq_bc6Dl5xdYel5G7tnWib9_vn_KFiKeFN37IA,74435
|
|
344
|
+
evalscope/benchmarks/piqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
345
|
+
evalscope/benchmarks/piqa/piqa_adapter.py,sha256=V3-8a7Ah04UgEWzYrQfGKiPk4xvpLS74G4mJWM1MqPI,1075
|
|
346
|
+
evalscope/benchmarks/poly_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
347
|
+
evalscope/benchmarks/poly_math/poly_math_adapter.py,sha256=yO8CSFXIoe8AzKIXq7IffefTiWPQrdh-4igt09KPb8o,5545
|
|
348
|
+
evalscope/benchmarks/poly_math/utils/instruction.py,sha256=v3E8TnoWlooL_Ms5CQySzMmdyPKHAO005tGtTWMviPo,6901
|
|
349
|
+
evalscope/benchmarks/pope/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
350
|
+
evalscope/benchmarks/pope/pope_adapter.py,sha256=iQFcAjh48Su76e8CV-Tj2oVzzls0mUIYRUUyknj0dqs,5035
|
|
227
351
|
evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
228
|
-
evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=
|
|
352
|
+
evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=7Bri8ZALJAMKKf1_rtQw1bH9-IuvutwZ9gMNXBgQpmY,6200
|
|
353
|
+
evalscope/benchmarks/pumed_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
354
|
+
evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py,sha256=R1AjPTqqV8N7IvBNx3Qydd39EP5QcA4BffG3a1WEDP4,6778
|
|
355
|
+
evalscope/benchmarks/qasc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
356
|
+
evalscope/benchmarks/qasc/qasc_adapter.py,sha256=rhYedQBhrCkv12b4-LWRSfY6LGNgV0I5n2Lm2BEFlak,1128
|
|
229
357
|
evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
230
358
|
evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
|
|
359
|
+
evalscope/benchmarks/real_world_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
360
|
+
evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py,sha256=J2u0J9d31uvkoz9nBI9tCMqG27hmYwdLQPPef9jx_pg,2788
|
|
361
|
+
evalscope/benchmarks/sciq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
|
+
evalscope/benchmarks/sciq/sciq_adapter.py,sha256=m0TMtTVR-cRQ0oMncgbN7w-v5d_m71hiGeIE5WRa2mA,1249
|
|
363
|
+
evalscope/benchmarks/seed_bench_2_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
364
|
+
evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py,sha256=ngUOFhP8YFOE8ximkMg5U6TGLZMIXPHJsVJUurvbzM8,3064
|
|
231
365
|
evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
232
|
-
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=
|
|
366
|
+
evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=OIVGeFVLEpZp7z2a6JLf_qdRjNhu1-GJgTVL7ocZFiU,9013
|
|
367
|
+
evalscope/benchmarks/simple_vqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
368
|
+
evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py,sha256=3ioSompYERllFE6yc3yZLl0NKWypRjg5d0uVf3b-4d0,9530
|
|
369
|
+
evalscope/benchmarks/siqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
370
|
+
evalscope/benchmarks/siqa/siqa_adapter.py,sha256=qpPbEaGrVMc5U0x8hwWRz8gR-1HL0Uvaa5QFInAsLm8,1342
|
|
233
371
|
evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
372
|
evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
|
|
235
373
|
evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
|
|
236
374
|
evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
|
|
237
375
|
evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
238
|
-
evalscope/benchmarks/tau_bench/
|
|
239
|
-
evalscope/benchmarks/tau_bench/
|
|
376
|
+
evalscope/benchmarks/tau_bench/tau2_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
377
|
+
evalscope/benchmarks/tau_bench/tau2_bench/generation.py,sha256=aMa_I12HmRUj33ELcKgvYCPE-sCimlpdGyean5QMSaE,5387
|
|
378
|
+
evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py,sha256=49M4ABPjSbR5Pkus4RFcnnprN9b3UbSjwXqlJ7PbwHI,5748
|
|
379
|
+
evalscope/benchmarks/tau_bench/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
380
|
+
evalscope/benchmarks/tau_bench/tau_bench/generation.py,sha256=d7J5xrxEI-0BYxdSuxdDavcR7f1ipBdpQsKZzwyzGds,5190
|
|
381
|
+
evalscope/benchmarks/tau_bench/tau_bench/tau_bench_adapter.py,sha256=vQJdHvvtWI7Eh5zp3M9vFSYmJ-GM4386PVLdf4IONYI,6443
|
|
382
|
+
evalscope/benchmarks/text2image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
383
|
+
evalscope/benchmarks/text2image/evalmuse_adapter.py,sha256=g-Wc1qTg-xWLTjiZPo8zmQud75ac-8mBpYRxOHfiO0g,3024
|
|
384
|
+
evalscope/benchmarks/text2image/genai_bench_adapter.py,sha256=1GDB3gS9zwrfb9C83LQdQyN7bvvqeYuu5ulJ9Igmi2k,1876
|
|
385
|
+
evalscope/benchmarks/text2image/general_t2i_adapter.py,sha256=CHy9ufvrVHc_5WkGVR_F-5wfLQVFtxwubZOfdpx9rd8,1354
|
|
386
|
+
evalscope/benchmarks/text2image/hpdv2_adapter.py,sha256=8-vWCV21eo_e9EbxDB5mGw2cFzD4OUQPLB66FvlO9W4,1781
|
|
387
|
+
evalscope/benchmarks/text2image/tifa_adapter.py,sha256=4CcprucAe25UpTZRV3Qgb-8jbeNHtXNRWHw8RiYvfJA,784
|
|
240
388
|
evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
241
|
-
evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=
|
|
389
|
+
evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=Lt1iSe9C0PgayqfgmjF0kGllFqizROqp4efjSl9SUUY,3790
|
|
242
390
|
evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
|
|
243
391
|
evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
244
392
|
evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
|
|
245
393
|
evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
|
|
246
394
|
evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
247
|
-
evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=
|
|
395
|
+
evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=W7ESUAcLsHwbssiiSCQNUeQcqx6JEeW7FSQiBFycS24,3512
|
|
396
|
+
evalscope/benchmarks/visu_logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
397
|
+
evalscope/benchmarks/visu_logic/visu_logic_adapter.py,sha256=8dK8_HFxDhWTvCC8WTZjadChP6lNzgsFp_5qFSRGFoM,3277
|
|
248
398
|
evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
399
|
evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
|
|
400
|
+
evalscope/benchmarks/wmt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
401
|
+
evalscope/benchmarks/wmt/wmt24_adapter.py,sha256=58BhTjdfJRQS2WtGxwdmgFC5VTx2XjKU0pi7KNh0iO8,8759
|
|
402
|
+
evalscope/benchmarks/zerobench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
403
|
+
evalscope/benchmarks/zerobench/zerobench_adapter.py,sha256=pqnJEx4uOi3bxwYKqLxrxU5DX9p3F01N2itzbG_-VaU,2739
|
|
250
404
|
evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
251
405
|
evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
|
|
252
406
|
evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
|
|
253
|
-
evalscope/cli/start_app.py,sha256=
|
|
407
|
+
evalscope/cli/start_app.py,sha256=LqJ3cSBY8FsM_JjInw4jlpitjaVoIZscUShMpDRPbro,1030
|
|
254
408
|
evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
|
|
255
|
-
evalscope/cli/start_perf.py,sha256=
|
|
409
|
+
evalscope/cli/start_perf.py,sha256=V8DwVPXTGmyDPma7Yk_pJbLb4iVkDj6Y3qPGHV03sE0,1082
|
|
256
410
|
evalscope/cli/start_server.py,sha256=01iDaEwLx59xRUrrZ_nhQE-QjUE1Rk5d43uMQ_4owbI,3677
|
|
257
411
|
evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gBiJWg,720
|
|
258
412
|
evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
|
|
259
413
|
evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
|
|
260
414
|
evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
|
|
261
|
-
evalscope/evaluator/evaluator.py,sha256=
|
|
415
|
+
evalscope/evaluator/evaluator.py,sha256=B4E6vTnG2v7efIsTwBHSyONT8GOwPwmyC6m3siubK08,15964
|
|
262
416
|
evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
|
|
263
417
|
evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
|
|
264
418
|
evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
|
|
265
419
|
evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
|
|
266
|
-
evalscope/metrics/llm_judge.py,sha256=
|
|
267
|
-
evalscope/metrics/math_parser.py,sha256=
|
|
268
|
-
evalscope/metrics/metric.py,sha256=
|
|
269
|
-
evalscope/metrics/metrics.py,sha256=
|
|
420
|
+
evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
|
|
421
|
+
evalscope/metrics/math_parser.py,sha256=gJ1NR2Mcyzt9qMdR8I0-6U31Jzoe8a6yUuwvayYPi4c,17979
|
|
422
|
+
evalscope/metrics/metric.py,sha256=0NKTUgNdvL1T4171Y6-ImsopsnUqx2AioSZTBKB4SmE,21975
|
|
423
|
+
evalscope/metrics/metrics.py,sha256=g4EPKTLe_qwofg1UAD7vZhpqVktsSjxV-y1BoaD4WiM,15324
|
|
270
424
|
evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
|
|
425
|
+
evalscope/metrics/bert_score/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
426
|
+
evalscope/metrics/bert_score/scorer.py,sha256=VobwRtTYS9ENHn5284sWkUlw5LBBNXvViZh185PkiPI,11969
|
|
427
|
+
evalscope/metrics/bert_score/utils.py,sha256=k7ekv1PBNkGMIj3W-KfkIV-i_ryMErcqBtAWjXv9gos,29659
|
|
271
428
|
evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
|
|
272
429
|
evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=T91PgJfi1As7BR7I-Hq6rLlvHAtMB9JpBw9gMTH8VlE,12114
|
|
273
430
|
evalscope/metrics/t2v_metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -344,9 +501,9 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sh
|
|
|
344
501
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
|
|
345
502
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=Ns7oM4KpKxWZTo8Lefe4EDFw-jzp5633zAArcWjoVZA,9772
|
|
346
503
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=KIF5tsiE7a5dbDfa-IKwzuzMUpuEAQPrm1nWFFtAeoI,20032
|
|
347
|
-
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=
|
|
504
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=85ZvV2gKSnsbP5941PeJ-JJ4t8_lOYQe1EOxrHlIbNI,52728
|
|
348
505
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=o5ykt3Q_WQlNmyxjQaS2-KPLGq1xqLZixNYam_Bs6NA,18701
|
|
349
|
-
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=
|
|
506
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=NPDpIRxjiroafZk5Z2uA9bC8Bi-yXY7um5HXxThF7N0,46857
|
|
350
507
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
351
508
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
|
|
352
509
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
|
|
@@ -366,54 +523,55 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/bl
|
|
|
366
523
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=TOAI-KaUrtKjR1GNU_WwNXNpb9gGT-KX2FYe3muv_e0,4275
|
|
367
524
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
|
|
368
525
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=S68U0DxWYGDmreRbH5yLDHBNN9PsczY9H0Uik0hO-ds,13872
|
|
369
|
-
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=
|
|
526
|
+
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=zv_WyHi67hvgHQ4DkZ8a4UoPcgrADKayqVtiIq-p3V4,36695
|
|
370
527
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=p67DDiFS-676z0z8jPj6NwXwNjEsqTXaXCh3g2UiDno,840
|
|
371
528
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
|
|
372
529
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
|
|
373
530
|
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=Epk72q5iTdzRbuzOR669QqAUMgrFFngAU8Z3Qy9KLbM,11209
|
|
374
531
|
evalscope/models/__init__.py,sha256=RmW2S31BHBhMN49_VVF_5PJAk-TsuZQkuF2ALShbhAw,556
|
|
532
|
+
evalscope/models/image_edit_model.py,sha256=oVjGgebnFu3ZXBJLNn62rJ65fcJR7DlG4qEVxisPJ2Y,4104
|
|
375
533
|
evalscope/models/mockllm.py,sha256=t1fFAHkEb1n_atOCfnGteCX3DWp774lnWcHzi5lBjwM,2511
|
|
376
|
-
evalscope/models/model_apis.py,sha256
|
|
534
|
+
evalscope/models/model_apis.py,sha256=ZkZ_nfbeAFJnCndRvRIRLcbmJFTMhGRBi-WfMu0uZKE,1922
|
|
377
535
|
evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
|
|
378
|
-
evalscope/models/openai_compatible.py,sha256=
|
|
379
|
-
evalscope/models/text2image_model.py,sha256
|
|
380
|
-
evalscope/models/utils/openai.py,sha256=
|
|
536
|
+
evalscope/models/openai_compatible.py,sha256=7UxS4TZBYw7jGzuu-dUs6-5g1_nydtW-0mHSxkl74w0,5438
|
|
537
|
+
evalscope/models/text2image_model.py,sha256=Sdiyw6vewjVTiXK8RFEh1pohOhDge80EoIWYpnLjr5Y,3929
|
|
538
|
+
evalscope/models/utils/openai.py,sha256=A93Wd7egqmu6DNLeibnRAPLQJXTB5ucya6aBQSHkHGk,28475
|
|
381
539
|
evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
382
|
-
evalscope/perf/arguments.py,sha256=
|
|
383
|
-
evalscope/perf/benchmark.py,sha256=
|
|
384
|
-
evalscope/perf/http_client.py,sha256=
|
|
385
|
-
evalscope/perf/main.py,sha256=
|
|
540
|
+
evalscope/perf/arguments.py,sha256=JHB-JIEHq5p3zoHeKn6dkelGq0JrMVMRne-wbXK2yhg,12892
|
|
541
|
+
evalscope/perf/benchmark.py,sha256=Uc6BJJGYTsAnfFljPy0WJIXcapHOIwvym3o0yPRTVqU,6964
|
|
542
|
+
evalscope/perf/http_client.py,sha256=8xJFYja8FoQA0MDTj2NcxPkAmji4n81fsaw1gRuL1sA,5152
|
|
543
|
+
evalscope/perf/main.py,sha256=eEL0qUdNPMyHr3ZTixTfZxKN4IIw3gz3sw8sq3S_vs4,4015
|
|
386
544
|
evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
|
|
387
545
|
evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
|
|
388
546
|
evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
|
|
389
|
-
evalscope/perf/plugin/api/base.py,sha256=
|
|
390
|
-
evalscope/perf/plugin/api/custom_api.py,sha256=
|
|
547
|
+
evalscope/perf/plugin/api/base.py,sha256=LLBDKOWUXYbLLLTtO86X1Y4Erbp5egs2WCXGj4my754,2822
|
|
548
|
+
evalscope/perf/plugin/api/custom_api.py,sha256=HHvhNlqNQr43GhIC61yoa54QCEAy4MRMmJ0kBy-rnsQ,8305
|
|
391
549
|
evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
|
|
392
|
-
evalscope/perf/plugin/api/default_api.py,sha256=
|
|
393
|
-
evalscope/perf/plugin/api/openai_api.py,sha256=
|
|
550
|
+
evalscope/perf/plugin/api/default_api.py,sha256=A3_dUduoBo9-xUdYsKMI1X0WUw_wHsJFNe5hTr9LUEo,9418
|
|
551
|
+
evalscope/perf/plugin/api/openai_api.py,sha256=UVo9tAnqZbVNEQwAT0wOZb1Abbf-yQmr3iDKHwXDoI8,10628
|
|
394
552
|
evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
|
|
395
|
-
evalscope/perf/plugin/datasets/base.py,sha256
|
|
396
|
-
evalscope/perf/plugin/datasets/custom.py,sha256=
|
|
397
|
-
evalscope/perf/plugin/datasets/flickr8k.py,sha256=
|
|
398
|
-
evalscope/perf/plugin/datasets/kontext_bench.py,sha256
|
|
399
|
-
evalscope/perf/plugin/datasets/line_by_line.py,sha256=
|
|
400
|
-
evalscope/perf/plugin/datasets/longalpaca.py,sha256=
|
|
401
|
-
evalscope/perf/plugin/datasets/openqa.py,sha256=
|
|
402
|
-
evalscope/perf/plugin/datasets/random_dataset.py,sha256=
|
|
403
|
-
evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=
|
|
553
|
+
evalscope/perf/plugin/datasets/base.py,sha256=PFBMdo3H_Hx2jOXNrMb97DvJ5gJg6QajSYymCgTXKmo,3629
|
|
554
|
+
evalscope/perf/plugin/datasets/custom.py,sha256=kCofjHfcihPcsc1XwyLxn9QG9E88eZ5qAQW7nW6ID0c,1311
|
|
555
|
+
evalscope/perf/plugin/datasets/flickr8k.py,sha256=nhHiGNhXX-2c17NQ5q5Q7FgV2hB8XVeeAP8dKkboyHE,1033
|
|
556
|
+
evalscope/perf/plugin/datasets/kontext_bench.py,sha256=cN70hiBX1940IWvNWZG9YGE4vO1yj41Bo7bqmOWusoQ,1081
|
|
557
|
+
evalscope/perf/plugin/datasets/line_by_line.py,sha256=L3lj9evcr3q-Mcemyuy2WauBB5c6O-ttnIVw1t4UJUE,922
|
|
558
|
+
evalscope/perf/plugin/datasets/longalpaca.py,sha256=abFLvrRZFsno9IUr_bpvhMWHL9X2sahlIpGLUb-5BxA,1262
|
|
559
|
+
evalscope/perf/plugin/datasets/openqa.py,sha256=UlbHhzGoQTBXa4foEFhRTZX6v7So6pR-ExFhU2ws8YM,1427
|
|
560
|
+
evalscope/perf/plugin/datasets/random_dataset.py,sha256=GPuC5ovi3BW84RCiGSDd2cBZ3jRmFrtMRsxEocc1ud8,3347
|
|
561
|
+
evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=e6exWQnupWkTDNwt2MmEK-hccuxEDmWLJRMM70onKi0,3230
|
|
404
562
|
evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
|
|
405
563
|
evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
406
564
|
evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
|
|
407
|
-
evalscope/perf/utils/benchmark_util.py,sha256=
|
|
408
|
-
evalscope/perf/utils/db_util.py,sha256=
|
|
565
|
+
evalscope/perf/utils/benchmark_util.py,sha256=Uf4vUAsfgAZs2qsyv9cRY_i87QNEHl17XMhGgXq7wFw,8048
|
|
566
|
+
evalscope/perf/utils/db_util.py,sha256=lr26ah_KRznBBu_ssxXki_PgtELk5bUJV2JaM4LaeNI,11534
|
|
409
567
|
evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
|
|
410
|
-
evalscope/perf/utils/local_server.py,sha256=
|
|
411
|
-
evalscope/perf/utils/log_utils.py,sha256=
|
|
568
|
+
evalscope/perf/utils/local_server.py,sha256=dMoX8p6aCQq1JnoXxcyWknadLdBwpfQhvKwk5fn6G4Q,3727
|
|
569
|
+
evalscope/perf/utils/log_utils.py,sha256=YY8mnpJoHMlP6jtmEq7QujyuxhSUF1vqLk8TpBAkbY0,2162
|
|
412
570
|
evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
|
|
413
|
-
evalscope/report/__init__.py,sha256=
|
|
414
|
-
evalscope/report/combinator.py,sha256=
|
|
415
|
-
evalscope/report/generator.py,sha256=
|
|
416
|
-
evalscope/report/report.py,sha256=
|
|
571
|
+
evalscope/report/__init__.py,sha256=rjjg_4PHuUA_15XXjOgPjO6cOm08LOi5yUGYzOK6KX8,1216
|
|
572
|
+
evalscope/report/combinator.py,sha256=rpZJsMiVF9Uf4niP5WmZVaLcITPEXdER9Etgqn-BsU8,6740
|
|
573
|
+
evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
|
|
574
|
+
evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
|
|
417
575
|
evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
418
576
|
evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
|
|
419
577
|
evalscope/third_party/longbench_write/__init__.py,sha256=GNbBDc7HAh_V2Hfy5HhND_u7z6OI79czoBlP8lX4PVo,126
|
|
@@ -448,45 +606,23 @@ evalscope/third_party/toolbench_static/infer.py,sha256=rsADLhEd2IBcC6EI9aD7hSJmo
|
|
|
448
606
|
evalscope/third_party/toolbench_static/requirements.txt,sha256=OW91Z8hfzh7yQUYgP1Di_E6DgNgGoGP1UcvnqrdCR68,22
|
|
449
607
|
evalscope/third_party/toolbench_static/toolbench_static.py,sha256=xE__eXvSwHmmSh1tXNvyBo6MCO4mDlYTbIYl9OGEfNI,2120
|
|
450
608
|
evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
451
|
-
evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=
|
|
609
|
+
evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=hy0JpjSEkCJh3z5ZnY8gGfdJ2ajkS5zRl-2ZQq6Gu8A,2527
|
|
452
610
|
evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2231
|
|
453
|
-
evalscope/utils/argument_utils.py,sha256=
|
|
454
|
-
evalscope/utils/chat_service.py,sha256=
|
|
611
|
+
evalscope/utils/argument_utils.py,sha256=zYsqWLFlVeiLSQdFruBI_seTUEVTJ87r8MKwL2JNGb0,1951
|
|
612
|
+
evalscope/utils/chat_service.py,sha256=sSki2pKGQP3UjcIf_lbO06afI-vsaUAqglwX__wUDEw,8766
|
|
455
613
|
evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
|
|
456
|
-
evalscope/utils/function_utils.py,sha256=
|
|
457
|
-
evalscope/utils/import_utils.py,sha256=
|
|
458
|
-
evalscope/utils/io_utils.py,sha256=
|
|
459
|
-
evalscope/utils/json_schema.py,sha256=
|
|
460
|
-
evalscope/utils/logger.py,sha256=
|
|
461
|
-
evalscope/utils/model_utils.py,sha256=
|
|
462
|
-
evalscope/utils/multi_choices.py,sha256=
|
|
614
|
+
evalscope/utils/function_utils.py,sha256=Zu3njXZl6U5AhTyPUfhGXrdCRUCgY1Kvy6gtBpOrvHA,10380
|
|
615
|
+
evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
|
|
616
|
+
evalscope/utils/io_utils.py,sha256=LSPYaIEYv8oj4ozAcbxtSCbsl4edWrr2aI5CP161DvM,14133
|
|
617
|
+
evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
|
|
618
|
+
evalscope/utils/logger.py,sha256=su2D4d3apydmjiYrEBX0p2m8A6tPOlAupmnSfo4jttI,6807
|
|
619
|
+
evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
|
|
620
|
+
evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
|
|
621
|
+
evalscope/utils/ner.py,sha256=gxvUURZVLJqZUrIqCy892rAAJ2ydYiGG5ZKPW_mpHsM,14148
|
|
463
622
|
evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
tests/benchmark/test_eval.py,sha256=fHAr4h2YjqIVk-FHp93HUZvRZ1fvlVFd1EUeRwrIwYw,12559
|
|
471
|
-
tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
472
|
-
tests/cli/test_all.py,sha256=a3G0LMgQx3M97uy0GfX1DFxbA7zWofkxgtwT8PMorQI,6268
|
|
473
|
-
tests/cli/test_collection.py,sha256=OUm2_Qt0zkQehPTAmUaGRNBes8ewr7wYfE0E-gUe1J8,4386
|
|
474
|
-
tests/cli/test_custom.py,sha256=9z_N7Re712xI62TqVSTBdzB_iFFEUb55wcWIcGvJb84,9254
|
|
475
|
-
tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
476
|
-
tests/perf/test_perf.py,sha256=AEWvpN3ID6s-9MEoaZjQqUM8VVsqgk_v9KX8pDgvozA,5864
|
|
477
|
-
tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
478
|
-
tests/rag/test_clip_benchmark.py,sha256=YmfezEzqBrl9-Ga2pG4YXs0ARcD5gWmuzINjY08tPpM,2695
|
|
479
|
-
tests/rag/test_mteb.py,sha256=fdNQIyUEzE7puPCKw5QhCHTEu7hz-ieHeq1xCWGh6IM,7246
|
|
480
|
-
tests/rag/test_ragas.py,sha256=5qozXvPFIb67T-igJv87ijlOgkPnqgkkBVXu6Ht4D0A,4554
|
|
481
|
-
tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
482
|
-
tests/swift/test_run_swift_eval.py,sha256=YbIhYNoI4kAB-ox-OXAKUifLIXTFqP-xGZicrAgK_V0,5784
|
|
483
|
-
tests/swift/test_run_swift_vlm_eval.py,sha256=RwrKkc1WHEZxetM11cGL81G4faKCn7SYn4VlwL03atI,4934
|
|
484
|
-
tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=UAUtOCQ72xbm8s-sov3cBEpYVDy189wpB-qOL3KoU7M,6053
|
|
485
|
-
tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
|
|
486
|
-
tests/vlm/test_vlmeval.py,sha256=EDQRkYfSyOICUwo_tm3p-puaE_xdFmqOPkrt5etxsqM,3307
|
|
487
|
-
evalscope-1.0.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
|
|
488
|
-
evalscope-1.0.0.dist-info/METADATA,sha256=FKr7sZCbyX_HxicgCX5rHrZz19STzLSK1Tgmm0CrWlg,39723
|
|
489
|
-
evalscope-1.0.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
490
|
-
evalscope-1.0.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
|
|
491
|
-
evalscope-1.0.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
|
|
492
|
-
evalscope-1.0.0.dist-info/RECORD,,
|
|
623
|
+
evalscope-1.2.0.dist-info/licenses/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
|
|
624
|
+
evalscope-1.2.0.dist-info/METADATA,sha256=uERC07rUVf9mGqWR3b4-t4XyJW1OUmW8waA5CHFclHo,35423
|
|
625
|
+
evalscope-1.2.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
626
|
+
evalscope-1.2.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
|
|
627
|
+
evalscope-1.2.0.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
|
|
628
|
+
evalscope-1.2.0.dist-info/RECORD,,
|