evalscope 0.17.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273) hide show
  1. evalscope/__init__.py +4 -1
  2. evalscope/api/__init__.py +0 -0
  3. evalscope/api/benchmark/__init__.py +3 -0
  4. evalscope/api/benchmark/adapters/__init__.py +3 -0
  5. evalscope/api/benchmark/adapters/default_data_adapter.py +683 -0
  6. evalscope/api/benchmark/adapters/multi_choice_adapter.py +83 -0
  7. evalscope/api/benchmark/adapters/text2image_adapter.py +155 -0
  8. evalscope/api/benchmark/benchmark.py +321 -0
  9. evalscope/api/benchmark/meta.py +115 -0
  10. evalscope/api/dataset/__init__.py +2 -0
  11. evalscope/api/dataset/dataset.py +349 -0
  12. evalscope/api/dataset/loader.py +261 -0
  13. evalscope/api/dataset/utils.py +143 -0
  14. evalscope/api/evaluator/__init__.py +3 -0
  15. evalscope/api/evaluator/cache.py +355 -0
  16. evalscope/api/evaluator/evaluator.py +56 -0
  17. evalscope/api/evaluator/state.py +264 -0
  18. evalscope/api/filter/__init__.py +1 -0
  19. evalscope/api/filter/filter.py +72 -0
  20. evalscope/api/messages/__init__.py +11 -0
  21. evalscope/api/messages/chat_message.py +198 -0
  22. evalscope/api/messages/content.py +102 -0
  23. evalscope/api/messages/utils.py +35 -0
  24. evalscope/api/metric/__init__.py +2 -0
  25. evalscope/api/metric/metric.py +55 -0
  26. evalscope/api/metric/scorer.py +105 -0
  27. evalscope/api/mixin/__init__.py +2 -0
  28. evalscope/api/mixin/dataset_mixin.py +105 -0
  29. evalscope/api/mixin/llm_judge_mixin.py +168 -0
  30. evalscope/api/model/__init__.py +12 -0
  31. evalscope/api/model/generate_config.py +157 -0
  32. evalscope/api/model/model.py +383 -0
  33. evalscope/api/model/model_output.py +285 -0
  34. evalscope/api/registry.py +182 -0
  35. evalscope/api/tool/__init__.py +3 -0
  36. evalscope/api/tool/tool_call.py +101 -0
  37. evalscope/api/tool/tool_info.py +173 -0
  38. evalscope/api/tool/utils.py +64 -0
  39. evalscope/app/ui/app_ui.py +2 -1
  40. evalscope/app/ui/multi_model.py +50 -25
  41. evalscope/app/ui/single_model.py +23 -11
  42. evalscope/app/utils/data_utils.py +42 -26
  43. evalscope/app/utils/text_utils.py +0 -2
  44. evalscope/app/utils/visualization.py +9 -4
  45. evalscope/arguments.py +6 -7
  46. evalscope/backend/opencompass/api_meta_template.py +2 -1
  47. evalscope/backend/opencompass/backend_manager.py +6 -3
  48. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +10 -10
  49. evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
  50. evalscope/backend/rag_eval/ragas/task_template.py +2 -1
  51. evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
  52. evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
  53. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +2 -1
  54. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -1
  55. evalscope/backend/rag_eval/utils/embedding.py +2 -1
  56. evalscope/backend/rag_eval/utils/llm.py +13 -12
  57. evalscope/benchmarks/__init__.py +0 -2
  58. evalscope/benchmarks/aigc/i2i/__init__.py +0 -0
  59. evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +44 -0
  60. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +53 -55
  61. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +41 -46
  62. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +29 -45
  63. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +34 -44
  64. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +16 -27
  65. evalscope/benchmarks/aime/aime24_adapter.py +38 -40
  66. evalscope/benchmarks/aime/aime25_adapter.py +34 -40
  67. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +86 -60
  68. evalscope/benchmarks/arc/arc_adapter.py +34 -147
  69. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +96 -70
  70. evalscope/benchmarks/arena_hard/utils.py +37 -1
  71. evalscope/benchmarks/bbh/bbh_adapter.py +72 -144
  72. evalscope/benchmarks/bfcl/bfcl_adapter.py +181 -160
  73. evalscope/benchmarks/bfcl/generation.py +222 -0
  74. evalscope/benchmarks/ceval/ceval_adapter.py +94 -162
  75. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +85 -82
  76. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -125
  77. evalscope/benchmarks/competition_math/competition_math_adapter.py +56 -108
  78. evalscope/benchmarks/data_collection/data_collection_adapter.py +183 -45
  79. evalscope/benchmarks/docmath/docmath_adapter.py +109 -51
  80. evalscope/benchmarks/docmath/utils.py +4 -5
  81. evalscope/benchmarks/drop/drop_adapter.py +88 -40
  82. evalscope/benchmarks/frames/frames_adapter.py +135 -52
  83. evalscope/benchmarks/general_arena/general_arena_adapter.py +136 -98
  84. evalscope/benchmarks/general_arena/utils.py +23 -27
  85. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +40 -101
  86. evalscope/benchmarks/general_qa/general_qa_adapter.py +73 -134
  87. evalscope/benchmarks/gpqa/gpqa_adapter.py +61 -100
  88. evalscope/benchmarks/gpqa/{chain_of_thought.txt → prompt.py} +12 -5
  89. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +62 -142
  90. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +35 -124
  91. evalscope/benchmarks/hle/hle_adapter.py +127 -93
  92. evalscope/benchmarks/humaneval/humaneval_adapter.py +86 -55
  93. evalscope/benchmarks/ifeval/ifeval_adapter.py +69 -40
  94. evalscope/benchmarks/ifeval/instructions.py +109 -64
  95. evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
  96. evalscope/benchmarks/ifeval/utils.py +6 -7
  97. evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -65
  98. evalscope/benchmarks/live_code_bench/evaluate_utils.py +2 -2
  99. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +121 -71
  100. evalscope/benchmarks/live_code_bench/load_utils.py +13 -21
  101. evalscope/benchmarks/live_code_bench/testing_util.py +6 -2
  102. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +49 -75
  103. evalscope/benchmarks/math_500/math_500_adapter.py +41 -48
  104. evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -205
  105. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +80 -99
  106. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +64 -110
  107. evalscope/benchmarks/musr/musr_adapter.py +33 -64
  108. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +192 -152
  109. evalscope/benchmarks/process_bench/process_bench_adapter.py +144 -76
  110. evalscope/benchmarks/race/race_adapter.py +33 -119
  111. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +72 -70
  112. evalscope/benchmarks/super_gpqa/{five_shot_prompt.txt → prompt.py} +14 -16
  113. evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +73 -117
  114. evalscope/benchmarks/super_gpqa/utils.py +2 -1
  115. evalscope/benchmarks/tau_bench/generation.py +147 -0
  116. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +112 -54
  117. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +91 -70
  118. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -124
  119. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -265
  120. evalscope/benchmarks/winogrande/winogrande_adapter.py +28 -54
  121. evalscope/cli/cli.py +2 -0
  122. evalscope/cli/start_server.py +6 -3
  123. evalscope/collections/__init__.py +2 -10
  124. evalscope/collections/sampler.py +10 -10
  125. evalscope/collections/schema.py +13 -11
  126. evalscope/config.py +95 -54
  127. evalscope/constants.py +29 -61
  128. evalscope/evaluator/__init__.py +1 -1
  129. evalscope/evaluator/evaluator.py +277 -423
  130. evalscope/filters/__init__.py +2 -0
  131. evalscope/filters/extraction.py +126 -0
  132. evalscope/filters/selection.py +57 -0
  133. evalscope/metrics/__init__.py +13 -13
  134. evalscope/metrics/llm_judge.py +32 -30
  135. evalscope/metrics/math_parser.py +27 -22
  136. evalscope/metrics/metric.py +307 -0
  137. evalscope/metrics/metrics.py +22 -18
  138. evalscope/metrics/t2v_metrics/__init__.py +0 -52
  139. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +4 -2
  140. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +9 -13
  141. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +2 -1
  142. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +3 -2
  143. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +2 -1
  144. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +2 -2
  145. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +2 -1
  146. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +4 -2
  147. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +10 -5
  148. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +4 -2
  149. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +2 -1
  150. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +15 -9
  151. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +4 -2
  152. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +15 -10
  153. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +9 -6
  154. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +2 -2
  155. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +4 -2
  156. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +4 -2
  157. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +3 -9
  158. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +16 -10
  159. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +3 -2
  160. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +4 -2
  161. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +8 -4
  162. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +47 -25
  163. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +12 -7
  164. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +23 -17
  165. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +33 -23
  166. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +2 -1
  167. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +46 -30
  168. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +69 -37
  169. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +7 -5
  170. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +6 -4
  171. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +7 -5
  172. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +3 -2
  173. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +5 -2
  174. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +17 -13
  175. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +35 -19
  176. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +14 -12
  177. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +63 -52
  178. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +63 -38
  179. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +6 -3
  180. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +6 -2
  181. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +3 -2
  182. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +15 -13
  183. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +3 -2
  184. evalscope/models/__init__.py +6 -29
  185. evalscope/models/mockllm.py +65 -0
  186. evalscope/models/model_apis.py +47 -0
  187. evalscope/models/modelscope.py +455 -0
  188. evalscope/models/openai_compatible.py +123 -0
  189. evalscope/models/text2image_model.py +124 -0
  190. evalscope/models/utils/openai.py +698 -0
  191. evalscope/perf/benchmark.py +2 -1
  192. evalscope/perf/http_client.py +4 -2
  193. evalscope/perf/plugin/api/custom_api.py +5 -4
  194. evalscope/perf/plugin/api/openai_api.py +11 -9
  195. evalscope/perf/plugin/datasets/custom.py +2 -1
  196. evalscope/perf/plugin/datasets/flickr8k.py +1 -1
  197. evalscope/perf/plugin/datasets/kontext_bench.py +1 -1
  198. evalscope/perf/plugin/datasets/line_by_line.py +2 -1
  199. evalscope/perf/plugin/datasets/longalpaca.py +2 -1
  200. evalscope/perf/plugin/datasets/openqa.py +4 -2
  201. evalscope/perf/utils/benchmark_util.py +7 -5
  202. evalscope/perf/utils/db_util.py +9 -6
  203. evalscope/perf/utils/local_server.py +8 -3
  204. evalscope/perf/utils/rich_display.py +16 -10
  205. evalscope/report/__init__.py +2 -2
  206. evalscope/report/combinator.py +18 -12
  207. evalscope/report/generator.py +101 -6
  208. evalscope/report/{utils.py → report.py} +8 -6
  209. evalscope/run.py +26 -44
  210. evalscope/summarizer.py +1 -1
  211. evalscope/utils/__init__.py +21 -2
  212. evalscope/utils/chat_service.py +2 -1
  213. evalscope/utils/deprecation_utils.py +12 -1
  214. evalscope/utils/function_utils.py +29 -0
  215. evalscope/utils/io_utils.py +100 -5
  216. evalscope/utils/json_schema.py +208 -0
  217. evalscope/utils/logger.py +51 -12
  218. evalscope/utils/model_utils.py +10 -7
  219. evalscope/utils/multi_choices.py +271 -0
  220. evalscope/utils/url_utils.py +65 -0
  221. evalscope/version.py +2 -2
  222. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/METADATA +98 -49
  223. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/RECORD +234 -216
  224. tests/aigc/test_t2i.py +22 -4
  225. tests/benchmark/__init__.py +1 -0
  226. tests/benchmark/test_eval.py +386 -0
  227. tests/cli/test_all.py +3 -5
  228. tests/cli/test_collection.py +13 -4
  229. tests/cli/test_custom.py +22 -15
  230. tests/rag/test_clip_benchmark.py +1 -0
  231. evalscope/benchmarks/aigc/t2i/base.py +0 -56
  232. evalscope/benchmarks/arc/ai2_arc.py +0 -151
  233. evalscope/benchmarks/benchmark.py +0 -81
  234. evalscope/benchmarks/ceval/ceval_exam.py +0 -146
  235. evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
  236. evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
  237. evalscope/benchmarks/competition_math/competition_math.py +0 -79
  238. evalscope/benchmarks/data_adapter.py +0 -528
  239. evalscope/benchmarks/filters.py +0 -59
  240. evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
  241. evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
  242. evalscope/benchmarks/humaneval/humaneval.py +0 -79
  243. evalscope/benchmarks/mmlu/mmlu.py +0 -160
  244. evalscope/benchmarks/mmlu/samples.jsonl +0 -5
  245. evalscope/benchmarks/process_bench/critique_template.txt +0 -13
  246. evalscope/benchmarks/race/race.py +0 -104
  247. evalscope/benchmarks/race/samples.jsonl +0 -5
  248. evalscope/benchmarks/super_gpqa/zero_shot_prompt.txt +0 -4
  249. evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
  250. evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
  251. evalscope/benchmarks/utils.py +0 -60
  252. evalscope/collections/evaluator.py +0 -375
  253. evalscope/metrics/completion_parsers.py +0 -227
  254. evalscope/metrics/named_metrics.py +0 -55
  255. evalscope/models/adapters/__init__.py +0 -14
  256. evalscope/models/adapters/base_adapter.py +0 -84
  257. evalscope/models/adapters/bfcl_adapter.py +0 -246
  258. evalscope/models/adapters/chat_adapter.py +0 -207
  259. evalscope/models/adapters/choice_adapter.py +0 -222
  260. evalscope/models/adapters/custom_adapter.py +0 -71
  261. evalscope/models/adapters/server_adapter.py +0 -236
  262. evalscope/models/adapters/t2i_adapter.py +0 -79
  263. evalscope/models/adapters/tau_bench_adapter.py +0 -189
  264. evalscope/models/custom/__init__.py +0 -4
  265. evalscope/models/custom/custom_model.py +0 -50
  266. evalscope/models/custom/dummy_model.py +0 -99
  267. evalscope/models/local_model.py +0 -128
  268. evalscope/models/register.py +0 -41
  269. tests/cli/test_run.py +0 -489
  270. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/LICENSE +0 -0
  271. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/WHEEL +0 -0
  272. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/entry_points.txt +0 -0
  273. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,29 +1,66 @@
1
- evalscope/__init__.py,sha256=XZYDn3ShhM_48je5qQgwymtSdpTt8zYEnNfanYnpBdA,181
2
- evalscope/arguments.py,sha256=QkxE8eGSryiyo9uDiNQNZUI3l_hGPYmhVz1-KHgtB6E,6044
3
- evalscope/config.py,sha256=1YfHXlIyYH70FQfi8TiUtpUH3VIRCh5YcbaayKZo5s4,6781
4
- evalscope/constants.py,sha256=Tc74W89SxeeEzISDzO5IoxSo9A_F0LqjH0mOrcAYJXc,3737
5
- evalscope/run.py,sha256=dL1deJ0J1RHW6X6ZStXzAVL7NwbjW6McfdOMkCpWrtc,7012
6
- evalscope/summarizer.py,sha256=ZLFDHmi0Bgo18ouQsxuUl9vmIES9zkoapLLWRLhy19Q,5911
7
- evalscope/version.py,sha256=wsTu-_Fq9Dmfg7bXg6eDVtNwZA5ui-MZ6IPs4EhytAc,119
1
+ evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
2
+ evalscope/arguments.py,sha256=3mYv_kPerYxxI426GifY5mUmy8CSUaaFy7mdskPFKgY,5881
3
+ evalscope/config.py,sha256=CRwJgcPnHp2mBmVA3IihsBrt6gGP0AIXqgBIwxIYAUM,8160
4
+ evalscope/constants.py,sha256=mtKSLlr92e6c4ze0-FdZrNE1mp1VlFcE42KMD3DwnK8,3239
5
+ evalscope/run.py,sha256=sksjcOsI1Q_0Jzgvs470_bkcKWA1zH6qJj_ZJgEGeMM,6281
6
+ evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
7
+ evalscope/version.py,sha256=OOygMFcZzcXVtNof6kBqsKsaK1Y03SJIHehdMdcqyT0,118
8
+ evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
10
+ evalscope/api/benchmark/__init__.py,sha256=PW--qkFb5b6DlKKNwhI873hXsaoP3IAgdHayQQYfKt8,147
11
+ evalscope/api/benchmark/benchmark.py,sha256=OyQElOvIjeYw6BlYG6OZ9RqX1Gaz5f9Ka_JuUZPj-_A,9099
12
+ evalscope/api/benchmark/meta.py,sha256=-zhwrUPbhFCI35ugE5jBhElpiZeSNZ8XP83pdgArqIc,4026
13
+ evalscope/api/benchmark/adapters/__init__.py,sha256=qtjE9gXnx4SiITtEl2ii_IYoKBKXMVO7ntabexSXLD4,156
14
+ evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=YtiPzjZ2IB4spvNwnJ9Lg7bTWjBXJjQ-7druKi5xlNc,27913
15
+ evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=wp_6Kws3GoBk_mSzQP8Nr40osFf3iPJpntkANYAuIcc,2979
16
+ evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=Q0G0VI8zaxggLO7Q0S6G0ZnK3nTwo7rngXqpaY_cVgo,6281
17
+ evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
18
+ evalscope/api/dataset/dataset.py,sha256=rqVQxnEKfNDGowSxTwV9HSDRtmjUfu4oCEIc_y1ExkY,11333
19
+ evalscope/api/dataset/loader.py,sha256=7BHtxlV3smrV5R_RNifvMNmH9o_uSSKmngSM-ewDQ70,9668
20
+ evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
21
+ evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
22
+ evalscope/api/evaluator/cache.py,sha256=Hovka77enKYTxomeTnH-e1vPzozRiZ4CnHunHEWATiA,12546
23
+ evalscope/api/evaluator/evaluator.py,sha256=SGW4RIKc79IlUP5FisrEycJlqORcaYxyIP5eabaSfeU,1600
24
+ evalscope/api/evaluator/state.py,sha256=vLTrICWWqcK9asfPJFB0JfOGXZnVKmeLcvBACtvEfX4,8543
25
+ evalscope/api/filter/__init__.py,sha256=5eWKjT-dAiz8nE0S6WnU6plqjXZHYn7CJOgFiHSoovM,66
26
+ evalscope/api/filter/filter.py,sha256=fsPddaHE5wwFIXgUWITFqlYXqdh6vx3QqcEf3rSXKVI,2068
27
+ evalscope/api/messages/__init__.py,sha256=31jIVA2zSwDgAcOdOd9pmj6-w-U27izxf2Pz-1cMvbk,352
28
+ evalscope/api/messages/chat_message.py,sha256=XFalZ4e7Z-V3bbABMMsDvad0UiYcIz8kzrp3Muyzqfg,7698
29
+ evalscope/api/messages/content.py,sha256=gUBUeK60BUhkwoulyzKL6q0iMt3VLlah9onLG1XVrWY,2772
30
+ evalscope/api/messages/utils.py,sha256=uqlEbYEoUKpXLW8tQtP-cY5Miq7W0Xl6a98j55u6m6E,1266
31
+ evalscope/api/metric/__init__.py,sha256=Cj2F8eiVny5uNtfPXKwQDq2owlHVKNzfr-COLYMEox4,106
32
+ evalscope/api/metric/metric.py,sha256=XkjBqpZbFYynhTIH8WawfPmItbDQ6jWufE_ox9zDPCU,1568
33
+ evalscope/api/metric/scorer.py,sha256=9IATvlJbp47b2iAn5KNO2v2tQIa1lqRlVaXWXce2iN8,3309
34
+ evalscope/api/mixin/__init__.py,sha256=PagRD_Dz93Tsl-5YKQMZQAodx867Ow06P8uPQSBx4KM,89
35
+ evalscope/api/mixin/dataset_mixin.py,sha256=ZJMcX3J4L0uNC_GkDwndSRjytxlbgldDeFIRfVCPCks,4395
36
+ evalscope/api/mixin/llm_judge_mixin.py,sha256=KPNH41IL7md5XEYqC2ZbmnYm4tIrV-MgxpfKOWbYsMc,5624
37
+ evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
38
+ evalscope/api/model/generate_config.py,sha256=QMOgi9PUhvdkHzuP5DdOWUX6dOUPh4lqJd1d-0w7XGE,7852
39
+ evalscope/api/model/model.py,sha256=YjXI6rIGhaUZavhQ0O77XdCCMruNy-iSIehP8gPcN4k,12662
40
+ evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
41
+ evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
42
+ evalscope/api/tool/tool_call.py,sha256=WqMnw69L_yhQWycENZ7azPRhxRidhmrMcYAy7UTIqvg,2836
43
+ evalscope/api/tool/tool_info.py,sha256=aqquWQRWWx7fPItIwiubiz2VRe2TLl_Jmn1ArIlngbw,5716
44
+ evalscope/api/tool/utils.py,sha256=IWFzM6WspzBmNPicXn6b7KS6Y-1I-ErsK9fua4cb53Y,2324
8
45
  evalscope/app/__init__.py,sha256=HWLXld_JXcBDsdL4L_4E8JsKyuBwwPUSwlejKnZ3HKc,579
9
46
  evalscope/app/app.py,sha256=8mSBp8qUCCmqupV4FEPMPdT9jL-bYu4DdH2qj8P0ktk,776
10
47
  evalscope/app/arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
11
48
  evalscope/app/constants.py,sha256=oG6tZ618zJcCnwZ5THnYL0gWTPDb5XKrnmdrWxY3Z4Q,385
12
49
  evalscope/app/ui/__init__.py,sha256=IBxyQ2H-kSHoHJmXWDR8QMermvsMbiu673PQbXP_FnE,616
13
- evalscope/app/ui/app_ui.py,sha256=FvpHsr4Lc0LAcwXIaVn9sUAAjO8QLNYCuojmKrjKvaE,2023
14
- evalscope/app/ui/multi_model.py,sha256=7pe71PSaU7gnvogmCBMLUr_DUAgAeni12k4QcoHkFDs,15040
50
+ evalscope/app/ui/app_ui.py,sha256=wLrQ4VM7BnzvaYmPAk8NH9t5BaWooHFJcgmAOOd2I1w,2032
51
+ evalscope/app/ui/multi_model.py,sha256=fO8z-ZFucWtgaKmuQ50AkUp4BoYOFqOkxeTBUUAK0bM,15122
15
52
  evalscope/app/ui/sidebar.py,sha256=JA0QbG2iPStK-lFy6x_AjOHlQdesmgXoS0OYJUJ_Wyg,1339
16
- evalscope/app/ui/single_model.py,sha256=K5SU_S7WXWsbYLih2rQfRrVE50enzbCrq4rbhpo2uXo,9406
53
+ evalscope/app/ui/single_model.py,sha256=7HjfmufZm7wXNtT-ZKnQ4PgX-I_tX5og-s30leX_Xr4,9487
17
54
  evalscope/app/ui/visualization.py,sha256=jXFX_-7woQkcAiQkPAIRwVv1kdRdXonn9IvmB8yzPDU,1102
18
- evalscope/app/utils/data_utils.py,sha256=TMgiDu4MGvWgyd8G_nNOAOw39ZCRVFfRLLvrxCX_Ocw,6806
55
+ evalscope/app/utils/data_utils.py,sha256=H4XYWgynmkY0ENU6FFSmrgse3aq3Is11jRRf-_nCvKw,7408
19
56
  evalscope/app/utils/localization.py,sha256=rWEviBmcnhIpAA-cG8djbbUA6p1Y358c0dxge5Pqi1U,6131
20
- evalscope/app/utils/text_utils.py,sha256=7DJow2W3Fna5Qny-AbwVRTWDh44ualONS5A5uUtesuk,3590
21
- evalscope/app/utils/visualization.py,sha256=N9M7OV6lxcCvFtXmLBcUWw3RPlYZva7YH3rvhgTElqk,3522
57
+ evalscope/app/utils/text_utils.py,sha256=lZy-sXccv24KyjvOGpZSQYMfM4XSKz3qcriOhsiYAdY,3499
58
+ evalscope/app/utils/visualization.py,sha256=dwEXbGfY7vFysnL0HmrHS2BEWaJkg-dZ9ayDlRhdvv4,3559
22
59
  evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
60
  evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
24
61
  evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
25
- evalscope/backend/opencompass/api_meta_template.py,sha256=DaBJg15ZSIjxroXiygl3-4RdmIe_FD7xHbXvjSZmkQA,1706
26
- evalscope/backend/opencompass/backend_manager.py,sha256=POEYRmNlptoRYlTNcpRcHEXwqrYo34RW4TM_kf7wMQQ,10458
62
+ evalscope/backend/opencompass/api_meta_template.py,sha256=OGH0lGJmBFKHs-6u6RPCov13_ArO63E6pV-aX1WVljU,1707
63
+ evalscope/backend/opencompass/backend_manager.py,sha256=fxTERjtndDoxfjFDxULYc1XZ24lY9HNkNLsmqhkpZtw,10500
27
64
  evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
28
65
  evalscope/backend/opencompass/tasks/eval_api.py,sha256=ZaGdUbEOtAW5VX3ZXmpHIttg_QrID34EnBTylD3uvos,1152
29
66
  evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=JHSq4EnPJgv4sRJJplLH80EqE3ghtkn2k8HnV6DaDew,5406
@@ -31,8 +68,8 @@ evalscope/backend/rag_eval/__init__.py,sha256=Tbj7HboP5zzJ77-9qVEwwhHKjHL5V8MwLF
31
68
  evalscope/backend/rag_eval/backend_manager.py,sha256=iEer5IhEJ8nOXW_s3j6l5jvfLgBftcGQMAtJk69Wzdc,3521
32
69
  evalscope/backend/rag_eval/clip_benchmark/__init__.py,sha256=C8Vetf52nyHiRwY2Pm74Bjn3UpWboQeghCGNh67X1EM,151
33
70
  evalscope/backend/rag_eval/clip_benchmark/arguments.py,sha256=d5UkbC3RXb6iyzy_ILumToAVO1AdwvDeyOiX5KB2u0g,1530
34
- evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py,sha256=2OdPj4gSUWdAGCfS9PHpPGbd6q5RqEyli2G6UGb1ffw,8888
35
- evalscope/backend/rag_eval/clip_benchmark/task_template.py,sha256=2NQRvlYY2SOzvOOj9WRLyxvRlyj8CAcgbQqgsv-Xjgw,3929
71
+ evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py,sha256=_MuzGblPP-QBRB7IQJ9r08FmJfH7S82nynzijK7bvsM,8848
72
+ evalscope/backend/rag_eval/clip_benchmark/task_template.py,sha256=lvgGVQ-EHwGxo61bf_X8ofkaPJ3qTbsRv7-xNjyIzUQ,3883
36
73
  evalscope/backend/rag_eval/clip_benchmark/tasks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
74
  evalscope/backend/rag_eval/clip_benchmark/tasks/image_caption.py,sha256=CQnWZZTQ0FOzDtmGv7OF0W4Cv4g6u4_LQ93koDu1pes,2556
38
75
  evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_classification.py,sha256=NwpxNECN7NFgtlVdKY7vet5m-gAmIp8MJYka0eexWu0,7424
@@ -53,46 +90,42 @@ evalscope/backend/rag_eval/cmteb/tasks/STS.py,sha256=uhGLsQTo5lM3-L2Na3WJGqOLQw3
53
90
  evalscope/backend/rag_eval/cmteb/tasks/__init__.py,sha256=PKBNyp45hIa3FYNA1psiwtwfwUcn7s9eNt6r5aUpyyY,1505
54
91
  evalscope/backend/rag_eval/ragas/__init__.py,sha256=D0yJkN9SuNGIAL3niZw4BI08Yh3HznsUUewdIAa_-LM,171
55
92
  evalscope/backend/rag_eval/ragas/arguments.py,sha256=S6M1nsqwMQ8lnZZDtlQTdzyOCfLn9WP0QJ_7wAEsVgc,1695
56
- evalscope/backend/rag_eval/ragas/task_template.py,sha256=a_3bWfLx0j2zJkWgEWNStO0XXAeUFdnFpeukpoGfxLg,1669
93
+ evalscope/backend/rag_eval/ragas/task_template.py,sha256=ikLBEwYKuXe4dcc0SC7orWOEpYpT0kBG46op_s2yM6U,1674
57
94
  evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py,sha256=fX9sCci787ViGiL3BhGsykx0bnWfOWWEFueaJKyR8g4,793
58
95
  evalscope/backend/rag_eval/ragas/tasks/__init__.py,sha256=hErdWKbvV9aRqOpQTzdFHw1tcYoDbnttmic7GpZzKx8,173
59
- evalscope/backend/rag_eval/ragas/tasks/build_distribution.py,sha256=vFfemiqtPx22u5pwwZxEQJKYf3B9efYmwbpWDI5hY30,1491
60
- evalscope/backend/rag_eval/ragas/tasks/build_transform.py,sha256=GtAYqdVOy7BxIGyC4rSZ_UfXagKYzE6eEtXbaOI_g-k,5425
61
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=YSqpaXMFVe8mkVfq3i_oJg1MSnPm98E7WdOBdyUwMpA,5784
62
- evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=6x-4O2pgsjZCVfJNvwZEKcgLe_QhSknPg-f2jGjZkU4,1890
96
+ evalscope/backend/rag_eval/ragas/tasks/build_distribution.py,sha256=zHUbUkLPoqcTpJfZQlmIs2GIbuJwH2PjhgvRzXZGbTM,1496
97
+ evalscope/backend/rag_eval/ragas/tasks/build_transform.py,sha256=kbk9pwxQgWCgAV26kfWtgz8Ji2GHPZX_kkOP6ayoSI0,5449
98
+ evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=XMWW8ucN7ojRLLCii_jbUtvOqiISFO1NQl1XBNimHkY,5789
99
+ evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=dZAjsfiR839INO3nbb9psLn-eL4sZOzpU6JMdtJUXtw,1895
63
100
  evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
101
  evalscope/backend/rag_eval/utils/clip.py,sha256=GLHhPCac2AH35AvRLvVqePA1gIMAewHTFmCJCDZzvqU,5015
65
- evalscope/backend/rag_eval/utils/embedding.py,sha256=uqodHHvOKlza-bCLJ9Zkm8G1Jt2y2JT88jtIqCjA0sA,9379
66
- evalscope/backend/rag_eval/utils/llm.py,sha256=NHjm0SeQVsSIG8uISXZcQypku4QRc3KtteeO9ldv0FI,2611
102
+ evalscope/backend/rag_eval/utils/embedding.py,sha256=QR9ewFMTV35JEWl0nCw4gWxlg8UodosMxRTH-JghMJY,9388
103
+ evalscope/backend/rag_eval/utils/llm.py,sha256=1OH-985iIDtCOlCtzGmHu6GT_l1vJe7Iv-WyltQbcSc,2451
67
104
  evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
68
105
  evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
69
106
  evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
70
- evalscope/benchmarks/__init__.py,sha256=NVd_VvmkY36LxdHNmgeogSBwMFfWoLJAZF8vDg-CoFc,1308
71
- evalscope/benchmarks/benchmark.py,sha256=uZ_-Y_wPhy6TxufWiElF4BwEWN93azT1JHtGRW8tR-w,2633
72
- evalscope/benchmarks/data_adapter.py,sha256=UI4HpnJNYo18GXRiU0HwNUxjRfoSXlCB-xEBIGs2ckg,23914
73
- evalscope/benchmarks/filters.py,sha256=x_NX40uWMmUsVrAGHCeeV2e63HZZFugWUgdUhk64ivM,1523
74
- evalscope/benchmarks/utils.py,sha256=mIk8n6zVMICQ5JWMyEwUqwlkxva4L-oD5SZzpIKw1sI,1851
107
+ evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
75
108
  evalscope/benchmarks/aigc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
+ evalscope/benchmarks/aigc/i2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
+ evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py,sha256=QOen4eJ1wE_KOrXk-JDDifDbn6ulqLTgVC61a3TSEYA,1665
76
111
  evalscope/benchmarks/aigc/t2i/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- evalscope/benchmarks/aigc/t2i/base.py,sha256=4GFAvceT1Gpt5teDLRCZi62RwvPazuhG3zwft3gN3X4,2102
78
- evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=cmkny4nIWofHJdQCvu_7wR-2NZVTaJo2l98zZlgGSAM,3081
79
- evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=baDGFRpVcSKpc1CdzNAMBtjeCZDUpyEc5l1KyrPNoEU,1892
80
- evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=t9h5qlo4KrHOgXIhHo3z6fEAi0HfUqDZvaItQdS7dZ4,2097
81
- evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=U0RKN3apyD3YyZfIvqgO8TNuDO-zctlftHsSfBRyQxU,1825
82
- evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=vOOiOe26H2dk9VN2WbB_Oi3lzavMIaYDBq6sqeSIiAU,1093
112
+ evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py,sha256=3kRMglG82RXRiA-Hucj7o_O4hrrDaqJxExbmyohANQE,2898
113
+ evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py,sha256=CkJFoQJzF5tR46hr0X0Wu1VJ57uBr28BiUr3WT-5X2c,1840
114
+ evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py,sha256=nOZ8Lk_sRNiPK-d4a6hdmZ8mM40uIvpu5vlLF8Mb44s,1341
115
+ evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py,sha256=Pr2_YW31-DIiklSkR5bGuwEBQWyBQleRiRAR7L7MoH4,1460
116
+ evalscope/benchmarks/aigc/t2i/tifa_adapter.py,sha256=OuOO-txcE5ZQHRZj78XGUOBfxJoPZpL3K0k_P9X4kL4,752
83
117
  evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
- evalscope/benchmarks/aime/aime24_adapter.py,sha256=iwOvjB-hwUYFRNDTe8xuRCFxASh69gCzuU3Vz9qnsUs,2070
85
- evalscope/benchmarks/aime/aime25_adapter.py,sha256=fNJXUSCxjGyvtX_gkp4bveC_oXHwr1VNQdUePAuwjIE,2071
118
+ evalscope/benchmarks/aime/aime24_adapter.py,sha256=HTlriHoHzlm1Rf3KAiGRLs8sx6Gyf6s7RGtOjk_hGS4,1767
119
+ evalscope/benchmarks/aime/aime25_adapter.py,sha256=ZOE_6Zhg1MatWJSu2Zq372nKUODYtNFZimS1MJRFz5A,1591
86
120
  evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=BLU3G7IB3gmIYiXtznzHjPIrvi65nYZwqSF7FFnP7Aw,4324
121
+ evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=sjaWcK8WH1XY0kzm5eHsq_7J62EJocAf4gRV_UB8ZBE,4971
88
122
  evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
89
- evalscope/benchmarks/arc/ai2_arc.py,sha256=WtL4Z_ulcCU2KfptWTjTm75T2I2rVGd9aDBBB76P14w,5697
90
- evalscope/benchmarks/arc/arc_adapter.py,sha256=OO2khZxfgsRzYk64zLvq4yEbgPdQuvbIVPO4t0E4Hcc,6703
123
+ evalscope/benchmarks/arc/arc_adapter.py,sha256=GASZmoJ-PpzBG70cBdABZA5uVqoyosjV-jf9WShK7L8,1622
91
124
  evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=iJeIh-xiQbuc3E8ea48DTCfDW_KnlGMdTeIek5AlKnk,6668
93
- evalscope/benchmarks/arena_hard/utils.py,sha256=kRgKXdVt4Ep3XGOzUQpf9JThnp1OOt8oUQhvQEtOzRY,4596
125
+ evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=Ddn_hVO1PvNQ_kNknXfdJCz1AVnXZEdGWq4gX1_Qqow,7275
126
+ evalscope/benchmarks/arena_hard/utils.py,sha256=23xCd7_ksrM4xMJBp7N2ZwpUpq1zpoQFjLm1oBcdgQY,5559
94
127
  evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
95
- evalscope/benchmarks/bbh/bbh_adapter.py,sha256=IFu9XctrLNJcIFXK4jV3LmyqQCVb66z8YhL07Osc1TA,8623
128
+ evalscope/benchmarks/bbh/bbh_adapter.py,sha256=GcvgwBhIw7OG-ljWQ_urVOoWlrFjrBy1LAZ-Atm02Dw,5570
96
129
  evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt,sha256=xnzlaIRyeGlogG49v8nt4vpJO40J06ev4yc8cv0VSRY,1781
97
130
  evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt,sha256=sfo-2iOeVzB0OGgd7NSQFELTGDTsr2DQ3u-g0ivI-sM,3653
98
131
  evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt,sha256=UJBsc3Mwz8TZngdWH_NFlhhNbLhNHK6FvW9FHcS8H5g,1167
@@ -121,135 +154,123 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
121
154
  evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
122
155
  evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
123
156
  evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=ThDOYrJY_RdXMLSC1S9lP-8zYd1syZWpcrXXV1ZPLVs,10100
157
+ evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=FH1y0ROEypq9beIX41FVzBS7zcCl_qUTxcG0N0lYiV4,11092
158
+ evalscope/benchmarks/bfcl/generation.py,sha256=kf4BrNC24nHkwVysxJPUiFU55pUkev-7y5Op2Ws8GyI,8704
125
159
  evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
126
- evalscope/benchmarks/ceval/ceval_adapter.py,sha256=V_TC_E0lKXaFcV_qIdrg2_iddmGJ4um8iIdaXVaK_EM,11146
127
- evalscope/benchmarks/ceval/ceval_exam.py,sha256=ngOvb6Fymt7iPWIb2fzrUVpqmUT2VBoqh7X_IH8Bcsc,4824
160
+ evalscope/benchmarks/ceval/ceval_adapter.py,sha256=j1WkDrEUiNo6MOu-Kt3NzD9DBgye20JMbMTlQMQw39o,8560
128
161
  evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=kaZ8fZK2a9oVwpGRUA3wz3FkxtcTY_FkRDYrdLjDNro,8433
162
+ evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=OWzRlSGswV24V-heLqqo7GQzpJp01TZ0DhFHq0iUP9A,8238
130
163
  evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
131
- evalscope/benchmarks/cmmlu/cmmlu.py,sha256=Y59NIGUFzJEztJbkehZsG4Cz0J_v9Cyju6xazHMYIcA,5022
132
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=3oh79iFR006vnlpwjsRVO5cl6pOav00I5uU98DPCORM,10119
133
- evalscope/benchmarks/cmmlu/samples.jsonl,sha256=FXbyPQSDorKBGSD0lnOzioZmFjG07lIL87FRDRaMPSY,1722
164
+ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5rinorwN5FRYaFFUI,5377
134
165
  evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
135
- evalscope/benchmarks/competition_math/competition_math.py,sha256=Cehyokift7oDKjc8TdmfblZ6mMc39wQWtqqbUi34QLc,2629
136
- evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=ZBIZJZDSy-b9lTgm2-ZU2pEh053rveMwccI1fu6xpkc,7038
166
+ evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=NOqckeyuabH_nwaxL5IWmH887UO5rvBKA2jx7qb9fNs,2226
137
167
  evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
- evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=z_wbrA4yJoMwfg4TJkvEZB2aV5cPFcxCZ3JIj49F4Do,2604
168
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=Ut5sIcTiJQGaFEgE7gM9q3bsDeFKXg_2sDzxcOqchsE,8451
139
169
  evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
- evalscope/benchmarks/docmath/docmath_adapter.py,sha256=LQ_beSN5RrvNqIQa5BYgwasLRrpUvM08R6BNOhIh6zA,2967
141
- evalscope/benchmarks/docmath/utils.py,sha256=ptd-Sot4QtUmUG4dMlqXtUWHKZplo5jSTolsypqX9Ho,7716
170
+ evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
171
+ evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
142
172
  evalscope/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
- evalscope/benchmarks/drop/drop_adapter.py,sha256=ltt-9w6n_92crepfyb9yLBr5QzzHCWj0y1i5fYw1oF4,8645
173
+ evalscope/benchmarks/drop/drop_adapter.py,sha256=PyvZ1WOdHQ0u0_JpuP97_yQsCUbzGcYsJf3bWKbakzg,9968
144
174
  evalscope/benchmarks/drop/utils.py,sha256=Z9PHrNnRfGqFHCLONg5SWKARp1eTJlHFc_bU46t_YrM,1344
145
175
  evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
- evalscope/benchmarks/frames/frames_adapter.py,sha256=xYvxGzqj_YPDSZYogP9TxUhOxvZFbud1S2SOvz1nlDU,3136
176
+ evalscope/benchmarks/frames/frames_adapter.py,sha256=hqFBJsf6fxwDmkE0ZQnkELiEP0dp2cUodKn7kUbTuqM,5479
147
177
  evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
148
178
  evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=j2aDzikz9obxvrR-damdvSCXR0rfjEo-OzX8vujj2N0,19887
150
- evalscope/benchmarks/general_arena/utils.py,sha256=u0q4FNIOFka1_gC344OCvBXUz89Ah6M8asjIXbNSweM,7188
179
+ evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=UkIjxPSr-qbrloNf2KhDhZClvnes7qP7Bcu1XTXxsKU,21553
180
+ evalscope/benchmarks/general_arena/utils.py,sha256=zS4l1RKwvl0Z9Mk7kth9WVQGHTgE_aNDZa_XNy9tGyM,6874
151
181
  evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=cPN-p0tndjocQYqfc6OFkT5k8KL7kkVklmOtps-F08Y,5391
182
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=7VKg_EzXkRvoWpR7h8qB4sVVb1eZHCGcPk-X_NMS5tE,2062
153
183
  evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
154
- evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=dpIGe635CoW4ejVohVwcarBxSckqvlnxcJ2ElpRlQ9o,5669
184
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=u29IsH5sgw-T0IezuI1jErGwykz-F7875AdEhdvS5rk,3522
155
185
  evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
- evalscope/benchmarks/gpqa/chain_of_thought.txt,sha256=pgoT5a-DMPJaMhoH_M8zfU5s80ibWDTVW6vnonITd8k,5610
157
- evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=J6RfxpUT1l8Jj3vT_Vtsn1z8MKCg32XTlKn_eihCI50,5071
186
+ evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
187
+ evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
158
188
  evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
159
- evalscope/benchmarks/gsm8k/gsm8k.py,sha256=ZDN5lfeZyc_pkTDVY0voC_zUExHE1ZoEgEaTvt5hpXg,4233
160
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=IBMdsvQ1w45_raCiACTBm7DVHtOYfckv8x15_OXIwTI,10752
189
+ evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=W4vTXsC7iHN1AgvpaCf1Rj7y2O8QczIluucnpSC5aYo,2636
161
190
  evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
162
- evalscope/benchmarks/hellaswag/hellaswag.py,sha256=5_c9WbaS1LIdvgXzqEcvjAEtKi2V2Yn0YtszPlFqhXI,4610
163
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=kgHz-n8_93J8DdR7XBlzfM2KDRoKcvg80h6CCjWv_Xk,6191
191
+ evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFxWOVKBUhk3Cc0EGzF5uYLK4,2041
164
192
  evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
- evalscope/benchmarks/hle/hle_adapter.py,sha256=ts38e-AqtUcbfc6VqRtWLacZDh7KzSm4rj7xKm9vTFc,4445
193
+ evalscope/benchmarks/hle/hle_adapter.py,sha256=4YVmETL9mEiLxF4vWRjePLyFaxelax6nOaqoAH5ZxmU,6389
166
194
  evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
167
- evalscope/benchmarks/humaneval/humaneval.py,sha256=2Exsg6u8FEu0buADY2tETJluSM8tWacvX06nykKKLSE,3395
168
- evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=ZqNG3L8yMY44B7HleUjlSbVG-GLk9RBsvaGWOm2fQVw,4788
195
+ evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=5x2pnkbI9ZPPOyrRBsJ5ZcOCGJr8OR7qXLgVlY6eJxs,5825
169
196
  evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
170
- evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=2oStqiTD4w2f2n0kbjcbg7GJQfKCsHFieokQcNndWb4,2041
171
- evalscope/benchmarks/ifeval/instructions.py,sha256=oaJ9D_4rvS67BraHBNPpDtFd4TblFAnR4A3YW9HWfuY,56304
172
- evalscope/benchmarks/ifeval/instructions_registry.py,sha256=tVUmhuSwnOidLtI8onOAw_gpJ6bi8FL07GiX19hSuo8,7288
197
+ evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
198
+ evalscope/benchmarks/ifeval/instructions.py,sha256=HXnn1JgU3dpYltqIovFAn02DxkYOGw337kLMlOfJxJE,56048
199
+ evalscope/benchmarks/ifeval/instructions_registry.py,sha256=3UXzVLgKwk_cf-2aG2tozjqYgvqm5Mj3ZRRb8rI-ucU,7262
173
200
  evalscope/benchmarks/ifeval/instructions_util.py,sha256=vkemXeylJMmgW8LgfQe4cSy2OF-oH_NcSZtzyZDURW4,25780
174
- evalscope/benchmarks/ifeval/utils.py,sha256=TKrM1m2qDCUauahogItDdICf4mDk0OjasSxgnxjt2KY,4517
201
+ evalscope/benchmarks/ifeval/utils.py,sha256=MQt-b4K6uqU9H5TAM6Gxyz46r6XRBOgDsgdnwB0veg0,4470
175
202
  evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=kYXKiiFa_F5Gl3mIOAtKxXW5myi0VW_XGidbSjArd6M,2730
203
+ evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1czqwnk8Zk-jRhBys,1304
177
204
  evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
- evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=iqmVUMZmyRhzOOXXQ-NN9P1nGvvbzTjOSEp6djbN_rw,6503
205
+ evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=wgx8RDbkXi2Mlt-aK_6o4VcoPb7I3eL8z8h8JW4SnEo,6510
179
206
  evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
180
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=a4Vz73V1q8A0tV_DeKnTJKVxnDWmXs84diaqSym8gLM,3550
181
- evalscope/benchmarks/live_code_bench/load_utils.py,sha256=5i9wtdPLYR8ckjx5MaYQVC2LFYvjKzR6Fa6UZmeOTRc,2445
207
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=tl7nGLDUgmNtyR4faE0aoW11OgLhsx7ZdKmONGDlQnQ,5203
208
+ evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
182
209
  evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
183
210
  evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
184
- evalscope/benchmarks/live_code_bench/testing_util.py,sha256=abjlwp6HDayf88mMI_daOKm06nEOeNBaMkmGWqk2DJo,17286
211
+ evalscope/benchmarks/live_code_bench/testing_util.py,sha256=TuoOTciC-hz3FTeDzsQB_THH3Be9UOP2XMrax-4sXkM,17282
185
212
  evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
- evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=aibJmtIJkpvWlyLBiiL7TCdjUGfW8pxkAU2KQEZDIPM,3149
213
+ evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
187
214
  evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
- evalscope/benchmarks/math_500/math_500_adapter.py,sha256=Oc9XnBgMAjEerYAk3GtY2TTKm1QH_UI896kUuW2_a5Y,2324
215
+ evalscope/benchmarks/math_500/math_500_adapter.py,sha256=uuxjmqftY_r-hJBCjfBgYUELrBaB86MG8dIu2wTikgI,1848
189
216
  evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
190
- evalscope/benchmarks/mmlu/mmlu.py,sha256=sA8AC0bN7iURrSazqkY31s_reNVbDZSUCB-NCTQsVeI,5042
191
- evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=Rhi-J6oGWawRVBk38ZgXk8-XrZ7wL8sf4zrncU73jgs,12111
192
- evalscope/benchmarks/mmlu/samples.jsonl,sha256=f5Y2vwbEvNtpE7vrl9BHoJzsdceI4vUAo1frexYyX2o,1345
217
+ evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=2NT3QbfPzajUTFZ0tBCl6PRrtFtAr5jPZNQRW2Idlno,5947
193
218
  evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=uglOOZBZfQBIuJOG7iT4THk2LNcfHQoakxQDpS4jB1U,4554
219
+ evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=GtIyUubUg6Q6Ydh1Adj0-32OdiwcsF-u-NQ0U-4AnQA,3891
195
220
  evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
- evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=fYtAVKKGGfzRnDlEzU7IULruj2vYzey9aWoyZBBeftc,9886
221
+ evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=m_37OIFrJB4ZIvtbDJ_m9P9mA2QtrNjGfbbVo15awJg,7402
197
222
  evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
198
- evalscope/benchmarks/musr/musr_adapter.py,sha256=YTRFGsVuogdYlZoylfD3ij4AbyYrvT4hpY7MueVfu6c,2691
223
+ evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
199
224
  evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
200
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=AybH_Ka2B2WCh-EvwAsMPlCGzJ78dHBhe5sJ6nDgNK4,15691
225
+ evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=GRJrJ7O0OZlIMH-FyghcA54xNfBSYjPd-0TgtMw7vHA,17048
201
226
  evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
202
227
  evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
- evalscope/benchmarks/process_bench/critique_template.txt,sha256=tycx8n42QEC0uGcwbIvHfZvfTnchlRxGz8Tp1R2_e_Y,489
204
- evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=ULuXG68ifTEc_ucH_cj0p5AGdbL-ahA7kcJ-AzYVmSM,3767
228
+ evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=XN3F6NH7mF4ibwGX5nI01sqEHz05UQFnBAyfAe14QYE,6174
205
229
  evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
206
- evalscope/benchmarks/race/race.py,sha256=TtFC3opqEA6q8AQIAFQRGx07FjD9z7iW8wmtxeO61nU,3608
207
- evalscope/benchmarks/race/race_adapter.py,sha256=FW_FSUGq5Iyz2cTACdk3qOqDt2kXwtCpVB9FT_Bc6LM,6635
208
- evalscope/benchmarks/race/samples.jsonl,sha256=bhSktBgU6axYQCClRtQ7nN8D1x815AU8xMAIG1oflG0,1243
230
+ evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
209
231
  evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=gQzrmslukHOJf-VBSnVKYddIg34EEOvQuGYTurQgBy0,9289
232
+ evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=_duveAliSaPUqVSLQ2TtSv5sfwvFFy7t-MgIIokQ24s,9017
211
233
  evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
- evalscope/benchmarks/super_gpqa/five_shot_prompt.txt,sha256=CQxRszzUrSIygOSd1G10VpLSYWHqle6Jg7JQO1Sze1E,4728
213
- evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=ce99v28wkhlGnfmihwpv3ikTqy3aumT8Jzm1LGxz-ck,10147
214
- evalscope/benchmarks/super_gpqa/utils.py,sha256=ftYPP9ODvLBlQSd9ltACx9iRIvjB8u1bg4AtgcJ4JAI,3360
215
- evalscope/benchmarks/super_gpqa/zero_shot_prompt.txt,sha256=XZb0CN83YbfH2dF-iIV-ciNLbIb3ON220qHe7zf8KF0,247
234
+ evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
235
+ evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
236
+ evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
216
237
  evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
217
- evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=5_VgRUtEjeZ-8gRZj4cnwwso1GUqf2GB49AlI4xqyDM,4221
238
+ evalscope/benchmarks/tau_bench/generation.py,sha256=SankPe87Zi85CGlSKWZyOYo6Q4gRN22I7fkl3ef547U,5165
239
+ evalscope/benchmarks/tau_bench/tau_bench_adapter.py,sha256=2Gjvc8RVavcIHQPDXI5d6zxpeHqghRnOhlzwWjGKc24,6438
218
240
  evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=fy6Hb84cm6s-pOoQXmT-N8D1OUYVGCuq77-2xwM_WLA,3093
241
+ evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=BHsesDDELEINdbWSR3WKCQGZ6MqWc2LiOZA3MbTp2_s,3805
220
242
  evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
221
243
  evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
222
244
  evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
223
- evalscope/benchmarks/trivia_qa/trivia_qa.py,sha256=eekxaXppMLb5tCQqNLOw2MaWlYDhI2IicPzRsTHqb5A,3070
224
- evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=IT5l6cFzZQi2i68kp8rWBdXWxiDVd14MORgk-lusPBM,5516
245
+ evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
225
246
  evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
226
- evalscope/benchmarks/truthful_qa/truthful_qa.py,sha256=A4abSL6WMRcXez8flxsHy-0ZFyRg-moq9rTeOA1TalY,6909
227
- evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=xY4Kr-GzyyE_TWGlaKL5mo9qTaza0frWLy7EgIwlZn4,12958
247
+ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=LkmJFWzLpk0ryUf_XVZvEBIVTcSJ2a4pB9bh7k0DIJI,3519
228
248
  evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
- evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=GkmTsrlpU1IA-E7dJXmsHXyY9ivRbmbeVKxFmMwWtLc,2209
249
+ evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
230
250
  evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
231
251
  evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
232
- evalscope/cli/cli.py,sha256=w_dtXljur9s5lmTn6LbbFL_viTPQB1WAEzhYcId09Og,729
252
+ evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
233
253
  evalscope/cli/start_app.py,sha256=dV63nvBYEUl2sGeVxoUH4IJBXJSLecaq293i3alBWxo,794
234
254
  evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
235
255
  evalscope/cli/start_perf.py,sha256=5hLi5jWgM9BJPXLd8d9D1zqrcj_5c0KvkfB1DgD4_RU,831
236
- evalscope/cli/start_server.py,sha256=DQRIfbsHaOAsVcLGF6iRyJnxmd5Sf_tgytpJNfiWCeE,3662
237
- evalscope/collections/__init__.py,sha256=3v7tVLcJk86FeNBrxw3pWhu_lcpKYrnT_dDACCeR2Io,853
238
- evalscope/collections/evaluator.py,sha256=RJ337S0sy8dsV25I2OAxeWgSx_HrmXTyuuHKSt9vQtM,17474
239
- evalscope/collections/sampler.py,sha256=2NwvhJVdi-mrDeK7RWwEGOoE7DdxtpyASRUZU_D6hWw,4855
240
- evalscope/collections/schema.py,sha256=mjJfNmy_athJ1TmnuJRkrKRlefzefuQXZuTtjn8SHKo,4073
241
- evalscope/evaluator/__init__.py,sha256=XqPnEp5MvfRwC5M5cEeOAC0-MMEPxBIESqiSa3YMBgo,84
242
- evalscope/evaluator/evaluator.py,sha256=HKEF2k0S_dJR8cF9lrqf_W4diXbb6H3L81pD6XcmLiA,22481
243
- evalscope/metrics/__init__.py,sha256=CH3bNyRx9dJ3gOqNwKDlaZ7zan4MShM0h8SnzarjokU,1851
244
- evalscope/metrics/completion_parsers.py,sha256=56ZNzOfNU0O1ba9fs9Cyi4Vk_YUmcgWUbxW0SJ2KrlU,8974
245
- evalscope/metrics/llm_judge.py,sha256=1hPFnGc3Szszqo21O618a7mxOgkdba3KsbZ66vvTbSA,8380
246
- evalscope/metrics/math_parser.py,sha256=JtOkj28XOtwoUACXOXLzCeRYz0rx0tBsQLQDU8cbC20,17311
247
- evalscope/metrics/metrics.py,sha256=OLfvEljGbQnv-bBiFD-GR2On4mpZ0xhKxiKkjZfoDX8,14268
248
- evalscope/metrics/named_metrics.py,sha256=PrzU_1mGTeRFxVJFT1aXxIOiS7MnNoWyZsb8uCRVDeE,2278
256
+ evalscope/cli/start_server.py,sha256=01iDaEwLx59xRUrrZ_nhQE-QjUE1Rk5d43uMQ_4owbI,3677
257
+ evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gBiJWg,720
258
+ evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
259
+ evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
260
+ evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
261
+ evalscope/evaluator/evaluator.py,sha256=IY0LElXZXfe2HW1v99dKkN3qhyzo0WO4aR8OyxUny3A,13545
262
+ evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
263
+ evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
264
+ evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
265
+ evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
266
+ evalscope/metrics/llm_judge.py,sha256=xNmchB6ZDlrQnxv-Vit_zcQjeAH-C0o3m4bF4OFDRCI,8174
267
+ evalscope/metrics/math_parser.py,sha256=BMfautQtNNiF9f2DIEfO6SXSn_GYhzaddAjGWG10MJA,17257
268
+ evalscope/metrics/metric.py,sha256=6la8Nq2E_brArDcNwkbRX3ECef0AAE3IrBCfUVE7UKc,10176
269
+ evalscope/metrics/metrics.py,sha256=VxAggzEfaLKxWcXyuve8QbEBwV2W71udVyt0gynzGec,14134
249
270
  evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
250
271
  evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
251
272
  evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=T91PgJfi1As7BR7I-Hq6rLlvHAtMB9JpBw9gMTH8VlE,12114
252
- evalscope/metrics/t2v_metrics/__init__.py,sha256=IwI3umI5wBwMJ7zlvU-l3aw8KmiQ72DgaoJXnwlWHiE,1202
273
+ evalscope/metrics/t2v_metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
274
  evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
254
275
  evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
255
276
  evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
@@ -259,43 +280,43 @@ evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeu
259
280
  evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
260
281
  evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
261
282
  evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
262
- evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=QhksCBA12Ekm67H4TiROkC84dcbHB4zL5oO7BU4fwnI,8099
263
- evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=h5z0HcnobkGw7vEeIwiVauwYC5GRyKczdevZi60a1aw,3328
264
- evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=atX4JAxR4xAmBZ0WIVf_K8g3tNvqeuXNIIUX50bzo4Q,3806
265
- evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=dp6ZyWKU9sJ2MjsyQJvTi_tBoEs6l2-KYmjz8cN_SL0,2394
283
+ evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=oEILZrtRTpJj2FHH0DbK88IoeKdoUg_AsBDOMjTQ-yU,8108
284
+ evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=79zgnp5hemgeyGgaWC-HVYJGX8PZ-cwOW6xaZwfm_qs,3357
285
+ evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=EWEFLL98xG2s_a7ZvDlvGFzJvfSgCAzxVvdd-LvKuNE,3815
286
+ evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=LI6233xfRgSTwyvR3iXgtMAPrBcdUph0HOuBjP-k2W8,2412
266
287
  evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
267
288
  evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
268
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=7Jl2noVHFZUN5rXd9XDBr2ILChP56JPOM1mbJSq5G8c,5047
269
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=I6KFiy1DPCtev7TLOFxjUjkC13Mt2eBBha_XaN31nlI,7940
289
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=ahGbFR2PyigN5iDjRpmUDLNzLRll57W3145Paf8AVlE,5065
290
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=PKY6WMtGMt3wJFg4VbvV45oBQq3_r8FgxvPdLYqZ3c4,7839
270
291
  evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
271
- evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=9IGXCC8d6a6WTIICcC_KgZf0KdDcJ3L_HOSXILJmMIA,3447
272
- evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=3uduuEmoliezG5BwymoNbRm3FXvKh9gtkN74_LX9yIs,3674
273
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=OtnnYsW0G2vGoUHfyB0F-m5r10A5-N6k9agFRXav-Uw,3199
274
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=IPQcC4-cYeJjHGRysh26E3iNGHz1UJA-oxxEpSIXpX0,6021
292
+ evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=eZEQbey0IWWxxhjAJZusbksH2iA1xR9nGpQekM5_oCk,3456
293
+ evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=AEMb5qkUuFWQyFWojePpZ3un4odo0BHTKouhbUqF6rk,3692
294
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=t8LgtokWZZZfuK2Guxddp4HA4F5mEs2xv3o7RpljIcM,3212
295
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=rJmt-XwWodtvR9x4XIz2GCBgHqus6GAwFw9fQXUil-M,6078
275
296
  evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
276
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=ZCN_PJ3jz-a9I0oGbeuOcGuMOJT5iVb-yh5Dzq49VwE,2700
297
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=PFTbLLUQ2kSJ9YohuZpuUHIcnndFc9TXsdRjBYZfDgc,2718
277
298
  evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
278
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=krWGPe--eLHwK9M2tqWkmu7iKlbAM_qanP46NUkmkhM,9896
279
- evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=R5VS77aDRX4HVcwJ7xOAnf_uP8jhix4PXbxWim1BOdg,5903
280
- evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=p22TqpCDUFV39I9anYjl5zehNXOCtPQ15fHnEeDrF3A,4712
281
- evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=oEsZOQCZl6NS0SaNn9mM4S3NSg6lT5Lm_HH9Ju7i8O0,587
299
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=8lLGnK6Xnws-3XXUtmTzdXt0HKLh7fyeiVJwnHOqLpY,9924
300
+ evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=jaXN7bpbApfvbm9uZlKAS8D4zetqIP_D17nyZTxHog0,5894
301
+ evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=TlvYuUBvaNFQGtZN7UklCq1N9yI_oKGtgB5r6qZ6hi0,4662
302
+ evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=KaiCeLRq5NGRPsocQLKLon9qzaEFuqcYlTJInR9x0fA,585
282
303
  evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
283
304
  evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
284
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=mMDRPYpSUsnbc3ijicy4IPD6J2z44iAIgKUdhkf5Nkw,14037
305
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=XDdIsVWYkRwWFOWeGQGBpbXArrkAp-eeRBWoTWFT358,14022
285
306
  evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
286
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=Em8li9Mqoc3Xv3sDz4lAlKU4h9vZpUkzycGGyM6a-sM,2807
307
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=uK4ewgxU3Am1VloBeVWrGTwMam47pjvZxwUXpPp1WZg,2837
287
308
  evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
288
309
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
289
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=R38FVQMznUzTAfq6DLp-Y34XYYWkqmy1-aMfNcGKthk,14998
310
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=eAB0TlSTyC8oljLMgTw2Y56PbFIwOdmwbrCo2W0WMkU,14995
290
311
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
291
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=0spv43GzXh_5l9BUZN8tuPZgGKkkU8InWjvFuUuRbLw,801
292
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=LPZP_XK612apDxYkvGsH8B8E2Z2Q2CaT6JY0T1dghEk,5866
293
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=1eMz5jxA06uoK-sZyD7SNnBy87gbwplt8526koTRBLo,3330
294
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=he84q4G6OdycZDQHVkHUxENuIQBBH8WJ-9GBiMpYC-A,9715
312
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=Z7yq6RpGd97NMzIp1WgCdHF05LJ6VqA2DA0ZksPrqRc,817
313
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=rs1dGXB6rINW2eFmcPPfTa7Su1tk0Mf0elXWrPJfZvo,5908
314
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=OcHWUa4MBC5BCfeROnjzX2a8Swf1u-KGhQJbwo3JsFI,3208
315
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=NLW0uR2wGby9FdrotM-Trcl6mrNUbqu2sst9riOTUEA,9666
295
316
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
296
317
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
297
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=F2OldVATVzyvaSxO2l-tBexhOkmoVb7n23S89JfFIEc,8313
298
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=QKYKGyXpQbDWJfz6EDZCB5meB5HGj59ygmoPm00Q1dQ,10955
318
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=5xpOaMnci_eH0iIJwPGCBREQ7irqg-zTnfuFXxIVB_4,8327
319
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=fKIO8PIt1kkirfF7UMgQE0b4Jc4-NtftHKTwg6S3_oI,10920
299
320
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
300
321
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
301
322
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
@@ -319,88 +340,80 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/
319
340
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
320
341
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
321
342
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
322
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=9QnWMeulCQm_c__-b9cBYbjqihLDSblW3-luGnxfBXQ,6391
343
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=nCdAW3SvWMTgkQqEXNFoOrb_Tb5FIOewNqy1A5_e2I4,6431
323
344
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
324
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=eCm7opG-Ld--sZEG67creYqoRsHjWBuWwUha2s13AuM,9806
325
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=1vuUf6EGzfiyMOHmacI3NhjCIq1LFhANpLJ_Frgh3Mk,20423
326
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=PobqtMnfh7HuD4zdlj5tBMufvufIO5fIL0_NvTOK4AQ,52965
327
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=L20LYyiI8AGOTKfTJuYZgrkcvg2UHsnFJE866v5jlyE,18674
328
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=gKOIPQhyB8SVbx0wW3W-VQNmZhdA4186C1XVHHWYkjc,46810
345
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=Ns7oM4KpKxWZTo8Lefe4EDFw-jzp5633zAArcWjoVZA,9772
346
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=KIF5tsiE7a5dbDfa-IKwzuzMUpuEAQPrm1nWFFtAeoI,20032
347
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=uhaehowhTqRhQtq_dVCgF-9Iu4yU19AMxx2sJimYwlA,52711
348
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=o5ykt3Q_WQlNmyxjQaS2-KPLGq1xqLZixNYam_Bs6NA,18701
349
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=aBKdQQS7cHMPgYqIknCdHCZ7j2_QLACPn_jU_njiMIs,46840
329
350
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
351
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
331
352
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
332
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=A5y_qCsmW9j78w92L9VEjXRaqcsyI5FCu0Z9QJvKF_4,18960
333
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=hcPHQjb-QJRAEFpDackLqjKeu8z_uKz_5VyyeU3AQQI,13879
334
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=DM1W0vZ0ZhoR9vvbl4O_MlShApZHv2MtK37zHsVoCjc,30527
335
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=5Ub0AYnQFcG32ByitEah6Hog6oAKNrK7HTnmlLI4XfE,11325
336
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=uT-ubavkEWPsqVtFy-rPQCw2rf-VOd-ZLqP_oDyNTIs,39581
337
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=grUl2rtGJVDoP6u47jywBzcmqL9kylqCf1IkyKCgNLE,83469
338
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=iuiXv-jZcWVoUle6GDQXLy1SMXt4WPJEOD4qhjPe6U8,3856
339
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=1XxKgsA6QYBkaADLUoVwFDPtRJzCvcSrk-1lwGKzF_c,2286
353
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=oS0lFHje_0ncOy8fg-mg5u2whowTz8ghSrGk3FlNNIQ,18896
354
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=vTJhL5pkUxNbCi24AcZFWBbqqKw-gCgh937woIKbSjE,13694
355
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=GqHUSTk0N9PFT553h5Om4XuuFe0LtG5_yWDM87MbUJs,30387
356
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=YXZqOjCnkP6sPW31FU-qNewtRekMANK0Uvlp3EIu2aQ,11334
357
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=AZlUbo_rcsp_VdSP5JA8BfcIBtlqNRqgloZ9c3gcnp8,39422
358
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=ndKj5JF6ch3IDDGwD-T3fipZEgJqOHKjt03s78rzMgY,83664
359
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=ZxBm6k7D6harpWLesr-6kQZeGXKyo-9dE1QZYso1Tp0,3867
360
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=Ng0QP_r7qn2DFmBGhSoZxqSItAkgVtyq_sZU_Vf25Hw,2190
340
361
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
341
362
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
342
363
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
343
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=uLV8Qp8lRGkMVq5EtvbPa4l8ZpykN6godzblV7oj8bg,7086
344
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=r-CRo6u9qmFcdSYNz9pWeuwj0XKykuWyFm7pQVK2yI4,6939
345
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=Vk1Fm7ED1eAHW6lmSKF7VBZa55EN4h1zBEBdm5uKW24,4303
364
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=wYVz388WuJ5e6rip4FyyN82JNbGXmGdkds50eTYQEJw,7130
365
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=qYGsWcZozURx6U07cW5LrLL4TW39-1jaa4R3ROfkl_E,6890
366
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=TOAI-KaUrtKjR1GNU_WwNXNpb9gGT-KX2FYe3muv_e0,4275
346
367
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
347
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=8THNmMvp7bYNIfExYGRfNJ88cuEvdK9KTO-3-51fUsY,13961
348
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=TkSEKghtqXW_1bHFsG9dhGVKv2XAmTMcdAH2UDvuSFM,36639
349
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=rGifS4I0o3faXrLnW-eSbksmkx7ppMkRlnVBomsbD3Q,851
368
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=S68U0DxWYGDmreRbH5yLDHBNN9PsczY9H0Uik0hO-ds,13872
369
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=i1XlJe_PTSmiPkZKIhUXC_lc0-z2ewNYo4W1DvZQxjY,36678
370
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=p67DDiFS-676z0z8jPj6NwXwNjEsqTXaXCh3g2UiDno,840
350
371
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
351
372
  evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
352
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=XzebAHBAjOpkIMZm43dd55PESgmyq_J45Ji6bogYR3s,11204
353
- evalscope/models/__init__.py,sha256=x0Sna8mbujdOVqIYSGwIULbiPOue_Ifp-2JElSZsuMs,1481
354
- evalscope/models/local_model.py,sha256=UWsmZlWpT8JNGjijzZQKirvq4YywBkKOS9G-U2cuxAw,4115
355
- evalscope/models/register.py,sha256=G35J6BULFWwuqZO_rTkKBru1llZAyfPztcAASp_cb8M,1257
356
- evalscope/models/adapters/__init__.py,sha256=WRaZsHlnz0MvGg9Jq565-XJjED-4cAyu4KbmrOhrHO4,688
357
- evalscope/models/adapters/base_adapter.py,sha256=P4aicNmz1nsX9QLY9t4c6OIQPzIYfOhcrqjlAjR-ENY,3477
358
- evalscope/models/adapters/bfcl_adapter.py,sha256=cG0vsQ3H2pmabo6tC0Y5Gonw0ng5-RFljDyRBMSj6xE,10422
359
- evalscope/models/adapters/chat_adapter.py,sha256=epxA_on9ipsak8Lnkweh9en2AjVm5G0L1ARXYmDEEbk,8026
360
- evalscope/models/adapters/choice_adapter.py,sha256=wIXnDcgnKaIMdhToaqy6fidhuZDpEz2vhxIB_V9u3Z8,8203
361
- evalscope/models/adapters/custom_adapter.py,sha256=W8DIBiMWvHHcc0Mn9Frjj1YbpHRi7w-UQVJDiU2PakU,2400
362
- evalscope/models/adapters/server_adapter.py,sha256=W6SXrPy-hZXpnISDjupu_j7bnmt-cP55sDojPXThitc,9701
363
- evalscope/models/adapters/t2i_adapter.py,sha256=d6OviQFi_uN8PPXKrFpivk5Awm1O6wd_Gii8t3hVahY,2806
364
- evalscope/models/adapters/tau_bench_adapter.py,sha256=jYGaj2L2wxtEiTdiSwZdY1XNkSzm6os7IvkxgK4msR0,6889
365
- evalscope/models/custom/__init__.py,sha256=MZylegALg1HerOYtp-qbzu4Wb6PW3JbrxwONHU-PAVs,131
366
- evalscope/models/custom/custom_model.py,sha256=rBccFVpCIfTGt9cgXLcxeUWc7w1sTRtbTO5w5qqQIQE,1405
367
- evalscope/models/custom/dummy_model.py,sha256=WpfrS3kvwRRdyThx9baaJ5vodYYh29VGRKsGKMWFflI,3124
373
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=Epk72q5iTdzRbuzOR669QqAUMgrFFngAU8Z3Qy9KLbM,11209
374
+ evalscope/models/__init__.py,sha256=RmW2S31BHBhMN49_VVF_5PJAk-TsuZQkuF2ALShbhAw,556
375
+ evalscope/models/mockllm.py,sha256=t1fFAHkEb1n_atOCfnGteCX3DWp774lnWcHzi5lBjwM,2511
376
+ evalscope/models/model_apis.py,sha256=-yj2cY0Z0Ku6ZTnFqpoxArHLJyoUdq4YA_ChLo3_xw4,1176
377
+ evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
378
+ evalscope/models/openai_compatible.py,sha256=8WlWtu7EWr3Y5e5ErpeLQ7ZKfN4HXkFN3gV_jl5p1NM,4528
379
+ evalscope/models/text2image_model.py,sha256=-NMLtZuT7L86HpkMpsz_gNaA5Z9_6p1MYzmjYZN6mvo,3929
380
+ evalscope/models/utils/openai.py,sha256=u343L0OVqv4NbVSICSzwfWXh3QEyIIIv9ZWrBzW6IGk,28013
368
381
  evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
369
382
  evalscope/perf/arguments.py,sha256=lG2IOOzxg29pdnF6IobzPcqEcYqopulFpVU2QzRaEJA,11429
370
- evalscope/perf/benchmark.py,sha256=ZVmsSeKDUKkApt3y5tIMMFZAyAj3UNVT7JPp1fh5mhE,7880
371
- evalscope/perf/http_client.py,sha256=l_OKL80kTP6sM_PEBvsJ1_TejYJdUQnE2UlB-ud1WQM,4588
383
+ evalscope/perf/benchmark.py,sha256=w6gb5ofGAXxBdp6hejowKgXu0rv1xfTqhg5VYBTcGc0,7885
384
+ evalscope/perf/http_client.py,sha256=4Ov1Cwi7gMgO05ZmazwyfYjUGAQNGWn7nbfl1ljRNh4,4610
372
385
  evalscope/perf/main.py,sha256=WZbBgFhIj9KqxzC7_NZxDlou019_EXatsHRt5vqDhFg,3439
373
386
  evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
374
387
  evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
375
388
  evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
376
389
  evalscope/perf/plugin/api/base.py,sha256=9cX4xwTzy5ycnWqmQqRGMLasTEX6jVlobtADkh1KwXE,2782
377
- evalscope/perf/plugin/api/custom_api.py,sha256=f8rUixcV9mTxoYyabu3wedEC4YVB70Yw6Az1NpfeWPQ,10375
390
+ evalscope/perf/plugin/api/custom_api.py,sha256=VYJO2lUt9EKdWz6zeYCfvdI0MqfcsIgcKvxqvY5C-3k,10376
378
391
  evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
379
392
  evalscope/perf/plugin/api/default_api.py,sha256=kjuHQ-zRHe5WU4ofSzWBpWbIxBQBOh_ucu1z2g62gWg,4315
380
- evalscope/perf/plugin/api/openai_api.py,sha256=Mt_VedJUaCH3g-oVSJ_fsGcPk0KkspSzIMkrkih2Zb0,7777
393
+ evalscope/perf/plugin/api/openai_api.py,sha256=oewwOPhv0BLdC7n3BUngpVrDYst5wMrBEPhN8oGMKNU,7703
381
394
  evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
382
395
  evalscope/perf/plugin/datasets/base.py,sha256=-3Ihnp2hYvZyPnP8Gh2Pu8ovlLNFHyZnNgRu3WHG4d0,2714
383
- evalscope/perf/plugin/datasets/custom.py,sha256=UuOk8xYfSYyyYZL3U4grUjtfQhWHHZeAEC63n_4Siuw,1376
384
- evalscope/perf/plugin/datasets/flickr8k.py,sha256=IXz5uu5SlqF1l_tJ_ITr2vx_R_d7gxWzqPuyEOx7rYo,1043
385
- evalscope/perf/plugin/datasets/kontext_bench.py,sha256=XjKzr7nMzI3cfk83IH0PH1TNJaQMRXUpACnzFfP2n6g,1091
386
- evalscope/perf/plugin/datasets/line_by_line.py,sha256=c3ydW4GqxkG0vl2g64jG0vBMql2FuFPyWh3mgkIh9Do,987
387
- evalscope/perf/plugin/datasets/longalpaca.py,sha256=VnMjdHl_JV3NmZ6wRxVlJ99e8PYSjQTcVxoTkl21Ei0,1327
388
- evalscope/perf/plugin/datasets/openqa.py,sha256=33AR419IrH-FxZRjjcYdAIEZXaX4TKEoirVVfX--N9I,1493
396
+ evalscope/perf/plugin/datasets/custom.py,sha256=yoRHTvTGAglaZ-mmRkPjYNMG7uZYuT1_KrBxnl2i0qg,1385
397
+ evalscope/perf/plugin/datasets/flickr8k.py,sha256=M-w1UjOMkA6Uh9v-SURDrm1YCL-m1Cn1u1cIcEJFDpY,1044
398
+ evalscope/perf/plugin/datasets/kontext_bench.py,sha256=-KsoXS7nAd6hzN4oCe85zcLkZQT-1IGWQFThuuvE7vo,1092
399
+ evalscope/perf/plugin/datasets/line_by_line.py,sha256=F4ppdjKKLzFNf_16h6S-6nAU4lOfOFI2-tPgIeZDTMA,996
400
+ evalscope/perf/plugin/datasets/longalpaca.py,sha256=JjPGYP8NdPmP48wff2fL5IZQfajXL5qhZBvKmZxtfW4,1336
401
+ evalscope/perf/plugin/datasets/openqa.py,sha256=5PqqiIvNTLlRrPb8PWqMGQyWRb6LuIqipYn67-xd-dY,1519
389
402
  evalscope/perf/plugin/datasets/random_dataset.py,sha256=NNAXvgFPkLDOSpYNex1DyE4X-ELtQRm13_oBooO30j8,3514
390
403
  evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=F3yA9Ih3YO895lZKCo3i85LeKTzjvGcvhzc8UNN-gUI,3240
391
404
  evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
392
405
  evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
393
406
  evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
394
- evalscope/perf/utils/benchmark_util.py,sha256=7bHpa5oaqcPJX7DSUkzK9assoFSHC27Q7-QylUOiklQ,7136
395
- evalscope/perf/utils/db_util.py,sha256=TCdmoEx5iScL6h8wzucPojPwn6J1wTmQqX4sVk-ilHo,11630
407
+ evalscope/perf/utils/benchmark_util.py,sha256=91hZabkrDnv-0hgmqHi5TmyCT1ztbGbhu3Y5pB6vgRU,7157
408
+ evalscope/perf/utils/db_util.py,sha256=HAISq6M7xCD2gjUEqqfbK3FjBxA-tvr_n-751tU9ypo,11634
396
409
  evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
397
- evalscope/perf/utils/local_server.py,sha256=RL9rGd5tEniZ0aErhHcbVXMX22YmujfE11T3j37VL8k,4684
410
+ evalscope/perf/utils/local_server.py,sha256=Bp4pWmjZS6CDlumedea_XRsAqWuoCbmr3z4TPOk2vEc,4768
398
411
  evalscope/perf/utils/log_utils.py,sha256=NWSK_ITG4yoVx5GMLbIRGDoXSs90s7X3mftdm37Os2U,1666
399
- evalscope/perf/utils/rich_display.py,sha256=xZzeryQbYM6Cv8g1ulK6OQUE2CalQ_KtFxiy7pioeEU,8127
400
- evalscope/report/__init__.py,sha256=DIoXbj0mjs1m2kEgFvIyqy4skDuoBu0UDVmTDa60Ymk,905
401
- evalscope/report/combinator.py,sha256=4ahUtTFPTNiSjamldX3IcLf33yKTJKs6ZsC4fsCafe8,4192
402
- evalscope/report/generator.py,sha256=oykmQROG-Bt8ttCH4RtvmGJ39HmDJMTU6gG26lg5LHE,4321
403
- evalscope/report/utils.py,sha256=taTSLvMKzAtJ9oha7pe0WF2UZZfEqPQgdj4urq7ZJIE,8298
412
+ evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
413
+ evalscope/report/__init__.py,sha256=qpiOJkM4PO8l9X9ZPUsqBFBfNOzYVEkYfwisfli9bJE,907
414
+ evalscope/report/combinator.py,sha256=MAiOCj_q5mXm8-3lARvCSG12jUVEdJ8VcoEHJapoWzo,4134
415
+ evalscope/report/generator.py,sha256=_ovCzV7E5SfFWLeKIW6CotSqiqiJ8qkNQU5UlPGscSM,8041
416
+ evalscope/report/report.py,sha256=KxboijAVNENxYHjiwyyqW_aQZ0F2CyJ6MbqUJTRHJMs,8273
404
417
  evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
405
418
  evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
406
419
  evalscope/third_party/longbench_write/__init__.py,sha256=GNbBDc7HAh_V2Hfy5HhND_u7z6OI79czoBlP8lX4PVo,126
@@ -436,28 +449,33 @@ evalscope/third_party/toolbench_static/requirements.txt,sha256=OW91Z8hfzh7yQUYgP
436
449
  evalscope/third_party/toolbench_static/toolbench_static.py,sha256=xE__eXvSwHmmSh1tXNvyBo6MCO4mDlYTbIYl9OGEfNI,2120
437
450
  evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
438
451
  evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=GITEbyiER10Zi-ZWpSqYCdAsiVtNeGK24hvR3kmYn2s,2689
439
- evalscope/utils/__init__.py,sha256=OiVmYHVkC_d8s6Zp1I6p6oTyhCEGvN-I9E6uzn8dgF4,1940
452
+ evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2231
440
453
  evalscope/utils/argument_utils.py,sha256=D7qOH85wf7LKh_cJ2X51OEaL7CMaddydmHZkfoYpvLk,1952
441
- evalscope/utils/chat_service.py,sha256=U2jtrkOa2asRp16Zam0zIi_38mCyWQqql_L6JSwii4I,8749
442
- evalscope/utils/deprecation_utils.py,sha256=WyeiLWSi5ti6FkuMbhimcPPUB43paa1FZ5-JOAWNFZI,1344
454
+ evalscope/utils/chat_service.py,sha256=47VmV4NdiYazfSAPww5wPf53L5avw6VETYgyGZyGvGc,8758
455
+ evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
456
+ evalscope/utils/function_utils.py,sha256=a752Z4Xb1rznnLJU9g5Pxqd3r_XzfLzAkdcjSX0kOVc,650
443
457
  evalscope/utils/import_utils.py,sha256=BSdp7RQSZu67129TBbtJvMWU0CfCFu864K31eiM3pr8,2975
444
- evalscope/utils/io_utils.py,sha256=2eEkLx4jhekgIV4vYL8yTN0PT6dbHUERMBZwmvxuiEc,7109
445
- evalscope/utils/logger.py,sha256=Q2IeV_0jxz8L34b5GddPeCKXVh0UClbuhjyLe5Wtj7M,3648
446
- evalscope/utils/model_utils.py,sha256=F1_WBHvBehWqrTd6kPtKICeeYucaZn5H0Gc3cCplYB8,2329
458
+ evalscope/utils/io_utils.py,sha256=elAFpyb5FGwV3AZxapkXqZmjtJCSvZZVe0QQEH4nxWM,10020
459
+ evalscope/utils/json_schema.py,sha256=MLCS8cSLXF83UPebBaVWDfXJnf0qXsXnr-bIRG88cI4,7485
460
+ evalscope/utils/logger.py,sha256=SPhhXo9gyZtWDYDLumII2CEmwHsaW8Bu1IjK5UqWrKQ,5273
461
+ evalscope/utils/model_utils.py,sha256=q0mmcfUJVks21NHP8awTQk_1q6ruupjzIBN_Xo3wt40,2394
462
+ evalscope/utils/multi_choices.py,sha256=ZEpN8LcZfXhhuATeMZx_uEnMg3l981J_OdSL90iFoZQ,8951
463
+ evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
447
464
  tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
448
465
  tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
449
466
  tests/utils.py,sha256=Fgm0CU6ilZjCGOfOMJH-Trxy0UIAGbhvy0Ijy_zDGUk,323
450
467
  tests/aigc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
451
- tests/aigc/test_t2i.py,sha256=XtVknpwlVMb6FSw3_WMFxMq0gZX6iG-ffdSQkcW2Fzw,3856
468
+ tests/aigc/test_t2i.py,sha256=fciaGsOrkOpT4WQlsnmjrqw6qolCzI0DGyWQAJkM-Es,4513
469
+ tests/benchmark/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
470
+ tests/benchmark/test_eval.py,sha256=fHAr4h2YjqIVk-FHp93HUZvRZ1fvlVFd1EUeRwrIwYw,12559
452
471
  tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
453
- tests/cli/test_all.py,sha256=IT0mxjiuHCC0PpT4z3oN1Bbr_0viMcm8GnShZ02kp8w,6333
454
- tests/cli/test_collection.py,sha256=bXWzccH822Y2B1Ed251U6TE8G_osI6MXYNxzmfv9kBI,4197
455
- tests/cli/test_custom.py,sha256=0YE-TCAeaQMRVRFla_TIvTd8d0USvvsSeqvYAD3NDNg,8796
456
- tests/cli/test_run.py,sha256=YKX2XCHPxnStgzzP67U90RV9r1MC2GM3JoGQqfZKqrI,17324
472
+ tests/cli/test_all.py,sha256=a3G0LMgQx3M97uy0GfX1DFxbA7zWofkxgtwT8PMorQI,6268
473
+ tests/cli/test_collection.py,sha256=OUm2_Qt0zkQehPTAmUaGRNBes8ewr7wYfE0E-gUe1J8,4386
474
+ tests/cli/test_custom.py,sha256=9z_N7Re712xI62TqVSTBdzB_iFFEUb55wcWIcGvJb84,9254
457
475
  tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
458
476
  tests/perf/test_perf.py,sha256=AEWvpN3ID6s-9MEoaZjQqUM8VVsqgk_v9KX8pDgvozA,5864
459
477
  tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
460
- tests/rag/test_clip_benchmark.py,sha256=13pcY3gYHNQh2KfEHCqtCSqiOcbngSJ1BlVZzI58JCE,2694
478
+ tests/rag/test_clip_benchmark.py,sha256=YmfezEzqBrl9-Ga2pG4YXs0ARcD5gWmuzINjY08tPpM,2695
461
479
  tests/rag/test_mteb.py,sha256=fdNQIyUEzE7puPCKw5QhCHTEu7hz-ieHeq1xCWGh6IM,7246
462
480
  tests/rag/test_ragas.py,sha256=5qozXvPFIb67T-igJv87ijlOgkPnqgkkBVXu6Ht4D0A,4554
463
481
  tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -466,9 +484,9 @@ tests/swift/test_run_swift_vlm_eval.py,sha256=RwrKkc1WHEZxetM11cGL81G4faKCn7SYn4
466
484
  tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=UAUtOCQ72xbm8s-sov3cBEpYVDy189wpB-qOL3KoU7M,6053
467
485
  tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
468
486
  tests/vlm/test_vlmeval.py,sha256=EDQRkYfSyOICUwo_tm3p-puaE_xdFmqOPkrt5etxsqM,3307
469
- evalscope-0.17.1.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
470
- evalscope-0.17.1.dist-info/METADATA,sha256=1PRiimjOBZgSWjvT3iL4VcvdaWk8v3fGp9xCXLpM1Dw,38469
471
- evalscope-0.17.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
472
- evalscope-0.17.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
473
- evalscope-0.17.1.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
474
- evalscope-0.17.1.dist-info/RECORD,,
487
+ evalscope-1.0.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
488
+ evalscope-1.0.0.dist-info/METADATA,sha256=FKr7sZCbyX_HxicgCX5rHrZz19STzLSK1Tgmm0CrWlg,39723
489
+ evalscope-1.0.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
490
+ evalscope-1.0.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
491
+ evalscope-1.0.0.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
492
+ evalscope-1.0.0.dist-info/RECORD,,