evalscope 0.10.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (606)
  1. evalscope/__init__.py +4 -1
  2. evalscope/api/benchmark/__init__.py +11 -0
  3. evalscope/api/benchmark/adapters/__init__.py +7 -0
  4. evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
  5. evalscope/api/benchmark/adapters/default_data_adapter.py +754 -0
  6. evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
  7. evalscope/api/benchmark/adapters/multi_choice_adapter.py +86 -0
  8. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  9. evalscope/api/benchmark/adapters/text2image_adapter.py +157 -0
  10. evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
  11. evalscope/api/benchmark/benchmark.py +404 -0
  12. evalscope/api/benchmark/meta.py +124 -0
  13. evalscope/api/dataset/__init__.py +2 -0
  14. evalscope/api/dataset/dataset.py +370 -0
  15. evalscope/api/dataset/loader.py +266 -0
  16. evalscope/api/dataset/utils.py +143 -0
  17. evalscope/api/evaluator/__init__.py +3 -0
  18. evalscope/api/evaluator/cache.py +382 -0
  19. evalscope/api/evaluator/evaluator.py +61 -0
  20. evalscope/api/evaluator/state.py +280 -0
  21. evalscope/api/filter/__init__.py +1 -0
  22. evalscope/api/filter/filter.py +72 -0
  23. evalscope/api/messages/__init__.py +12 -0
  24. evalscope/api/messages/chat_message.py +248 -0
  25. evalscope/api/messages/content.py +102 -0
  26. evalscope/api/messages/utils.py +35 -0
  27. evalscope/api/metric/__init__.py +2 -0
  28. evalscope/api/metric/metric.py +60 -0
  29. evalscope/api/metric/scorer.py +113 -0
  30. evalscope/api/mixin/__init__.py +2 -0
  31. evalscope/api/mixin/llm_judge_mixin.py +170 -0
  32. evalscope/api/mixin/sandbox_mixin.py +182 -0
  33. evalscope/api/model/__init__.py +12 -0
  34. evalscope/api/model/generate_config.py +161 -0
  35. evalscope/api/model/model.py +386 -0
  36. evalscope/api/model/model_output.py +285 -0
  37. evalscope/api/registry.py +182 -0
  38. evalscope/api/tool/__init__.py +3 -0
  39. evalscope/api/tool/tool_call.py +101 -0
  40. evalscope/api/tool/tool_info.py +173 -0
  41. evalscope/api/tool/utils.py +64 -0
  42. evalscope/app/__init__.py +28 -0
  43. evalscope/app/app.py +38 -0
  44. evalscope/app/arguments.py +11 -0
  45. evalscope/app/constants.py +22 -0
  46. evalscope/app/ui/__init__.py +20 -0
  47. evalscope/app/ui/app_ui.py +53 -0
  48. evalscope/app/ui/multi_model.py +353 -0
  49. evalscope/app/ui/sidebar.py +42 -0
  50. evalscope/app/ui/single_model.py +220 -0
  51. evalscope/app/ui/visualization.py +36 -0
  52. evalscope/app/utils/data_utils.py +195 -0
  53. evalscope/app/utils/env_utils.py +12 -0
  54. evalscope/app/utils/localization.py +221 -0
  55. evalscope/app/utils/text_utils.py +119 -0
  56. evalscope/app/utils/visualization.py +96 -0
  57. evalscope/arguments.py +32 -9
  58. evalscope/backend/opencompass/api_meta_template.py +2 -1
  59. evalscope/backend/opencompass/backend_manager.py +10 -7
  60. evalscope/backend/rag_eval/__init__.py +1 -1
  61. evalscope/backend/rag_eval/backend_manager.py +23 -6
  62. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +33 -21
  63. evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
  64. evalscope/backend/rag_eval/cmteb/arguments.py +14 -1
  65. evalscope/backend/rag_eval/cmteb/task_template.py +19 -3
  66. evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py +1 -1
  67. evalscope/backend/rag_eval/ragas/arguments.py +0 -1
  68. evalscope/backend/rag_eval/ragas/task_template.py +2 -1
  69. evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
  70. evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
  71. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +9 -3
  72. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -6
  73. evalscope/backend/rag_eval/utils/embedding.py +125 -32
  74. evalscope/backend/rag_eval/utils/llm.py +16 -16
  75. evalscope/backend/vlm_eval_kit/backend_manager.py +8 -3
  76. evalscope/benchmarks/__init__.py +17 -5
  77. evalscope/benchmarks/aa_lcr/__init__.py +0 -0
  78. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  79. evalscope/benchmarks/ai2d/__init__.py +0 -0
  80. evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
  81. evalscope/benchmarks/aime/__init__.py +0 -0
  82. evalscope/benchmarks/aime/aime24_adapter.py +55 -0
  83. evalscope/benchmarks/aime/aime25_adapter.py +181 -0
  84. evalscope/benchmarks/aime/grader.py +307 -0
  85. evalscope/{metrics/math_accuracy.py → benchmarks/aime/math_normalize.py} +61 -72
  86. evalscope/benchmarks/alpaca_eval/__init__.py +0 -0
  87. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +133 -0
  88. evalscope/benchmarks/amc/__init__.py +0 -0
  89. evalscope/benchmarks/amc/amc_adapter.py +51 -0
  90. evalscope/benchmarks/arc/arc_adapter.py +34 -149
  91. evalscope/benchmarks/arena_hard/__init__.py +0 -0
  92. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +149 -0
  93. evalscope/benchmarks/arena_hard/utils.py +186 -0
  94. evalscope/benchmarks/bbh/bbh_adapter.py +117 -157
  95. evalscope/benchmarks/bfcl/__init__.py +0 -0
  96. evalscope/benchmarks/bfcl/v3/__init__.py +0 -0
  97. evalscope/benchmarks/bfcl/v3/bfcl_v3_adapter.py +370 -0
  98. evalscope/benchmarks/bfcl/v3/generation.py +222 -0
  99. evalscope/benchmarks/bfcl/v3/utils.py +23 -0
  100. evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
  101. evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
  102. evalscope/benchmarks/bfcl/v4/utils.py +410 -0
  103. evalscope/benchmarks/biomix_qa/__init__.py +0 -0
  104. evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
  105. evalscope/benchmarks/blink/__init__.py +0 -0
  106. evalscope/benchmarks/blink/blink_adapter.py +61 -0
  107. evalscope/benchmarks/ceval/ceval_adapter.py +93 -174
  108. evalscope/benchmarks/chartqa/__init__.py +0 -0
  109. evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
  110. evalscope/benchmarks/chartqa/utils.py +38 -0
  111. evalscope/benchmarks/chinese_simple_qa/__init__.py +0 -0
  112. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +170 -0
  113. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -140
  114. evalscope/benchmarks/coin_flip/__init__.py +0 -0
  115. evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
  116. evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
  117. evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
  118. evalscope/benchmarks/competition_math/competition_math_adapter.py +64 -112
  119. evalscope/benchmarks/data_collection/__init__.py +0 -0
  120. evalscope/benchmarks/data_collection/data_collection_adapter.py +215 -0
  121. evalscope/benchmarks/docmath/__init__.py +0 -0
  122. evalscope/benchmarks/docmath/docmath_adapter.py +143 -0
  123. evalscope/benchmarks/docmath/utils.py +219 -0
  124. evalscope/benchmarks/docvqa/__init__.py +0 -0
  125. evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
  126. evalscope/benchmarks/drivelology/__init__.py +0 -0
  127. evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
  128. evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
  129. evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
  130. evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
  131. evalscope/benchmarks/drop/__init__.py +0 -0
  132. evalscope/benchmarks/drop/drop_adapter.py +155 -0
  133. evalscope/benchmarks/drop/utils.py +156 -0
  134. evalscope/benchmarks/frames/__init__.py +0 -0
  135. evalscope/benchmarks/frames/frames_adapter.py +175 -0
  136. evalscope/benchmarks/frames/utils.py +37 -0
  137. evalscope/benchmarks/general_arena/__init__.py +0 -0
  138. evalscope/benchmarks/general_arena/general_arena_adapter.py +454 -0
  139. evalscope/benchmarks/general_arena/utils.py +223 -0
  140. evalscope/benchmarks/general_mcq/__init__.py +0 -0
  141. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +58 -0
  142. evalscope/benchmarks/general_qa/general_qa_adapter.py +75 -107
  143. evalscope/benchmarks/gpqa/__init__.py +0 -0
  144. evalscope/benchmarks/gpqa/gpqa_adapter.py +90 -0
  145. evalscope/benchmarks/gpqa/prompt.py +88 -0
  146. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +77 -144
  147. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  148. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
  149. evalscope/benchmarks/halu_eval/__init__.py +0 -0
  150. evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
  151. evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
  152. evalscope/benchmarks/healthbench/__init__.py +0 -0
  153. evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
  154. evalscope/benchmarks/healthbench/utils.py +102 -0
  155. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +36 -134
  156. evalscope/benchmarks/hle/__init__.py +0 -0
  157. evalscope/benchmarks/hle/hle_adapter.py +153 -0
  158. evalscope/benchmarks/humaneval/humaneval_adapter.py +80 -88
  159. evalscope/benchmarks/humaneval/utils.py +235 -0
  160. evalscope/benchmarks/ifeval/ifeval_adapter.py +71 -45
  161. evalscope/benchmarks/ifeval/instructions.py +112 -68
  162. evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
  163. evalscope/benchmarks/ifeval/instructions_util.py +2 -3
  164. evalscope/benchmarks/ifeval/utils.py +6 -7
  165. evalscope/benchmarks/image_edit/__init__.py +0 -0
  166. evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
  167. evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
  168. evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
  169. evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
  170. evalscope/benchmarks/infovqa/__init__.py +0 -0
  171. evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
  172. evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -58
  173. evalscope/benchmarks/live_code_bench/__init__.py +0 -0
  174. evalscope/benchmarks/live_code_bench/evaluate_utils.py +195 -0
  175. evalscope/benchmarks/live_code_bench/extract_utils.py +70 -0
  176. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +150 -0
  177. evalscope/benchmarks/live_code_bench/load_utils.py +63 -0
  178. evalscope/benchmarks/live_code_bench/pass_k_utils.py +56 -0
  179. evalscope/benchmarks/live_code_bench/prompts.py +207 -0
  180. evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
  181. evalscope/benchmarks/live_code_bench/testing_util.py +544 -0
  182. evalscope/benchmarks/logi_qa/__int__.py +0 -0
  183. evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
  184. evalscope/benchmarks/maritime_bench/__init__.py +0 -0
  185. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +56 -0
  186. evalscope/benchmarks/math_500/__init__.py +0 -0
  187. evalscope/benchmarks/math_500/math_500_adapter.py +55 -0
  188. evalscope/benchmarks/math_qa/__init__.py +0 -0
  189. evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
  190. evalscope/benchmarks/math_verse/__init__.py +0 -0
  191. evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
  192. evalscope/benchmarks/math_vision/__init__.py +0 -0
  193. evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
  194. evalscope/benchmarks/math_vista/__init__.py +0 -0
  195. evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
  196. evalscope/benchmarks/med_mcqa/__init__.py +0 -0
  197. evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
  198. evalscope/benchmarks/minerva_math/__init__.py +0 -0
  199. evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
  200. evalscope/benchmarks/mm_bench/__init__.py +0 -0
  201. evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
  202. evalscope/benchmarks/mm_star/__init__.py +0 -0
  203. evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
  204. evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -210
  205. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +87 -103
  206. evalscope/benchmarks/mmlu_redux/__init__.py +0 -0
  207. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +139 -0
  208. evalscope/benchmarks/mmmu/__init__.py +0 -0
  209. evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
  210. evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
  211. evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
  212. evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
  213. evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
  214. evalscope/benchmarks/multi_if/__init__.py +0 -0
  215. evalscope/benchmarks/multi_if/ifeval.py +3354 -0
  216. evalscope/benchmarks/multi_if/metrics.py +120 -0
  217. evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
  218. evalscope/benchmarks/music_trivia/__init__.py +0 -0
  219. evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
  220. evalscope/benchmarks/musr/__init__.py +0 -0
  221. evalscope/benchmarks/musr/musr_adapter.py +43 -0
  222. evalscope/benchmarks/needle_haystack/__init__.py +0 -0
  223. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +389 -0
  224. evalscope/benchmarks/needle_haystack/utils.py +79 -0
  225. evalscope/benchmarks/ner/__init__.py +0 -0
  226. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  227. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  228. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  229. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  230. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  231. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  232. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  233. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  234. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  235. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  236. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  237. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  238. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  239. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  240. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  241. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  242. evalscope/benchmarks/ocr_bench/__init__.py +0 -0
  243. evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
  244. evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
  245. evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
  246. evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
  247. evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
  248. evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
  249. evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
  250. evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
  251. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
  252. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
  253. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
  254. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
  255. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
  256. evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
  257. evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
  258. evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
  259. evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
  260. evalscope/benchmarks/olympiad_bench/utils.py +565 -0
  261. evalscope/benchmarks/omni_bench/__init__.py +0 -0
  262. evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
  263. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  264. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  265. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  266. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  267. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  268. evalscope/benchmarks/piqa/__init__.py +0 -0
  269. evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
  270. evalscope/benchmarks/poly_math/__init__.py +0 -0
  271. evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
  272. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  273. evalscope/benchmarks/pope/__init__.py +0 -0
  274. evalscope/benchmarks/pope/pope_adapter.py +112 -0
  275. evalscope/benchmarks/process_bench/__init__.py +0 -0
  276. evalscope/benchmarks/process_bench/process_bench_adapter.py +171 -0
  277. evalscope/benchmarks/pumed_qa/__init__.py +0 -0
  278. evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
  279. evalscope/benchmarks/qasc/__init__.py +0 -0
  280. evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
  281. evalscope/benchmarks/race/race_adapter.py +33 -120
  282. evalscope/benchmarks/real_world_qa/__init__.py +0 -0
  283. evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
  284. evalscope/benchmarks/sciq/__init__.py +0 -0
  285. evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
  286. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  287. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  288. evalscope/benchmarks/simple_qa/__init__.py +0 -0
  289. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +169 -0
  290. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  291. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  292. evalscope/benchmarks/siqa/__init__.py +0 -0
  293. evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
  294. evalscope/benchmarks/super_gpqa/__init__.py +0 -0
  295. evalscope/benchmarks/super_gpqa/prompt.py +88 -0
  296. evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +165 -0
  297. evalscope/benchmarks/super_gpqa/utils.py +86 -0
  298. evalscope/benchmarks/tau_bench/__init__.py +0 -0
  299. evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
  300. evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
  301. evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
  302. evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
  303. evalscope/benchmarks/tau_bench/tau_bench/generation.py +147 -0
  304. evalscope/benchmarks/tau_bench/tau_bench/tau_bench_adapter.py +168 -0
  305. evalscope/benchmarks/text2image/__init__.py +0 -0
  306. evalscope/benchmarks/text2image/evalmuse_adapter.py +78 -0
  307. evalscope/benchmarks/text2image/genai_bench_adapter.py +53 -0
  308. evalscope/benchmarks/text2image/general_t2i_adapter.py +42 -0
  309. evalscope/benchmarks/text2image/hpdv2_adapter.py +52 -0
  310. evalscope/benchmarks/text2image/tifa_adapter.py +27 -0
  311. evalscope/benchmarks/tool_bench/__init__.py +0 -0
  312. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +102 -0
  313. evalscope/benchmarks/tool_bench/utils.py +203 -0
  314. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -118
  315. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -270
  316. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  317. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  318. evalscope/benchmarks/winogrande/__init__.py +0 -0
  319. evalscope/benchmarks/winogrande/winogrande_adapter.py +34 -0
  320. evalscope/benchmarks/wmt/__init__.py +0 -0
  321. evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
  322. evalscope/benchmarks/zerobench/__init__.py +0 -0
  323. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  324. evalscope/cli/cli.py +2 -0
  325. evalscope/cli/start_app.py +12 -2
  326. evalscope/cli/start_eval.py +4 -3
  327. evalscope/cli/start_perf.py +10 -2
  328. evalscope/cli/start_server.py +6 -3
  329. evalscope/collections/__init__.py +27 -3
  330. evalscope/collections/sampler.py +12 -11
  331. evalscope/collections/schema.py +13 -12
  332. evalscope/config.py +218 -147
  333. evalscope/constants.py +78 -82
  334. evalscope/evaluator/__init__.py +1 -1
  335. evalscope/evaluator/evaluator.py +334 -318
  336. evalscope/filters/__init__.py +2 -0
  337. evalscope/filters/extraction.py +126 -0
  338. evalscope/filters/selection.py +57 -0
  339. evalscope/metrics/__init__.py +59 -3
  340. evalscope/metrics/bert_score/__init__.py +0 -0
  341. evalscope/metrics/bert_score/scorer.py +338 -0
  342. evalscope/metrics/bert_score/utils.py +697 -0
  343. evalscope/metrics/bundled_rouge_score/rouge_scorer.py +20 -15
  344. evalscope/metrics/llm_judge.py +211 -0
  345. evalscope/metrics/math_parser.py +545 -0
  346. evalscope/metrics/metric.py +611 -0
  347. evalscope/metrics/metrics.py +112 -23
  348. evalscope/metrics/rouge_metric.py +11 -13
  349. evalscope/metrics/t2v_metrics/__init__.py +0 -0
  350. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  351. evalscope/metrics/t2v_metrics/constants.py +12 -0
  352. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  353. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  354. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  355. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  356. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  357. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +134 -0
  358. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +282 -0
  359. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +115 -0
  360. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +87 -0
  361. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +86 -0
  362. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  363. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  364. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +85 -0
  365. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +99 -0
  366. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +176 -0
  367. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  368. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +82 -0
  369. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +74 -0
  370. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  371. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  372. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  373. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  374. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  375. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +306 -0
  376. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  377. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +84 -0
  378. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  379. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +223 -0
  380. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +153 -0
  381. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  382. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  383. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  384. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +24 -0
  385. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +190 -0
  386. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +100 -0
  387. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +313 -0
  388. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  389. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  390. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +192 -0
  391. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +320 -0
  392. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  393. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  394. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  395. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  396. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  397. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  398. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  399. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  400. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  401. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  402. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  403. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  404. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  405. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  406. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  407. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  408. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  409. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  410. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  411. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  412. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  413. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  414. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  415. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +212 -0
  416. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  417. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1111 -0
  418. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  419. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  420. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  421. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +457 -0
  422. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +370 -0
  423. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +765 -0
  424. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +274 -0
  425. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +896 -0
  426. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1876 -0
  427. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +83 -0
  428. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +58 -0
  429. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  430. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  431. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  432. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +187 -0
  433. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +179 -0
  434. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +115 -0
  435. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  436. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +348 -0
  437. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +870 -0
  438. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +273 -0
  439. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +514 -0
  440. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1291 -0
  441. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +476 -0
  442. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +35 -0
  443. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  444. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  445. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +393 -0
  446. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +129 -0
  447. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +18 -0
  448. evalscope/metrics/t2v_metrics/score.py +78 -0
  449. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  450. evalscope/models/__init__.py +23 -13
  451. evalscope/models/image_edit_model.py +125 -0
  452. evalscope/models/mockllm.py +65 -0
  453. evalscope/models/model_apis.py +69 -0
  454. evalscope/models/modelscope.py +455 -0
  455. evalscope/models/openai_compatible.py +144 -0
  456. evalscope/models/text2image_model.py +124 -0
  457. evalscope/models/utils/openai.py +708 -0
  458. evalscope/perf/__init__.py +0 -1
  459. evalscope/perf/arguments.py +103 -69
  460. evalscope/perf/benchmark.py +114 -163
  461. evalscope/perf/http_client.py +59 -89
  462. evalscope/perf/main.py +91 -18
  463. evalscope/perf/plugin/__init__.py +3 -2
  464. evalscope/perf/plugin/api/__init__.py +4 -3
  465. evalscope/perf/plugin/api/base.py +27 -7
  466. evalscope/perf/plugin/api/custom_api.py +170 -57
  467. evalscope/perf/plugin/api/dashscope_api.py +4 -10
  468. evalscope/perf/plugin/api/default_api.py +214 -0
  469. evalscope/perf/plugin/api/openai_api.py +120 -41
  470. evalscope/perf/plugin/datasets/__init__.py +10 -6
  471. evalscope/perf/plugin/datasets/base.py +43 -1
  472. evalscope/perf/plugin/datasets/custom.py +22 -3
  473. evalscope/perf/plugin/datasets/flickr8k.py +5 -27
  474. evalscope/perf/plugin/datasets/kontext_bench.py +28 -0
  475. evalscope/perf/plugin/datasets/line_by_line.py +7 -3
  476. evalscope/perf/plugin/datasets/longalpaca.py +7 -3
  477. evalscope/perf/plugin/datasets/openqa.py +13 -14
  478. evalscope/perf/plugin/datasets/random_dataset.py +67 -0
  479. evalscope/perf/plugin/datasets/random_vl_dataset.py +80 -0
  480. evalscope/perf/plugin/datasets/speed_benchmark.py +11 -0
  481. evalscope/perf/plugin/registry.py +36 -16
  482. evalscope/perf/utils/analysis_result.py +24 -23
  483. evalscope/perf/utils/benchmark_util.py +95 -55
  484. evalscope/perf/utils/db_util.py +115 -78
  485. evalscope/perf/utils/local_server.py +12 -47
  486. evalscope/perf/utils/log_utils.py +63 -0
  487. evalscope/perf/utils/rich_display.py +192 -0
  488. evalscope/report/__init__.py +46 -3
  489. evalscope/report/combinator.py +143 -32
  490. evalscope/report/generator.py +74 -34
  491. evalscope/report/report.py +238 -0
  492. evalscope/run.py +71 -46
  493. evalscope/summarizer.py +5 -5
  494. evalscope/third_party/longbench_write/infer.py +1 -1
  495. evalscope/third_party/thinkbench/__init__.py +3 -0
  496. evalscope/third_party/thinkbench/eval.py +441 -0
  497. evalscope/third_party/thinkbench/infer.py +130 -0
  498. evalscope/third_party/thinkbench/resources/critique_template.txt +17 -0
  499. evalscope/third_party/thinkbench/resources/reformat_template.txt +31 -0
  500. evalscope/third_party/thinkbench/tools/__init__.py +0 -0
  501. evalscope/third_party/thinkbench/tools/llm.py +48 -0
  502. evalscope/third_party/thinkbench/tools/utils.py +13 -0
  503. evalscope/third_party/toolbench_static/llm/swift_infer.py +46 -20
  504. evalscope/third_party/toolbench_static/toolbench_static.py +2 -1
  505. evalscope/utils/__init__.py +82 -2
  506. evalscope/utils/argument_utils.py +64 -0
  507. evalscope/utils/chat_service.py +8 -6
  508. evalscope/utils/deprecation_utils.py +53 -0
  509. evalscope/utils/function_utils.py +266 -0
  510. evalscope/utils/import_utils.py +154 -0
  511. evalscope/utils/io_utils.py +336 -8
  512. evalscope/utils/json_schema.py +231 -0
  513. evalscope/utils/logger.py +121 -31
  514. evalscope/utils/model_utils.py +57 -1
  515. evalscope/utils/multi_choices.py +303 -0
  516. evalscope/utils/ner.py +377 -0
  517. evalscope/utils/url_utils.py +65 -0
  518. evalscope/version.py +2 -2
  519. evalscope-1.2.0.dist-info/METADATA +553 -0
  520. evalscope-1.2.0.dist-info/RECORD +628 -0
  521. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
  522. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
  523. evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -46
  524. evalscope/benchmarks/arc/ai2_arc.py +0 -151
  525. evalscope/benchmarks/benchmark.py +0 -76
  526. evalscope/benchmarks/ceval/ceval_exam.py +0 -146
  527. evalscope/benchmarks/ceval/samples.jsonl +0 -1
  528. evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
  529. evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
  530. evalscope/benchmarks/competition_math/competition_math.py +0 -79
  531. evalscope/benchmarks/data_adapter.py +0 -291
  532. evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
  533. evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
  534. evalscope/benchmarks/humaneval/humaneval.py +0 -79
  535. evalscope/benchmarks/mmlu/mmlu.py +0 -160
  536. evalscope/benchmarks/mmlu/samples.jsonl +0 -5
  537. evalscope/benchmarks/race/race.py +0 -104
  538. evalscope/benchmarks/race/samples.jsonl +0 -5
  539. evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
  540. evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
  541. evalscope/collections/evaluator.py +0 -198
  542. evalscope/evaluator/rating_eval.py +0 -157
  543. evalscope/evaluator/reviewer/__init__.py +0 -1
  544. evalscope/evaluator/reviewer/auto_reviewer.py +0 -391
  545. evalscope/metrics/code_metric.py +0 -98
  546. evalscope/metrics/named_metrics.py +0 -17
  547. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  548. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  549. evalscope/models/base_adapter.py +0 -52
  550. evalscope/models/chat_adapter.py +0 -138
  551. evalscope/models/choice_adapter.py +0 -211
  552. evalscope/models/custom/__init__.py +0 -3
  553. evalscope/models/custom/custom_model.py +0 -53
  554. evalscope/models/custom/dummy_model.py +0 -63
  555. evalscope/models/custom_adapter.py +0 -67
  556. evalscope/models/local_model.py +0 -74
  557. evalscope/models/model.py +0 -229
  558. evalscope/models/server_adapter.py +0 -111
  559. evalscope/registry/__init__.py +0 -1
  560. evalscope/registry/config/cfg_arena.yaml +0 -77
  561. evalscope/registry/config/cfg_arena_zhihu.yaml +0 -63
  562. evalscope/registry/config/cfg_pairwise_baseline.yaml +0 -83
  563. evalscope/registry/config/cfg_single.yaml +0 -78
  564. evalscope/registry/data/prompt_template/lmsys_v2.jsonl +0 -8
  565. evalscope/registry/data/prompt_template/prompt_templates.jsonl +0 -8
  566. evalscope/registry/data/qa_browser/battle.jsonl +0 -634
  567. evalscope/registry/data/qa_browser/category_mapping.yaml +0 -10
  568. evalscope/registry/data/question.jsonl +0 -80
  569. evalscope/registry/tasks/arc.yaml +0 -28
  570. evalscope/registry/tasks/bbh.yaml +0 -26
  571. evalscope/registry/tasks/bbh_mini.yaml +0 -26
  572. evalscope/registry/tasks/ceval.yaml +0 -27
  573. evalscope/registry/tasks/ceval_mini.yaml +0 -26
  574. evalscope/registry/tasks/cmmlu.yaml +0 -27
  575. evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +0 -28
  576. evalscope/registry/tasks/general_qa.yaml +0 -27
  577. evalscope/registry/tasks/gsm8k.yaml +0 -29
  578. evalscope/registry/tasks/mmlu.yaml +0 -29
  579. evalscope/registry/tasks/mmlu_mini.yaml +0 -27
  580. evalscope/report/app.py +0 -506
  581. evalscope/report/utils.py +0 -133
  582. evalscope/run_arena.py +0 -202
  583. evalscope/utils/arena_utils.py +0 -217
  584. evalscope/utils/completion_parsers.py +0 -82
  585. evalscope/utils/utils.py +0 -301
  586. evalscope-0.10.0.dist-info/METADATA +0 -565
  587. evalscope-0.10.0.dist-info/RECORD +0 -286
  588. tests/__init__.py +0 -1
  589. tests/cli/__init__.py +0 -1
  590. tests/cli/test_collection.py +0 -57
  591. tests/cli/test_run.py +0 -165
  592. tests/perf/__init__.py +0 -1
  593. tests/perf/test_perf.py +0 -101
  594. tests/rag/test_clip_benchmark.py +0 -85
  595. tests/rag/test_mteb.py +0 -138
  596. tests/rag/test_ragas.py +0 -120
  597. tests/swift/__init__.py +0 -1
  598. tests/swift/test_run_swift_eval.py +0 -145
  599. tests/swift/test_run_swift_vlm_eval.py +0 -127
  600. tests/swift/test_run_swift_vlm_jugde_eval.py +0 -156
  601. tests/test_run_all.py +0 -12
  602. tests/vlm/__init__.py +0 -1
  603. tests/vlm/test_vlmeval.py +0 -60
  604. {tests/rag → evalscope/api}/__init__.py +0 -0
  605. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
  606. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,628 @@
1
+ evalscope/__init__.py,sha256=oivLvqwNw2JlB-h-Z8_525IpfKcYEkS51F59tEfpy5w,445
2
+ evalscope/arguments.py,sha256=jKAF47PsqXRioU21gRHw9hxJnfR31z_X7c__glRY5ns,6257
3
+ evalscope/config.py,sha256=74sX1TH0OC6kSw9yedySfbcywVV6pXxna8DH-0_-hDA,11637
4
+ evalscope/constants.py,sha256=BRjknIG0NCuUK-040ZmrUsf7WRIHfObJgJ5ilJfPhAc,3791
5
+ evalscope/run.py,sha256=dKFesxZZteOhscHif2A8xQHsJnG78D-m2gdfaWyMNC4,6742
6
+ evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
7
+ evalscope/version.py,sha256=qGtSETKmFUoaalidgoIBbZIYjbYTgLAapOQYohFtYxg,118
8
+ evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
10
+ evalscope/api/benchmark/__init__.py,sha256=AFP3T_Pml-8LtT1ZaxVsrX_BCPXMw5ue7cfiBII-GrE,247
11
+ evalscope/api/benchmark/benchmark.py,sha256=pmb6Y6JRYDtkzBi5HTh2M64dkSabuXcemVg-WY7HdHo,11644
12
+ evalscope/api/benchmark/meta.py,sha256=SQ6b6wNSHN6BUCmr5Bdi3C-dFkgdCzZ_xnIjLmj3-y4,4302
13
+ evalscope/api/benchmark/adapters/__init__.py,sha256=XXER_DdSiz70t-I3w4CuMSeyxiOIRGzG8v3ZROs-o_g,340
14
+ evalscope/api/benchmark/adapters/agent_adapter.py,sha256=ecuvOayj9kRKuLaflbBDnFV3vibFxKkQw7a3fOvsYwI,234
15
+ evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=xGz92QS-ADKrMColnG1jHbtvEKYwzodTIX6YEJhoDaM,31171
16
+ evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
17
+ evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
18
+ evalscope/api/benchmark/adapters/ner_adapter.py,sha256=_rvfl8cNlvKoQkHqR2tC_K-xZaV0TsB_pIzI4sP_SM0,8906
19
+ evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
20
+ evalscope/api/benchmark/adapters/vision_language_adapter.py,sha256=5d7ITkeosikb7u0ag0WkMaZ0SAYGkR_wKM9NP495GKk,280
21
+ evalscope/api/dataset/__init__.py,sha256=RHFMzwfONEqmmn3vRtxyN3r29mipDUUUSEDhuwm0YpQ,147
22
+ evalscope/api/dataset/dataset.py,sha256=y-1DvPxN1Gxf-oEnrUq0Dcs4-rUQkApXP_rVYwsixSM,12119
23
+ evalscope/api/dataset/loader.py,sha256=44wQ3aBbn4YJyRjEsA1Bpg1DZicdCUzVybPoba_JhzY,9797
24
+ evalscope/api/dataset/utils.py,sha256=3E0ikqr6QWV_lX0d3Z4F4xFuVTcwbeDPgCvJY7v83Bc,4935
25
+ evalscope/api/evaluator/__init__.py,sha256=-Ure6X4GlE7VYSNWSZ_DpjbUBGa5irVTymLENEHTYqY,138
26
+ evalscope/api/evaluator/cache.py,sha256=xzQvLd2EzZOrWcHAauT-hdeRCkx6BqNIJ2rxvrMFMak,13370
27
+ evalscope/api/evaluator/evaluator.py,sha256=xMF4w2qiQ7NNgOhSKs9Vd4VZ33SCDwTTJ82lDhaj1FQ,1734
28
+ evalscope/api/evaluator/state.py,sha256=Elz2cmbvOOqvOaEOAMatxgk4BdjqDZB3XKTaL4iqJLI,9039
29
+ evalscope/api/filter/__init__.py,sha256=5eWKjT-dAiz8nE0S6WnU6plqjXZHYn7CJOgFiHSoovM,66
30
+ evalscope/api/filter/filter.py,sha256=fsPddaHE5wwFIXgUWITFqlYXqdh6vx3QqcEf3rSXKVI,2068
31
+ evalscope/api/messages/__init__.py,sha256=UKZ9VVCt7NPrcZXv_1e8MZ8mOWu0eLRvMIXykpJPZ9I,378
32
+ evalscope/api/messages/chat_message.py,sha256=D88TklSAWOaG21EBDVDoRPwzVCqzEGbVW4sA8Af4axc,10053
33
+ evalscope/api/messages/content.py,sha256=gUBUeK60BUhkwoulyzKL6q0iMt3VLlah9onLG1XVrWY,2772
34
+ evalscope/api/messages/utils.py,sha256=uqlEbYEoUKpXLW8tQtP-cY5Miq7W0Xl6a98j55u6m6E,1266
35
+ evalscope/api/metric/__init__.py,sha256=dVKKjUMwda_p6T3MR0Hz9NwSzXM7WVwzrjEy_RE2xyM,123
36
+ evalscope/api/metric/metric.py,sha256=DWMxAmAu8aisad81FpubQCkdfDLOiBaQ3NIgfhDp9y0,1702
37
+ evalscope/api/metric/scorer.py,sha256=dczSQwkRmPk1uvNCMGT5G6nYbwWTcpwsZtyYXWkrJII,3749
38
+ evalscope/api/mixin/__init__.py,sha256=xBuoTuao5o_EFThgeeeWI87x64Q12aJttsaZc8gak_c,83
39
+ evalscope/api/mixin/llm_judge_mixin.py,sha256=ECVDfxCeAEkymFssD7xKhIDcct2qgQTqGnbijXk9leE,5675
40
+ evalscope/api/mixin/sandbox_mixin.py,sha256=RbTpZXr6ohxgp1vU4YGMKmGKiIzVqQZ44quAHBX8zvs,6539
41
+ evalscope/api/model/__init__.py,sha256=YxKdz1IKUt6eYoC7nx81yD2BtyiWQDvaoTcc8O9lvoE,286
42
+ evalscope/api/model/generate_config.py,sha256=W5Yg8EyEMumIfpTGQMlZQ3D0p282pVIlhXGPj8sVQuA,8218
43
+ evalscope/api/model/model.py,sha256=c7YVbYYk47MHWwPjoB66xWjgmHdUGTOSOdtIsLcJfyc,12782
44
+ evalscope/api/model/model_output.py,sha256=NeN6bLtAvg_3fTirewWfdP-_x4SJXa9pGuRpyXJY3B8,9333
45
+ evalscope/api/tool/__init__.py,sha256=bEaW5ryY-erLcl2zMoDJNgiaBqlSPAL0jQ5daUHvvrw,272
46
+ evalscope/api/tool/tool_call.py,sha256=WqMnw69L_yhQWycENZ7azPRhxRidhmrMcYAy7UTIqvg,2836
47
+ evalscope/api/tool/tool_info.py,sha256=FQOBqxKZ6Qb4f40iRH1mLg64cEhu1_-9Rn-f5iUrD2w,5733
48
+ evalscope/api/tool/utils.py,sha256=IWFzM6WspzBmNPicXn6b7KS6Y-1I-ErsK9fua4cb53Y,2324
49
+ evalscope/app/__init__.py,sha256=HWLXld_JXcBDsdL4L_4E8JsKyuBwwPUSwlejKnZ3HKc,579
50
+ evalscope/app/app.py,sha256=EaBWorA87ZmyIHovIE3styHWEVFsu_F70pTmP4-5zTQ,836
51
+ evalscope/app/arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,699
52
+ evalscope/app/constants.py,sha256=oG6tZ618zJcCnwZ5THnYL0gWTPDb5XKrnmdrWxY3Z4Q,385
53
+ evalscope/app/ui/__init__.py,sha256=IBxyQ2H-kSHoHJmXWDR8QMermvsMbiu673PQbXP_FnE,616
54
+ evalscope/app/ui/app_ui.py,sha256=wLrQ4VM7BnzvaYmPAk8NH9t5BaWooHFJcgmAOOd2I1w,2032
55
+ evalscope/app/ui/multi_model.py,sha256=mvMgpgiJGRrNRtReFcD_PiLatq-81zp65Vb3JYUP3PE,15356
56
+ evalscope/app/ui/sidebar.py,sha256=JA0QbG2iPStK-lFy6x_AjOHlQdesmgXoS0OYJUJ_Wyg,1339
57
+ evalscope/app/ui/single_model.py,sha256=zFt1uDYrcgNJ7e_YLigrs6IXT3jyGMVn-7rv4CHAZvE,9741
58
+ evalscope/app/ui/visualization.py,sha256=jXFX_-7woQkcAiQkPAIRwVv1kdRdXonn9IvmB8yzPDU,1102
59
+ evalscope/app/utils/data_utils.py,sha256=GYOfkh0NoueeX3od-L852Q9C9SSkEFlW_40wjPa5b9w,7470
60
+ evalscope/app/utils/env_utils.py,sha256=2pmz4uNun-XNP6TqM6Oe576XopweEClhBaIdWO--kd0,382
61
+ evalscope/app/utils/localization.py,sha256=rWEviBmcnhIpAA-cG8djbbUA6p1Y358c0dxge5Pqi1U,6131
62
+ evalscope/app/utils/text_utils.py,sha256=-K-hRPMZ29Yqjhzd-391gPaD4B4wUuIg71PfbLnGJ38,3754
63
+ evalscope/app/utils/visualization.py,sha256=lycwcr-kFT2FKVw6iWMh3iD_n4dqpWVzhXMLDnkN8QY,3563
64
+ evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
+ evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
66
+ evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
67
+ evalscope/backend/opencompass/api_meta_template.py,sha256=OGH0lGJmBFKHs-6u6RPCov13_ArO63E6pV-aX1WVljU,1707
68
+ evalscope/backend/opencompass/backend_manager.py,sha256=q_5ABnnJb14T2L2bKY2y-ErJ9K4_65Rpl0a-h3hZ4TM,10337
69
+ evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
70
+ evalscope/backend/opencompass/tasks/eval_api.py,sha256=ZaGdUbEOtAW5VX3ZXmpHIttg_QrID34EnBTylD3uvos,1152
71
+ evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=JHSq4EnPJgv4sRJJplLH80EqE3ghtkn2k8HnV6DaDew,5406
72
+ evalscope/backend/rag_eval/__init__.py,sha256=Tbj7HboP5zzJ77-9qVEwwhHKjHL5V8MwLFr6sw1oeoA,291
73
+ evalscope/backend/rag_eval/backend_manager.py,sha256=iEer5IhEJ8nOXW_s3j6l5jvfLgBftcGQMAtJk69Wzdc,3521
74
+ evalscope/backend/rag_eval/clip_benchmark/__init__.py,sha256=C8Vetf52nyHiRwY2Pm74Bjn3UpWboQeghCGNh67X1EM,151
75
+ evalscope/backend/rag_eval/clip_benchmark/arguments.py,sha256=d5UkbC3RXb6iyzy_ILumToAVO1AdwvDeyOiX5KB2u0g,1530
76
+ evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py,sha256=_MuzGblPP-QBRB7IQJ9r08FmJfH7S82nynzijK7bvsM,8848
77
+ evalscope/backend/rag_eval/clip_benchmark/task_template.py,sha256=lvgGVQ-EHwGxo61bf_X8ofkaPJ3qTbsRv7-xNjyIzUQ,3883
78
+ evalscope/backend/rag_eval/clip_benchmark/tasks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
79
+ evalscope/backend/rag_eval/clip_benchmark/tasks/image_caption.py,sha256=CQnWZZTQ0FOzDtmGv7OF0W4Cv4g6u4_LQ93koDu1pes,2556
80
+ evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_classification.py,sha256=NwpxNECN7NFgtlVdKY7vet5m-gAmIp8MJYka0eexWu0,7424
81
+ evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_retrieval.py,sha256=t0Uq7W0sPDBJS1rqp70KgSfeRQ3c7u8YeGhj5Yiu6rk,5646
82
+ evalscope/backend/rag_eval/clip_benchmark/utils/webdataset_convert.py,sha256=rZY-TulG-Cb8b6GTBxqTDYQ_4Ois3kbgKhuunZq8Ato,8407
83
+ evalscope/backend/rag_eval/clip_benchmark/utils/webdatasets.txt,sha256=eiiAaxhS48b5rVLy5O9VvFfV2AfxY86ITu_iqT7ZLkQ,649
84
+ evalscope/backend/rag_eval/cmteb/__init__.py,sha256=I502GHPFYo8BwlFvoljGKI24PY76eBXJQiquWk8nJNU,280
85
+ evalscope/backend/rag_eval/cmteb/arguments.py,sha256=xROhoVxJvMhhU9S5SKtiavQHM447esbrVWlbmes4AVI,2814
86
+ evalscope/backend/rag_eval/cmteb/base.py,sha256=UCobQ81dHkiTmIz_0BJ_VANj_uG6mkJbYLKJztvMXfo,2849
87
+ evalscope/backend/rag_eval/cmteb/task_template.py,sha256=vPfbBvtVjX6U6QHEG5mRP9CQjFMF-_8EdrpYoNHbDFU,3303
88
+ evalscope/backend/rag_eval/cmteb/tasks/Classification.py,sha256=sqbH0XmSiIm4n5UX5sXMwJHby1r-d35mwW1tKIhb2Hg,10848
89
+ evalscope/backend/rag_eval/cmteb/tasks/Clustering.py,sha256=-GTwORxILSkkXXGtTxuPTKSHNXQEllCRoUjuR7pnwFM,8962
90
+ evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py,sha256=_uuDPaerh6qbxw7W3DiPrWuxfEyLeKCHeduYcp-1Veg,2025
91
+ evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py,sha256=yISp67pXw4fSrsqTiYmfas6uPyqwE45L1c58Tpydc0E,4075
92
+ evalscope/backend/rag_eval/cmteb/tasks/Reranking.py,sha256=AH7jwJ45WAVxVb60I2DTURVanIAbrlZzk-ey_dHWEO0,5491
93
+ evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py,sha256=ofmmeoieXHmU6O14JKWO9GUpuEEmcWwc78Q7ZJjRDZs,11454
94
+ evalscope/backend/rag_eval/cmteb/tasks/STS.py,sha256=uhGLsQTo5lM3-L2Na3WJGqOLQw3c1WxHDA22ePJPxtU,12285
95
+ evalscope/backend/rag_eval/cmteb/tasks/__init__.py,sha256=PKBNyp45hIa3FYNA1psiwtwfwUcn7s9eNt6r5aUpyyY,1505
96
+ evalscope/backend/rag_eval/ragas/__init__.py,sha256=D0yJkN9SuNGIAL3niZw4BI08Yh3HznsUUewdIAa_-LM,171
97
+ evalscope/backend/rag_eval/ragas/arguments.py,sha256=S6M1nsqwMQ8lnZZDtlQTdzyOCfLn9WP0QJ_7wAEsVgc,1695
98
+ evalscope/backend/rag_eval/ragas/task_template.py,sha256=ikLBEwYKuXe4dcc0SC7orWOEpYpT0kBG46op_s2yM6U,1674
99
+ evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py,sha256=fX9sCci787ViGiL3BhGsykx0bnWfOWWEFueaJKyR8g4,793
100
+ evalscope/backend/rag_eval/ragas/tasks/__init__.py,sha256=hErdWKbvV9aRqOpQTzdFHw1tcYoDbnttmic7GpZzKx8,173
101
+ evalscope/backend/rag_eval/ragas/tasks/build_distribution.py,sha256=zHUbUkLPoqcTpJfZQlmIs2GIbuJwH2PjhgvRzXZGbTM,1496
102
+ evalscope/backend/rag_eval/ragas/tasks/build_transform.py,sha256=kbk9pwxQgWCgAV26kfWtgz8Ji2GHPZX_kkOP6ayoSI0,5449
103
+ evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=XMWW8ucN7ojRLLCii_jbUtvOqiISFO1NQl1XBNimHkY,5789
104
+ evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=dZAjsfiR839INO3nbb9psLn-eL4sZOzpU6JMdtJUXtw,1895
105
+ evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
+ evalscope/backend/rag_eval/utils/clip.py,sha256=GLHhPCac2AH35AvRLvVqePA1gIMAewHTFmCJCDZzvqU,5015
107
+ evalscope/backend/rag_eval/utils/embedding.py,sha256=nuwBsiXPAwZisEmg3V4fWekd2tqp5mWRVb_fxNB1zTg,9867
108
+ evalscope/backend/rag_eval/utils/llm.py,sha256=1OH-985iIDtCOlCtzGmHu6GT_l1vJe7Iv-WyltQbcSc,2451
109
+ evalscope/backend/rag_eval/utils/tools.py,sha256=FU7tNu-8y8V_o_kArFVTTLM_GzL12KBNeXiwQw5SpJA,1529
110
+ evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4tycyy-FJgzLdjeY,84
111
+ evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
112
+ evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
113
+ evalscope/benchmarks/aa_lcr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
+ evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py,sha256=7KZRdIhg733vBMBWngxTjtrZtl_DHjwMNLt9C2tN0_w,7483
115
+ evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
+ evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
117
+ evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
+ evalscope/benchmarks/aime/aime24_adapter.py,sha256=UGS0DhfylTbhyOfnOGKwDiXW0lMd47EPeMtY-WNPht0,1935
119
+ evalscope/benchmarks/aime/aime25_adapter.py,sha256=W2Jf68G8-QSgbZxgPJvCBq1VbQ-wRbeH1u9Qb2WNZkA,5157
120
+ evalscope/benchmarks/aime/grader.py,sha256=7qi3aFY6F-o70H3zRH_QHrXmPQz0euAhJaw_IATiw8k,9259
121
+ evalscope/benchmarks/aime/math_normalize.py,sha256=--ax2mPVb2jXtfk8__K0OYKit3HiDqKOFEcOSSX9SA4,5830
122
+ evalscope/benchmarks/alpaca_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
+ evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py,sha256=sjaWcK8WH1XY0kzm5eHsq_7J62EJocAf4gRV_UB8ZBE,4971
124
+ evalscope/benchmarks/amc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
+ evalscope/benchmarks/amc/amc_adapter.py,sha256=ame7mUbcXx1gvIVaqdv0HyBNZEaUYn3Amy06mO_sMos,1586
126
+ evalscope/benchmarks/arc/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
127
+ evalscope/benchmarks/arc/arc_adapter.py,sha256=GASZmoJ-PpzBG70cBdABZA5uVqoyosjV-jf9WShK7L8,1622
128
+ evalscope/benchmarks/arena_hard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
+ evalscope/benchmarks/arena_hard/arena_hard_adapter.py,sha256=1XNzbu12FIupWgVlruaOQZ4TGj_Tkg8xgYaqQ4q3H0M,7302
130
+ evalscope/benchmarks/arena_hard/utils.py,sha256=23xCd7_ksrM4xMJBp7N2ZwpUpq1zpoQFjLm1oBcdgQY,5559
131
+ evalscope/benchmarks/bbh/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
132
+ evalscope/benchmarks/bbh/bbh_adapter.py,sha256=lRI-DfdFkyg4ylW4d-6CUfiNqlF7K_IoTjzJz3jYTUs,6346
133
+ evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt,sha256=xnzlaIRyeGlogG49v8nt4vpJO40J06ev4yc8cv0VSRY,1781
134
+ evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt,sha256=sfo-2iOeVzB0OGgd7NSQFELTGDTsr2DQ3u-g0ivI-sM,3653
135
+ evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt,sha256=UJBsc3Mwz8TZngdWH_NFlhhNbLhNHK6FvW9FHcS8H5g,1167
136
+ evalscope/benchmarks/bbh/cot_prompts/disambiguation_qa.txt,sha256=N28JGB5_023fBzoo5HImvjz3A0zPZGoiTMPngQY8pNo,3568
137
+ evalscope/benchmarks/bbh/cot_prompts/dyck_languages.txt,sha256=pkUw0ezfvNgbYzUKCchJ6o7bg6UCVL_62LWqS6lKZY0,2405
138
+ evalscope/benchmarks/bbh/cot_prompts/formal_fallacies.txt,sha256=LDRYUp6fGvG6K2l69VvDF5Szf-CUtgPqobWaQ3MHJ7A,4477
139
+ evalscope/benchmarks/bbh/cot_prompts/geometric_shapes.txt,sha256=H4LkGVmx3U4F6vuqRYXKDpHOBCu7MYksLFWp1QfyDPk,4831
140
+ evalscope/benchmarks/bbh/cot_prompts/hyperbaton.txt,sha256=H8BorN-CyUrf0vrIANSgEILynJhpS02CiGjn-qad9NQ,3114
141
+ evalscope/benchmarks/bbh/cot_prompts/logical_deduction_five_objects.txt,sha256=0e-abSxfoGJC8aYYtRCHlK_2UkiFwffSnv4iN7XXPLs,2505
142
+ evalscope/benchmarks/bbh/cot_prompts/logical_deduction_seven_objects.txt,sha256=0e-abSxfoGJC8aYYtRCHlK_2UkiFwffSnv4iN7XXPLs,2505
143
+ evalscope/benchmarks/bbh/cot_prompts/logical_deduction_three_objects.txt,sha256=0e-abSxfoGJC8aYYtRCHlK_2UkiFwffSnv4iN7XXPLs,2505
144
+ evalscope/benchmarks/bbh/cot_prompts/movie_recommendation.txt,sha256=Q4XZmrnTL1r8JCcB0mvJnb3oNUj45qjM-AfNK2ElWOQ,2121
145
+ evalscope/benchmarks/bbh/cot_prompts/multistep_arithmetic_two.txt,sha256=YJ7chn5QrpNm8VHHqG2B7gYwBUt08fyT_qHhcc9KT-Y,2386
146
+ evalscope/benchmarks/bbh/cot_prompts/navigate.txt,sha256=n3Evl10cdk8VeMfZgUdu3knBH64LmLY5d4cQTnGMLuU,2147
147
+ evalscope/benchmarks/bbh/cot_prompts/object_counting.txt,sha256=SMQGqNi8JVCEVWcVVgQDedzKjslZSxHLcP68ECWX-Xc,1418
148
+ evalscope/benchmarks/bbh/cot_prompts/penguins_in_a_table.txt,sha256=MlrdrvrBaUcW7VjWLLdN-O_yfwVFfYWHobGq099Cyhs,2386
149
+ evalscope/benchmarks/bbh/cot_prompts/reasoning_about_colored_objects.txt,sha256=jcL33cVyscRutNM793hWCryMBWQ-JFLip0DGM1UdAUc,2295
150
+ evalscope/benchmarks/bbh/cot_prompts/ruin_names.txt,sha256=StrmTfLxHtvx4QM-zf1V2u8u1VQSxnZrI7Mwiizvjyw,3481
151
+ evalscope/benchmarks/bbh/cot_prompts/salient_translation_error_detection.txt,sha256=fJKB5sYhH0an4Oeqm53RDIu4mExZJVijCvGzje0nLz4,6141
152
+ evalscope/benchmarks/bbh/cot_prompts/snarks.txt,sha256=tvp4IAtaSNv8CKKeRx_G_PTVMICkenBNmMaq10SNXAE,3114
153
+ evalscope/benchmarks/bbh/cot_prompts/sports_understanding.txt,sha256=yRYmj1f0fwY8tiXTj_iiBYz5u4E4n7Sd3r0bJXHjSco,821
154
+ evalscope/benchmarks/bbh/cot_prompts/temporal_sequences.txt,sha256=-qG7hItFjeahSB0EVvcikmLIR08P_fTIC-J38eV2fyk,3023
155
+ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_five_objects.txt,sha256=Su_-fICm9LxGpAkQlRbUZKvet_wPqTK-5jQo_VqJxQI,2604
156
+ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_seven_objects.txt,sha256=Su_-fICm9LxGpAkQlRbUZKvet_wPqTK-5jQo_VqJxQI,2604
157
+ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt,sha256=Su_-fICm9LxGpAkQlRbUZKvet_wPqTK-5jQo_VqJxQI,2604
158
+ evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
159
+ evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
160
+ evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
+ evalscope/benchmarks/bfcl/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
+ evalscope/benchmarks/bfcl/v3/bfcl_v3_adapter.py,sha256=ILOSPo9GR2BQAz-RrzptKGvCNs9HeW2YRUa0e-r1hPU,17509
163
+ evalscope/benchmarks/bfcl/v3/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
164
+ evalscope/benchmarks/bfcl/v3/utils.py,sha256=X1nfKmXp_dKUoYb4BzNN0-EwArE0Ppfi6m0mYB7ccLc,859
165
+ evalscope/benchmarks/bfcl/v4/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
+ evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py,sha256=hJWOT7WtgxuwKT_AmtAF3h25JnvYYXbDR1WWnyQOE9w,8974
167
+ evalscope/benchmarks/bfcl/v4/utils.py,sha256=bQInR19wJFPIOiRGjrJc3bGWWkJbL7zHwj3RdSavB5Q,15142
168
+ evalscope/benchmarks/biomix_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
+ evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py,sha256=rHid3tPEfauB_Q5pF3mMoyuyV01SHyBJEXm-7A2HV24,1218
170
+ evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
+ evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
172
+ evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
173
+ evalscope/benchmarks/ceval/ceval_adapter.py,sha256=4FLPgY-UtqINafnNxfOsE9AwS6GFXFCUGOBI-4EZUGk,8503
174
+ evalscope/benchmarks/chartqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
175
+ evalscope/benchmarks/chartqa/chartqa_adapter.py,sha256=DA1kthMUvn4_GUfdRfuR-au3RkhE3WKPnR_f8nlhd4c,2813
176
+ evalscope/benchmarks/chartqa/utils.py,sha256=Ta9ZUMpIqzrAszju7_WOMBAlilH1Tx6TCheVpjrZJJI,1672
177
+ evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
+ evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=OWzRlSGswV24V-heLqqo7GQzpJp01TZ0DhFHq0iUP9A,8238
179
+ evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
180
+ evalscope/benchmarks/cmmlu/cmmlu_adapter.py,sha256=P0VPAL5T2V_zj0q7im0FdDoq_W5rinorwN5FRYaFFUI,5377
181
+ evalscope/benchmarks/coin_flip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
+ evalscope/benchmarks/coin_flip/coin_flip_adapter.py,sha256=qHg0kN4SX5cT_3cyFg0wfN69ldIEivyZTTX1A6j7LD8,4687
183
+ evalscope/benchmarks/commonsense_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
+ evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py,sha256=Y1c13U5D4x9oNTQ5F3ve2_3Ia1fkQXiqcf3ESODT4HQ,1109
185
+ evalscope/benchmarks/competition_math/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
186
+ evalscope/benchmarks/competition_math/competition_math_adapter.py,sha256=0zOsMwl1mNGDzOEQqsISa6GcwliPtWz0EBEHm3TR-AI,2394
187
+ evalscope/benchmarks/data_collection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
+ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=PbV5S0rUVF0jgrBKNjuZh2oE1FAsbYnPymg5u7NBjqo,8712
189
+ evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
190
+ evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
191
+ evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
192
+ evalscope/benchmarks/docvqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
193
+ evalscope/benchmarks/docvqa/docvqa_adapter.py,sha256=xGaayycILYoLd8r6wLLppDbU6Z1FdafbYFyjLHaftAA,2882
194
+ evalscope/benchmarks/drivelology/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
195
+ evalscope/benchmarks/drivelology/drivelology_binary_adapter.py,sha256=_wqS0h4qQBeGBx9W_KbtO_tfzpxXsM-jejI0TK_i0io,7005
196
+ evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py,sha256=fnPPFma-fTRe5B3n_1ObN5wS_jY1QvCA9mcovAMR4ss,11735
197
+ evalscope/benchmarks/drivelology/drivelology_selection_adapter.py,sha256=dUTs0Dqc-54haam478Y0UkiFoDH7YgKfQE-5vxk99NU,1655
198
+ evalscope/benchmarks/drivelology/drivelology_writing_adapter.py,sha256=6-DZxTlDGIJ8iM-egu29RlpGApNjUouv6jD11PVXU3U,8026
199
+ evalscope/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
200
+ evalscope/benchmarks/drop/drop_adapter.py,sha256=Vl6IgFtK3zIUujlrjcCI7oVLlvpaRanDcDDSTWNbDfU,8851
201
+ evalscope/benchmarks/drop/utils.py,sha256=zdT31cqVp6gzIcOxsxsqfTn97SZnTuM3vuvLls5VJWY,4878
202
+ evalscope/benchmarks/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
+ evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyTT5r521676d_xJ6Vg,5514
204
+ evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
205
+ evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
206
+ evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=0qydHMwVaD7bzyK2Mccv9m4JnOfSy2vE8g1YPOaWSg0,21663
207
+ evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
208
+ evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
+ evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=p7_C0CmKJthMY1Iri1SyNfssuYBws_dkhPMREu-uM94,2059
210
+ evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
211
+ evalscope/benchmarks/general_qa/general_qa_adapter.py,sha256=_7Jk_h-qcaxWHgrULojNqXwZ8XgicmXhYT8bOKwnyAU,3519
212
+ evalscope/benchmarks/gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
213
+ evalscope/benchmarks/gpqa/gpqa_adapter.py,sha256=zWK2hhyKw5n8K30YvMjSm6XMwyrireODGTE6wKmyuOo,3311
214
+ evalscope/benchmarks/gpqa/prompt.py,sha256=b1Gw2D5dEdhvLYymPfcvGKJdHrIzpiZkOwURKSxiQJg,5576
215
+ evalscope/benchmarks/gsm8k/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
216
+ evalscope/benchmarks/gsm8k/gsm8k_adapter.py,sha256=s0ytoqydH3FEG0KALrIlYXOLBKSrC7ikh0r8_v2dKGM,3579
217
+ evalscope/benchmarks/hallusion_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
218
+ evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py,sha256=LOnO1mvUJxU87-bZBC8qYtwlmFn3So2Yo9I3CkDjtIg,6544
219
+ evalscope/benchmarks/halu_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
220
+ evalscope/benchmarks/halu_eval/halu_eval_adapter.py,sha256=4bdzOEyOjxbTng9U94Yscknc7eeJSFyVf_ifZtTqYnM,5332
221
+ evalscope/benchmarks/halu_eval/halu_eval_instructions.py,sha256=z0_1rx3PqQHbheiUpUAdp4aUP6oBMMAEAIvDmWND07w,9770
222
+ evalscope/benchmarks/healthbench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
223
+ evalscope/benchmarks/healthbench/healthbench_adapter.py,sha256=gm5LOR5J1E3eXQ9aWF-rif2_l7Khx9UwS1Dfg-oEx8E,13242
224
+ evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGgVczAv-LPjA7Y,3683
225
+ evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
226
+ evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFxWOVKBUhk3Cc0EGzF5uYLK4,2041
227
+ evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
228
+ evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
229
+ evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
230
+ evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=KhJ91yqr964FccHIDE9A_nTldfhhLTzVB4Cpv3RDN5I,3933
231
+ evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
232
+ evalscope/benchmarks/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
233
+ evalscope/benchmarks/ifeval/ifeval_adapter.py,sha256=55FQwJ0_eDijppkVVlM5XCXzgRFmjH1SvGMItGsvn6o,2769
234
+ evalscope/benchmarks/ifeval/instructions.py,sha256=HXnn1JgU3dpYltqIovFAn02DxkYOGw337kLMlOfJxJE,56048
235
+ evalscope/benchmarks/ifeval/instructions_registry.py,sha256=3UXzVLgKwk_cf-2aG2tozjqYgvqm5Mj3ZRRb8rI-ucU,7262
236
+ evalscope/benchmarks/ifeval/instructions_util.py,sha256=Zl9Q6xwtZtIkXLoVwz7oifSEyvbDGETljKHgc4tk6TM,25730
237
+ evalscope/benchmarks/ifeval/utils.py,sha256=MQt-b4K6uqU9H5TAM6Gxyz46r6XRBOgDsgdnwB0veg0,4470
238
+ evalscope/benchmarks/image_edit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
239
+ evalscope/benchmarks/image_edit/gedit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
240
+ evalscope/benchmarks/image_edit/gedit/gedit_adapter.py,sha256=a6hhRbnGCvMEMsbnSbczjXd4vHfMVEnFfP459FCF_Mc,5250
241
+ evalscope/benchmarks/image_edit/gedit/utils.py,sha256=UN0z9Dafs8d8lEXqxin321d8smiS3H9p3gyLkZFPFNg,14735
242
+ evalscope/benchmarks/image_edit/gedit/vie_prompts.py,sha256=qVXWQyVUwZxEasDjVmYBk30_JI4gnvHacMOmMsA4wcI,22056
243
+ evalscope/benchmarks/infovqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
244
+ evalscope/benchmarks/infovqa/infovqa_adapter.py,sha256=3m_EvfRZ5ItHkz-3mVlsF_NnPS7NH1-EXwUW-s4VMxA,2617
245
+ evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
246
+ evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1czqwnk8Zk-jRhBys,1304
247
+ evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
248
+ evalscope/benchmarks/live_code_bench/evaluate_utils.py,sha256=maN8qHmDHJpexPeB0qwZoXJ5zrqPbJDYVRptqvXI9d4,6827
249
+ evalscope/benchmarks/live_code_bench/extract_utils.py,sha256=ZcQ8y741uawPo6I_1_XglR3eqJFDNrqc8fILKZupVRs,2375
250
+ evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py,sha256=iJzmge-txK86lPn9mQbVlA7CPWXR4MpVdEDCfckkNRU,6236
251
+ evalscope/benchmarks/live_code_bench/load_utils.py,sha256=fEzWz_fUGwi5Ncum5PNVF9jFcuDwGgs7Vt_10YKBE2Q,2087
252
+ evalscope/benchmarks/live_code_bench/pass_k_utils.py,sha256=Ktrp_lXdfFzoHtQNQNdGfIl26ySjaPCHm4Zv-dFvRqM,2024
253
+ evalscope/benchmarks/live_code_bench/prompts.py,sha256=P4KILIAIDT1MKDck0xHYV_6v9820wDZRhxVMazmlL-g,12600
254
+ evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py,sha256=7DDx46EwtoR776vWjofJl1zaYCLdmeq8cF3fhDGdZgA,7424
255
+ evalscope/benchmarks/live_code_bench/testing_util.py,sha256=TuoOTciC-hz3FTeDzsQB_THH3Be9UOP2XMrax-4sXkM,17282
256
+ evalscope/benchmarks/logi_qa/__int__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
257
+ evalscope/benchmarks/logi_qa/logi_qa_adapter.py,sha256=LmmG1gI8AOCpV-35_WdiZ_9Ges-pTufWEzbgrOUW3Go,1271
258
+ evalscope/benchmarks/maritime_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
+ evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py,sha256=Rx7iZ5JaEo73YwIzhm78gMDQ6gqcErbnWWXHxXM6BcU,2379
260
+ evalscope/benchmarks/math_500/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
261
+ evalscope/benchmarks/math_500/math_500_adapter.py,sha256=vPWqytzbJayhuJjh9Wv9gq44tkwrDehMmtPFx7QXYvs,1970
262
+ evalscope/benchmarks/math_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
263
+ evalscope/benchmarks/math_qa/math_qa_adapter.py,sha256=R0u0tiknCtxzMPEksstCDPT_M71pL2SxoF_KAEMxf7A,1170
264
+ evalscope/benchmarks/math_verse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
265
+ evalscope/benchmarks/math_verse/math_verse_adapter.py,sha256=Z4b2e3vCnllbhb457xJ6HJ6urfqfW3mHD9ZSumoEjQA,4321
266
+ evalscope/benchmarks/math_vision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
267
+ evalscope/benchmarks/math_vision/math_vision_adapter.py,sha256=LEM5Zs8cqQpqeNaRW0CzHRPPMGu4NHrkjl-EgP4RHaU,4643
268
+ evalscope/benchmarks/math_vista/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
269
+ evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=yHz8kVPp0fkfL6n3lcPkdhOFrXsR2mOEA1oUoTFiwJs,5096
270
+ evalscope/benchmarks/med_mcqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
271
+ evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py,sha256=oR2463a1kLAfBZOPA5gLOp2C6qx1cu2vKCutXZsoQys,1090
272
+ evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
273
+ evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=YaMT--hbOmlNlK5Q7iQ7c5XWVhLf3isVipeexOvrUOE,1823
274
+ evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
275
+ evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=ht2DVt_zEBJp4jvGy3myHHgdUUP9eff2O5BpIc9Fv74,4376
276
+ evalscope/benchmarks/mm_star/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
277
+ evalscope/benchmarks/mm_star/mm_star_adapter.py,sha256=oamLv6U2-JAK5mdVLkUgYxkOahxQkQYMRKAyu_xPAUE,2818
278
+ evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
279
+ evalscope/benchmarks/mmlu/mmlu_adapter.py,sha256=2NT3QbfPzajUTFZ0tBCl6PRrtFtAr5jPZNQRW2Idlno,5947
280
+ evalscope/benchmarks/mmlu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
281
+ evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=LTNNWREOc4HQ-A1_x5lItdZbzEvUCy77zkp7ZAh0hlY,3890
282
+ evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
283
+ evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=m_37OIFrJB4ZIvtbDJ_m9P9mA2QtrNjGfbbVo15awJg,7402
284
+ evalscope/benchmarks/mmmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
+ evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=WrykWq8n61CVrQ4XQhI3iEySgErHdZyng3udOL-Pddk,6054
286
+ evalscope/benchmarks/mmmu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
287
+ evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py,sha256=banPS1nDt9bQ95urKbSZnR-hBTw23eL9MSrHt_0ZLp0,4725
288
+ evalscope/benchmarks/mri_mcqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
289
+ evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py,sha256=OJnJDo_yLZihYXeAIyPQo9fMAgcAfSqEPpfhHpxvtXY,1095
290
+ evalscope/benchmarks/multi_if/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
291
+ evalscope/benchmarks/multi_if/ifeval.py,sha256=7y2rnJ4q1_DVA7I9mUnF7TBpu7Kez0X_Xhl-AJInzWk,87949
292
+ evalscope/benchmarks/multi_if/metrics.py,sha256=LWnhQw25cRNMReJ_xJ7Fx7WYHcT9i2FG1FUjYOuQDrI,4291
293
+ evalscope/benchmarks/multi_if/multi_if_adapter.py,sha256=I3_YPPUuRbrs9Gt3Qjhx9RM5Vu2gDFnheDcGu-oe840,5924
294
+ evalscope/benchmarks/music_trivia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
295
+ evalscope/benchmarks/music_trivia/music_trivia_adapter.py,sha256=zxJuNfCEQ2yU6OivAzrdhVSGcwPuu9dygho4VzELyZg,1281
296
+ evalscope/benchmarks/musr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
297
+ evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJho8yVTup403Hc,1473
298
+ evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
299
+ evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=aQw8Sss1-ZgQPWqwMITOpAtwzMoYWDGjLhUpZtkcrvY,17030
300
+ evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
301
+ evalscope/benchmarks/ner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
302
+ evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py,sha256=zA4K2LuEtu5MV7yJ8ClJ8kttFqcyO82KBpt6SornYWs,2012
303
+ evalscope/benchmarks/ner/conll2003_adapter.py,sha256=r_6UmR68ohFsn021zArkGRq0tRZSaIy9RNNJncag0i8,1970
304
+ evalscope/benchmarks/ner/copious_adapter.py,sha256=ufxsmTvEEayLaDJcUW5--oo6vkDY69W2yQ1fpD0E5lQ,3751
305
+ evalscope/benchmarks/ner/cross_ner_adapter.py,sha256=8UHFvZxKEghk30JZgvWbYFXnRBna5PHwI8_WZXpmCfg,4916
306
+ evalscope/benchmarks/ner/genia_ner_adapter.py,sha256=WHTSRj8PFvm19F1iqnujZ4qySIQ0rV7tiI-3HxuV75s,2457
307
+ evalscope/benchmarks/ner/harvey_ner_adapter.py,sha256=UVqpPNbTeWo_UgJm4f8xxOq1umXyOWb4pCz7s1ZxJpg,2098
308
+ evalscope/benchmarks/ner/mit_movie_trivia_adapter.py,sha256=jOldqrPdrFDSvs8ajidwK-c4zktclKu7KgLfHXBH-nk,3017
309
+ evalscope/benchmarks/ner/mit_restaurant_adapter.py,sha256=wUJBLGH992pbeKhb-e8ywC0XVvTGMiAPKjUxrpg9Iqo,2528
310
+ evalscope/benchmarks/ner/ontonotes5_adapter.py,sha256=oulC4XkVF42yjXWPuKg_zptLQiRItCmlZBlHN0shr6A,3546
311
+ evalscope/benchmarks/ner/wnut2017_adapter.py,sha256=uGrfp-4wYIcpEL9PqQx82uzCeWz6vIPKb7JlStTSE9M,2379
312
+ evalscope/benchmarks/ner/cross_ner_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
313
+ evalscope/benchmarks/ner/cross_ner_entities/ai.py,sha256=RcgzYCygBmyrSOLacxxUN4cUznBZ3NemwfSR4hYBVKs,2484
314
+ evalscope/benchmarks/ner/cross_ner_entities/literature.py,sha256=ETzhu4PmiKS88NRkKPh96J9KiXKFdeQk5s_LSNqbD-M,1874
315
+ evalscope/benchmarks/ner/cross_ner_entities/music.py,sha256=_aJyKo83pO-j_LtGwXgrg9p8H1sHqXGPNW-wv1EIfWc,1999
316
+ evalscope/benchmarks/ner/cross_ner_entities/politics.py,sha256=taAqCnGdxHZGHM7sV0KONim8GjqVBrpMME6CVHwfJMo,1635
317
+ evalscope/benchmarks/ner/cross_ner_entities/science.py,sha256=DVZrCuMQ6-sPvRNTfx8iF_x9LaEBZ4o_RIWZADYKYGE,2919
318
+ evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
319
+ evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
320
+ evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
321
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
322
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py,sha256=31bL0V32Fq7prF1WoVjXmrmMdhg0qNcoiOaKykKOrZM,36528
323
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
324
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwSaOEPPuCaX3Z8BGoZVvcc6OWbeO9w,7976
325
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
326
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
327
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
328
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py,sha256=hhF2MuPo5n6uM0OCgTHCNIgscNVhXRb3koqU73AErwY,15924
329
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
330
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
331
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
332
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py,sha256=qCuqDtsCfxAiQHYLNdHU7BQ9kLIZ9iyfmRxtIrGOBck,20349
333
+ evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py,sha256=7HzM1PEw8wNOhmQOsZe582Y2rr4u66Q3JKVvvMasntE,19565
334
+ evalscope/benchmarks/olympiad_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
+ evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuwCWVb4h1PIQKAIFqBehwRwO2WOD0KX_ik,6565
336
+ evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
337
+ evalscope/benchmarks/omni_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
338
+ evalscope/benchmarks/omni_bench/omni_bench_adapter.py,sha256=IJkRSokQC6MF_pN46Yofr_NaZaNt1XZFX1PUBmX4-qA,3651
339
+ evalscope/benchmarks/omnidoc_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
340
+ evalscope/benchmarks/omnidoc_bench/end2end_eval.py,sha256=71IEdeDsldtoFmMb1c_Pyugv-Wx-WOVIvccRkPvsJdU,15916
341
+ evalscope/benchmarks/omnidoc_bench/metrics.py,sha256=DZfaL5BlDjnW60kRnnfmsMgldPOKX0MJ2tAdsBf4dI0,20620
342
+ evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py,sha256=YpXl-HUiD-VjtwtWHG4KSUw6GAYIeKnpgqEXsweWnKY,6164
343
+ evalscope/benchmarks/omnidoc_bench/utils.py,sha256=Db6QeIq_bc6Dl5xdYel5G7tnWib9_vn_KFiKeFN37IA,74435
344
+ evalscope/benchmarks/piqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
345
+ evalscope/benchmarks/piqa/piqa_adapter.py,sha256=V3-8a7Ah04UgEWzYrQfGKiPk4xvpLS74G4mJWM1MqPI,1075
346
+ evalscope/benchmarks/poly_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
347
+ evalscope/benchmarks/poly_math/poly_math_adapter.py,sha256=yO8CSFXIoe8AzKIXq7IffefTiWPQrdh-4igt09KPb8o,5545
348
+ evalscope/benchmarks/poly_math/utils/instruction.py,sha256=v3E8TnoWlooL_Ms5CQySzMmdyPKHAO005tGtTWMviPo,6901
349
+ evalscope/benchmarks/pope/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
350
+ evalscope/benchmarks/pope/pope_adapter.py,sha256=iQFcAjh48Su76e8CV-Tj2oVzzls0mUIYRUUyknj0dqs,5035
351
+ evalscope/benchmarks/process_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
352
+ evalscope/benchmarks/process_bench/process_bench_adapter.py,sha256=7Bri8ZALJAMKKf1_rtQw1bH9-IuvutwZ9gMNXBgQpmY,6200
353
+ evalscope/benchmarks/pumed_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
354
+ evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py,sha256=R1AjPTqqV8N7IvBNx3Qydd39EP5QcA4BffG3a1WEDP4,6778
355
+ evalscope/benchmarks/qasc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
356
+ evalscope/benchmarks/qasc/qasc_adapter.py,sha256=rhYedQBhrCkv12b4-LWRSfY6LGNgV0I5n2Lm2BEFlak,1128
357
+ evalscope/benchmarks/race/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
358
+ evalscope/benchmarks/race/race_adapter.py,sha256=KibT9gHpIOZhTcWihG0dUDAX4gAHa2g1WdGPOcEP9OY,1705
359
+ evalscope/benchmarks/real_world_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
360
+ evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py,sha256=J2u0J9d31uvkoz9nBI9tCMqG27hmYwdLQPPef9jx_pg,2788
361
+ evalscope/benchmarks/sciq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
+ evalscope/benchmarks/sciq/sciq_adapter.py,sha256=m0TMtTVR-cRQ0oMncgbN7w-v5d_m71hiGeIE5WRa2mA,1249
363
+ evalscope/benchmarks/seed_bench_2_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
364
+ evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py,sha256=ngUOFhP8YFOE8ximkMg5U6TGLZMIXPHJsVJUurvbzM8,3064
365
+ evalscope/benchmarks/simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
366
+ evalscope/benchmarks/simple_qa/simple_qa_adapter.py,sha256=OIVGeFVLEpZp7z2a6JLf_qdRjNhu1-GJgTVL7ocZFiU,9013
367
+ evalscope/benchmarks/simple_vqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
368
+ evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py,sha256=3ioSompYERllFE6yc3yZLl0NKWypRjg5d0uVf3b-4d0,9530
369
+ evalscope/benchmarks/siqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
370
+ evalscope/benchmarks/siqa/siqa_adapter.py,sha256=qpPbEaGrVMc5U0x8hwWRz8gR-1HL0Uvaa5QFInAsLm8,1342
371
+ evalscope/benchmarks/super_gpqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
372
+ evalscope/benchmarks/super_gpqa/prompt.py,sha256=wQ8Y4NAvQJRhPS7gsrUBBzeM_UCHsHOloB_t5WfnIO8,4707
373
+ evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py,sha256=SPqpBebiHj_oyEqU94p9NSqhVkO0KeXQYcBmpfH81nM,6888
374
+ evalscope/benchmarks/super_gpqa/utils.py,sha256=OK_oT-DnWNssITEwu_Zc3Ty5v21n0IaJQYftK2cpwmQ,3401
375
+ evalscope/benchmarks/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
376
+ evalscope/benchmarks/tau_bench/tau2_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
377
+ evalscope/benchmarks/tau_bench/tau2_bench/generation.py,sha256=aMa_I12HmRUj33ELcKgvYCPE-sCimlpdGyean5QMSaE,5387
378
+ evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py,sha256=49M4ABPjSbR5Pkus4RFcnnprN9b3UbSjwXqlJ7PbwHI,5748
379
+ evalscope/benchmarks/tau_bench/tau_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
380
+ evalscope/benchmarks/tau_bench/tau_bench/generation.py,sha256=d7J5xrxEI-0BYxdSuxdDavcR7f1ipBdpQsKZzwyzGds,5190
381
+ evalscope/benchmarks/tau_bench/tau_bench/tau_bench_adapter.py,sha256=vQJdHvvtWI7Eh5zp3M9vFSYmJ-GM4386PVLdf4IONYI,6443
382
+ evalscope/benchmarks/text2image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
383
+ evalscope/benchmarks/text2image/evalmuse_adapter.py,sha256=g-Wc1qTg-xWLTjiZPo8zmQud75ac-8mBpYRxOHfiO0g,3024
384
+ evalscope/benchmarks/text2image/genai_bench_adapter.py,sha256=1GDB3gS9zwrfb9C83LQdQyN7bvvqeYuu5ulJ9Igmi2k,1876
385
+ evalscope/benchmarks/text2image/general_t2i_adapter.py,sha256=CHy9ufvrVHc_5WkGVR_F-5wfLQVFtxwubZOfdpx9rd8,1354
386
+ evalscope/benchmarks/text2image/hpdv2_adapter.py,sha256=8-vWCV21eo_e9EbxDB5mGw2cFzD4OUQPLB66FvlO9W4,1781
387
+ evalscope/benchmarks/text2image/tifa_adapter.py,sha256=4CcprucAe25UpTZRV3Qgb-8jbeNHtXNRWHw8RiYvfJA,784
388
+ evalscope/benchmarks/tool_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
389
+ evalscope/benchmarks/tool_bench/tool_bench_adapter.py,sha256=Lt1iSe9C0PgayqfgmjF0kGllFqizROqp4efjSl9SUUY,3790
390
+ evalscope/benchmarks/tool_bench/utils.py,sha256=led0d-Pa3rvmWkSWhEnZWP00fceudgESq5HXAQzJGls,7042
391
+ evalscope/benchmarks/trivia_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
392
+ evalscope/benchmarks/trivia_qa/samples.jsonl,sha256=1isBD62PGhCiNbzQa-GFrHHL4XLHIkojWfgSvn7ktf8,3445
393
+ evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py,sha256=oZAiCmBpZbBAgzAKPfddaJWMckIyaoRM7fB2XJ5EoQU,2614
394
+ evalscope/benchmarks/truthful_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
395
+ evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py,sha256=W7ESUAcLsHwbssiiSCQNUeQcqx6JEeW7FSQiBFycS24,3512
396
+ evalscope/benchmarks/visu_logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
397
+ evalscope/benchmarks/visu_logic/visu_logic_adapter.py,sha256=8dK8_HFxDhWTvCC8WTZjadChP6lNzgsFp_5qFSRGFoM,3277
398
+ evalscope/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
+ evalscope/benchmarks/winogrande/winogrande_adapter.py,sha256=LWm6qZd3pJbtpcERq7WPK3adwY3uVm4wiUgfyEI_uHE,1310
400
+ evalscope/benchmarks/wmt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
401
+ evalscope/benchmarks/wmt/wmt24_adapter.py,sha256=58BhTjdfJRQS2WtGxwdmgFC5VTx2XjKU0pi7KNh0iO8,8759
402
+ evalscope/benchmarks/zerobench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
403
+ evalscope/benchmarks/zerobench/zerobench_adapter.py,sha256=pqnJEx4uOi3bxwYKqLxrxU5DX9p3F01N2itzbG_-VaU,2739
404
+ evalscope/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
405
+ evalscope/cli/base.py,sha256=m1DFlF16L0Lyrn0YNuFj8ByGjVJIoI0jKzAoodIXjRk,404
406
+ evalscope/cli/cli.py,sha256=qXQ6k9GBkRy2dmBxM24tbVP42bQDyM6G7kkc32LdpCA,860
407
+ evalscope/cli/start_app.py,sha256=LqJ3cSBY8FsM_JjInw4jlpitjaVoIZscUShMpDRPbro,1030
408
+ evalscope/cli/start_eval.py,sha256=MXhVDeaMFd6ny88-gnVtQflH660UaDj240YGYnHccx8,775
409
+ evalscope/cli/start_perf.py,sha256=V8DwVPXTGmyDPma7Yk_pJbLb4iVkDj6Y3qPGHV03sE0,1082
410
+ evalscope/cli/start_server.py,sha256=01iDaEwLx59xRUrrZ_nhQE-QjUE1Rk5d43uMQ_4owbI,3677
411
+ evalscope/collections/__init__.py,sha256=x05hFLrjGsdtuHtc6PyQXHNuucVdYaBN9ZrM8gBiJWg,720
412
+ evalscope/collections/sampler.py,sha256=086pzXQO4CO_QYCd10z149Sjh6sBpRBeIHf5OTLOVu8,4896
413
+ evalscope/collections/schema.py,sha256=yzAlnH0O7iiWB4UnkFXI_Dvxcsq9hDgl0aGK2OpyBY8,4158
414
+ evalscope/evaluator/__init__.py,sha256=KzYmVTfU-1pdX7va7l3B1-5QKWG07hj1B7rYkMmxitY,91
415
+ evalscope/evaluator/evaluator.py,sha256=B4E6vTnG2v7efIsTwBHSyONT8GOwPwmyC6m3siubK08,15964
416
+ evalscope/filters/__init__.py,sha256=AsXwKYDjGhFsJvtj036PRjMOPsHGt-CRicnHTtM_qA4,51
417
+ evalscope/filters/extraction.py,sha256=KLFr_3XYsrv0PTvmXy0ugj2sqv2ZOWJFV7G_MmGjTHk,4146
418
+ evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbnw,1609
419
+ evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
420
+ evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
421
+ evalscope/metrics/math_parser.py,sha256=gJ1NR2Mcyzt9qMdR8I0-6U31Jzoe8a6yUuwvayYPi4c,17979
422
+ evalscope/metrics/metric.py,sha256=0NKTUgNdvL1T4171Y6-ImsopsnUqx2AioSZTBKB4SmE,21975
423
+ evalscope/metrics/metrics.py,sha256=g4EPKTLe_qwofg1UAD7vZhpqVktsSjxV-y1BoaD4WiM,15324
424
+ evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
425
+ evalscope/metrics/bert_score/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
426
+ evalscope/metrics/bert_score/scorer.py,sha256=VobwRtTYS9ENHn5284sWkUlw5LBBNXvViZh185PkiPI,11969
427
+ evalscope/metrics/bert_score/utils.py,sha256=k7ekv1PBNkGMIj3W-KfkIV-i_ryMErcqBtAWjXv9gos,29659
428
+ evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
429
+ evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=T91PgJfi1As7BR7I-Hq6rLlvHAtMB9JpBw9gMTH8VlE,12114
430
+ evalscope/metrics/t2v_metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
431
+ evalscope/metrics/t2v_metrics/clipscore.py,sha256=IsrYKIlFb04-FfBq4MbSv4diS6706J15Y3G4qEFIwfU,455
432
+ evalscope/metrics/t2v_metrics/constants.py,sha256=oY5l5fOFl8qylah9eeebZm0pgY1PYmHDa7JlUC8Qls0,451
433
+ evalscope/metrics/t2v_metrics/itmscore.py,sha256=cIaz_urio_Of1FiA2DZW7pWRIvo487zr33-x8C3Wx0o,443
434
+ evalscope/metrics/t2v_metrics/score.py,sha256=6tIKZoQprlQOBoV-2E-3InIi2Jl29a9W2BFPjKnV1nw,3044
435
+ evalscope/metrics/t2v_metrics/vqascore.py,sha256=UmcSSdQN8mzs3b11sD5Z31WIyQVQUpgXKWQ1XYoX1c8,469
436
+ evalscope/metrics/t2v_metrics/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
437
+ evalscope/metrics/t2v_metrics/models/model.py,sha256=zL2LMvJqXyyZo3KEBl4o_0cGqkTeVTOfs8xJihOKWpk,1295
438
+ evalscope/metrics/t2v_metrics/models/utils.py,sha256=c9A8YGepQ0wier9rMTWkdiyQRfQEaRyEQKDtt_iVkS4,888
439
+ evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py,sha256=_Mwyud2HZVZAhkSmDXlHOkKkT5CwXQUChmQr1xRGtm4,1076
440
+ evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py,sha256=oEILZrtRTpJj2FHH0DbK88IoeKdoUg_AsBDOMjTQ-yU,8108
441
+ evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py,sha256=79zgnp5hemgeyGgaWC-HVYJGX8PZ-cwOW6xaZwfm_qs,3357
442
+ evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py,sha256=EWEFLL98xG2s_a7ZvDlvGFzJvfSgCAzxVvdd-LvKuNE,3815
443
+ evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py,sha256=LI6233xfRgSTwyvR3iXgtMAPrBcdUph0HOuBjP-k2W8,2412
444
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
445
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py,sha256=Nxo0b7Xj0qTMlVg4O3vbj05X1eNTdVXrFTsVEq8j75g,79
446
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py,sha256=ahGbFR2PyigN5iDjRpmUDLNzLRll57W3145Paf8AVlE,5065
447
+ evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py,sha256=PKY6WMtGMt3wJFg4VbvV45oBQq3_r8FgxvPdLYqZ3c4,7839
448
+ evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py,sha256=iPug2fxMo_VXn_77yTLLyjUqyAvh8qOqYF2saHiuPQA,982
449
+ evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py,sha256=eZEQbey0IWWxxhjAJZusbksH2iA1xR9nGpQekM5_oCk,3456
450
+ evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py,sha256=AEMb5qkUuFWQyFWojePpZ3un4odo0BHTKouhbUqF6rk,3692
451
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py,sha256=t8LgtokWZZZfuK2Guxddp4HA4F5mEs2xv3o7RpljIcM,3212
452
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py,sha256=rJmt-XwWodtvR9x4XIz2GCBgHqus6GAwFw9fQXUil-M,6078
453
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
454
+ evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py,sha256=PFTbLLUQ2kSJ9YohuZpuUHIcnndFc9TXsdRjBYZfDgc,2718
455
+ evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py,sha256=sMET64JKY_rqVu8f24UcGfUVb9O5hzTKA6PlMEDe8DE,727
456
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py,sha256=8lLGnK6Xnws-3XXUtmTzdXt0HKLh7fyeiVJwnHOqLpY,9924
457
+ evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py,sha256=jaXN7bpbApfvbm9uZlKAS8D4zetqIP_D17nyZTxHog0,5894
458
+ evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py,sha256=TlvYuUBvaNFQGtZN7UklCq1N9yI_oKGtgB5r6qZ6hi0,4662
459
+ evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py,sha256=KaiCeLRq5NGRPsocQLKLon9qzaEFuqcYlTJInR9x0fA,585
460
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
461
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py,sha256=yDqpm4jIeJbq-Ej28OJwWbF2eWoxVv8CXxl_OelJ1lA,97
462
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py,sha256=XDdIsVWYkRwWFOWeGQGBpbXArrkAp-eeRBWoTWFT358,14022
463
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py,sha256=aXBQpNrmk9dbUDK-gNGne0hfgti2cYiYTq8fRMNfNx4,525
464
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py,sha256=uK4ewgxU3Am1VloBeVWrGTwMam47pjvZxwUXpPp1WZg,2837
465
+ evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py,sha256=jq0zLZypPsoieM8JR33k3fb3Tzal-Zb1ZT5i6Rl2g_U,1394
466
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py,sha256=RtW7q0OrIyJa6Lcjr2AGmRwfePuIRVHQw2sso1IUV8A,848
467
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py,sha256=eAB0TlSTyC8oljLMgTw2Y56PbFIwOdmwbrCo2W0WMkU,14995
468
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py,sha256=U0xsstadVQrKS7ggO-Mh4lGt9VKwHJCv-V_RiTeqzHk,3956
469
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py,sha256=Z7yq6RpGd97NMzIp1WgCdHF05LJ6VqA2DA0ZksPrqRc,817
470
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py,sha256=rs1dGXB6rINW2eFmcPPfTa7Su1tk0Mf0elXWrPJfZvo,5908
471
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py,sha256=OcHWUa4MBC5BCfeROnjzX2a8Swf1u-KGhQJbwo3JsFI,3208
472
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py,sha256=NLW0uR2wGby9FdrotM-Trcl6mrNUbqu2sst9riOTUEA,9666
473
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py,sha256=TgWnH1IblIrcTTEe3AXG4E66pX6R1314ZZ4Cx6HdYq4,13678
474
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py,sha256=ABgzv5fGmXjYuQnV77280hzJWOwLt5YjuaBfdWjXcu8,246
475
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py,sha256=5xpOaMnci_eH0iIJwPGCBREQ7irqg-zTnfuFXxIVB_4,8327
476
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py,sha256=fKIO8PIt1kkirfF7UMgQE0b4Jc4-NtftHKTwg6S3_oI,10920
477
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml,sha256=tGWYH9wsUFC2BqlJ-Uv_v9IbAvvaY89PFqkSnx0v7T8,360
478
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json,sha256=ZcTVdwa_pISMxp8J3F0Uaee3yyrQIn65lqT3_y4KncI,490
479
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json,sha256=4Yuqi1OutvXMdCfAVIe14uEIZIhApndd6uqc1vpGwL4,511
480
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json,sha256=zVHOJiAdTS92rHzg62Q0oTZZsZalondGMqDJJfbolAU,491
481
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml,sha256=Ls2ZfsKV3gDzg6F2zBHPhFbK-3na7ozNGWFmMq_8hTg,1074
482
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml,sha256=yXP6HQVyEYc1X2C_SawNIye4eoaQPxl8JazV7CXUPDc,1073
483
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml,sha256=OpdOvll7UX6nURi84rDvWiFZrLsNNHtoFRWdugVPvdA,1073
484
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml,sha256=hPHF831LSjBGbo1fg9fqhbeSAGOVW-iiZbWHVQVs8wU,957
485
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml,sha256=xMLIGVhkgwBsP9IbKFoZNW_lbVwwjz44ArlSRPS1Q98,980
486
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml,sha256=kyfbbtLOG8cY9CUFm-_g56djMMwfZhOgXSFV2gRMomQ,983
487
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml,sha256=_e45RN28lhhyFRg7JsDczNMU_nJbwirn3eVB3mgkmrI,1022
488
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml,sha256=6ycWPKz5alaQCxpuPuqX1e_whroRULgb8gICOWLDBO4,1019
489
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml,sha256=svzg_Ao0g0-tAnBKT0Jj4PDRvv1ikSxS1Dq5YkzrUTU,860
490
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml,sha256=ZZE6AWe8iiLTXYiJk60P0J4cRLwehLYzRn1ohZxgstI,955
491
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml,sha256=T7L97c2yFLZ5N3_4NFqvRxShvr7relE2GNREuukufCU,955
492
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml,sha256=U0s_BMVb21E2aGnLGBstzdR3WSTP_gk4Hubnnt50lcg,952
493
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml,sha256=AIgEAeTUqFiNVq-uIjbE_zh7jDPLFwchZsw0fCvWqU0,982
494
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml,sha256=CQZQICT2ogqwKklzWVUnfWidOY-Deflh_WD-vq08sys,958
495
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml,sha256=UGuL20xRPZPy3sZeqMgIzovdd1BOTESwTS2gfwsdGFk,955
496
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml,sha256=eKuyCU7yFIU1VSHNRzEu4Bm7NY6NPppIHcTd0RKXUrI,955
497
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml,sha256=7AWFlM92SDySB4-InH9aw83yBhQ3HSKqvGofm-xiDM4,887
498
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml,sha256=xKS5v94CTLIIgQ4NAEuBpVjToRQ7yLme276gN5O_J0w,974
499
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml,sha256=8keYB132xFDzBsMF5nk0lOqfEIT9qupBtDiQRC3nH9o,1004
500
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sha256=nCdAW3SvWMTgkQqEXNFoOrb_Tb5FIOewNqy1A5_e2I4,6431
501
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
502
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=Ns7oM4KpKxWZTo8Lefe4EDFw-jzp5633zAArcWjoVZA,9772
503
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=KIF5tsiE7a5dbDfa-IKwzuzMUpuEAQPrm1nWFFtAeoI,20032
504
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=85ZvV2gKSnsbP5941PeJ-JJ4t8_lOYQe1EOxrHlIbNI,52728
505
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=o5ykt3Q_WQlNmyxjQaS2-KPLGq1xqLZixNYam_Bs6NA,18701
506
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=NPDpIRxjiroafZk5Z2uA9bC8Bi-yXY7um5HXxThF7N0,46857
507
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
508
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
509
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
510
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py,sha256=oS0lFHje_0ncOy8fg-mg5u2whowTz8ghSrGk3FlNNIQ,18896
511
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py,sha256=vTJhL5pkUxNbCi24AcZFWBbqqKw-gCgh937woIKbSjE,13694
512
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py,sha256=GqHUSTk0N9PFT553h5Om4XuuFe0LtG5_yWDM87MbUJs,30387
513
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py,sha256=YXZqOjCnkP6sPW31FU-qNewtRekMANK0Uvlp3EIu2aQ,11334
514
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py,sha256=AZlUbo_rcsp_VdSP5JA8BfcIBtlqNRqgloZ9c3gcnp8,39422
515
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py,sha256=ndKj5JF6ch3IDDGwD-T3fipZEgJqOHKjt03s78rzMgY,83664
516
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py,sha256=ZxBm6k7D6harpWLesr-6kQZeGXKyo-9dE1QZYso1Tp0,3867
517
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py,sha256=Ng0QP_r7qn2DFmBGhSoZxqSItAkgVtyq_sZU_Vf25Hw,2190
518
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py,sha256=LFvs5dx8bnhLOZc4jM5uxXhyhOHoTprKn9B7gCmIOKU,8600
519
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py,sha256=hTIleqj6--CMndUNCT-HFPxGer8c_l2KbkUvi3U24oM,5502
520
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py,sha256=Au8iMYscDk7va-EKpwLuFJpNjfV1aChNRStkA0dzlWQ,7679
521
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py,sha256=wYVz388WuJ5e6rip4FyyN82JNbGXmGdkds50eTYQEJw,7130
522
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py,sha256=qYGsWcZozURx6U07cW5LrLL4TW39-1jaa4R3ROfkl_E,6890
523
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=TOAI-KaUrtKjR1GNU_WwNXNpb9gGT-KX2FYe3muv_e0,4275
524
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
525
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=S68U0DxWYGDmreRbH5yLDHBNN9PsczY9H0Uik0hO-ds,13872
526
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=zv_WyHi67hvgHQ4DkZ8a4UoPcgrADKayqVtiIq-p3V4,36695
527
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=p67DDiFS-676z0z8jPj6NwXwNjEsqTXaXCh3g2UiDno,840
528
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
529
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
530
+ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py,sha256=Epk72q5iTdzRbuzOR669QqAUMgrFFngAU8Z3Qy9KLbM,11209
531
+ evalscope/models/__init__.py,sha256=RmW2S31BHBhMN49_VVF_5PJAk-TsuZQkuF2ALShbhAw,556
532
+ evalscope/models/image_edit_model.py,sha256=oVjGgebnFu3ZXBJLNn62rJ65fcJR7DlG4qEVxisPJ2Y,4104
533
+ evalscope/models/mockllm.py,sha256=t1fFAHkEb1n_atOCfnGteCX3DWp774lnWcHzi5lBjwM,2511
534
+ evalscope/models/model_apis.py,sha256=ZkZ_nfbeAFJnCndRvRIRLcbmJFTMhGRBi-WfMu0uZKE,1922
535
+ evalscope/models/modelscope.py,sha256=jSFkho_Ir2py54y_Bwj9jpCoY2mMKkZ8ORzne-ldAIE,15806
536
+ evalscope/models/openai_compatible.py,sha256=7UxS4TZBYw7jGzuu-dUs6-5g1_nydtW-0mHSxkl74w0,5438
537
+ evalscope/models/text2image_model.py,sha256=Sdiyw6vewjVTiXK8RFEh1pohOhDge80EoIWYpnLjr5Y,3929
538
+ evalscope/models/utils/openai.py,sha256=A93Wd7egqmu6DNLeibnRAPLQJXTB5ucya6aBQSHkHGk,28475
539
+ evalscope/perf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
540
+ evalscope/perf/arguments.py,sha256=JHB-JIEHq5p3zoHeKn6dkelGq0JrMVMRne-wbXK2yhg,12892
541
+ evalscope/perf/benchmark.py,sha256=Uc6BJJGYTsAnfFljPy0WJIXcapHOIwvym3o0yPRTVqU,6964
542
+ evalscope/perf/http_client.py,sha256=8xJFYja8FoQA0MDTj2NcxPkAmji4n81fsaw1gRuL1sA,5152
543
+ evalscope/perf/main.py,sha256=eEL0qUdNPMyHr3ZTixTfZxKN4IIw3gz3sw8sq3S_vs4,4015
544
+ evalscope/perf/plugin/__init__.py,sha256=Ztj4h1_JYJqbbWkeuDTj5aTRyGQf5Woc4xEIyjcokVU,94
545
+ evalscope/perf/plugin/registry.py,sha256=GhLe-h1rGzya2bgIUaV5VymQIaHqI7h5SG_i4PoGAm8,1967
546
+ evalscope/perf/plugin/api/__init__.py,sha256=7RsGdYTSfnW6iVpveEzNu8v4x8Yc8H-Kk39DqOHMrd4,152
547
+ evalscope/perf/plugin/api/base.py,sha256=LLBDKOWUXYbLLLTtO86X1Y4Erbp5egs2WCXGj4my754,2822
548
+ evalscope/perf/plugin/api/custom_api.py,sha256=HHvhNlqNQr43GhIC61yoa54QCEAy4MRMmJ0kBy-rnsQ,8305
549
+ evalscope/perf/plugin/api/dashscope_api.py,sha256=Miv2pzMa6sxZyYYJhCzcbOI_QHuZx7tazKpb6Not7ck,3627
550
+ evalscope/perf/plugin/api/default_api.py,sha256=A3_dUduoBo9-xUdYsKMI1X0WUw_wHsJFNe5hTr9LUEo,9418
551
+ evalscope/perf/plugin/api/openai_api.py,sha256=UVo9tAnqZbVNEQwAT0wOZb1Abbf-yQmr3iDKHwXDoI8,10628
552
+ evalscope/perf/plugin/datasets/__init__.py,sha256=qzeQ9BrJhiJJm1wHaFeOQkvXXdSd15Ucspbn5zjs-6Q,495
553
+ evalscope/perf/plugin/datasets/base.py,sha256=PFBMdo3H_Hx2jOXNrMb97DvJ5gJg6QajSYymCgTXKmo,3629
554
+ evalscope/perf/plugin/datasets/custom.py,sha256=kCofjHfcihPcsc1XwyLxn9QG9E88eZ5qAQW7nW6ID0c,1311
555
+ evalscope/perf/plugin/datasets/flickr8k.py,sha256=nhHiGNhXX-2c17NQ5q5Q7FgV2hB8XVeeAP8dKkboyHE,1033
556
+ evalscope/perf/plugin/datasets/kontext_bench.py,sha256=cN70hiBX1940IWvNWZG9YGE4vO1yj41Bo7bqmOWusoQ,1081
557
+ evalscope/perf/plugin/datasets/line_by_line.py,sha256=L3lj9evcr3q-Mcemyuy2WauBB5c6O-ttnIVw1t4UJUE,922
558
+ evalscope/perf/plugin/datasets/longalpaca.py,sha256=abFLvrRZFsno9IUr_bpvhMWHL9X2sahlIpGLUb-5BxA,1262
559
+ evalscope/perf/plugin/datasets/openqa.py,sha256=UlbHhzGoQTBXa4foEFhRTZX6v7So6pR-ExFhU2ws8YM,1427
560
+ evalscope/perf/plugin/datasets/random_dataset.py,sha256=GPuC5ovi3BW84RCiGSDd2cBZ3jRmFrtMRsxEocc1ud8,3347
561
+ evalscope/perf/plugin/datasets/random_vl_dataset.py,sha256=e6exWQnupWkTDNwt2MmEK-hccuxEDmWLJRMM70onKi0,3230
562
+ evalscope/perf/plugin/datasets/speed_benchmark.py,sha256=J6q7AF_Re5eHLVejXEw9c1jlk1T1PPmist0yO9UFTPE,2432
563
+ evalscope/perf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
564
+ evalscope/perf/utils/analysis_result.py,sha256=aoT7JD2zAzBeuZUfncKhJ2odX_7KnymwOmNB1Upam2c,935
565
+ evalscope/perf/utils/benchmark_util.py,sha256=Uf4vUAsfgAZs2qsyv9cRY_i87QNEHl17XMhGgXq7wFw,8048
566
+ evalscope/perf/utils/db_util.py,sha256=lr26ah_KRznBBu_ssxXki_PgtELk5bUJV2JaM4LaeNI,11534
567
+ evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3Sdc,1192
568
+ evalscope/perf/utils/local_server.py,sha256=dMoX8p6aCQq1JnoXxcyWknadLdBwpfQhvKwk5fn6G4Q,3727
569
+ evalscope/perf/utils/log_utils.py,sha256=YY8mnpJoHMlP6jtmEq7QujyuxhSUF1vqLk8TpBAkbY0,2162
570
+ evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
571
+ evalscope/report/__init__.py,sha256=rjjg_4PHuUA_15XXjOgPjO6cOm08LOi5yUGYzOK6KX8,1216
572
+ evalscope/report/combinator.py,sha256=rpZJsMiVF9Uf4niP5WmZVaLcITPEXdER9Etgqn-BsU8,6740
573
+ evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
574
+ evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
575
+ evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
576
+ evalscope/third_party/longbench_write/README.md,sha256=1yLKeSVIcihpoc4KXr8NpK86JbcLssCPx76aOKdPbYI,5431
577
+ evalscope/third_party/longbench_write/__init__.py,sha256=GNbBDc7HAh_V2Hfy5HhND_u7z6OI79czoBlP8lX4PVo,126
578
+ evalscope/third_party/longbench_write/default_task.json,sha256=d_NPShtW10Mc02U3pAuxX9hXd09tZw7QJAr1SvrECcM,694
579
+ evalscope/third_party/longbench_write/default_task.yaml,sha256=YjU8EeyH9UtM8e7_fhrwJNChQdszOAcrKmOi--Awvhk,578
580
+ evalscope/third_party/longbench_write/eval.py,sha256=39McZSDHL7bA5Dg-BSyZ4EiAF1nfTiYJAnx5FqbNYok,11265
581
+ evalscope/third_party/longbench_write/infer.py,sha256=32t90zTll6SXH7Wx8QnRFMs6ZUwvpbgYNuawCByzwR0,4971
582
+ evalscope/third_party/longbench_write/longbench_write.py,sha256=nIR1toB1hvUXR7Lrs3xcY9wqaI-bjeADg_Oscf3HdaY,3991
583
+ evalscope/third_party/longbench_write/utils.py,sha256=nd-YslsOyNGAuyBfAWb2pnTMaGLMQ58lbnJJdrCndeI,815
584
+ evalscope/third_party/longbench_write/resources/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
585
+ evalscope/third_party/longbench_write/resources/judge.txt,sha256=Go1ISY4bUBmEDXXY_DItjAmskuHSaRj5WTNMNH98FSk,1885
586
+ evalscope/third_party/longbench_write/resources/longbench_write.jsonl,sha256=H26ZSXzCTWWJTWXgFAYvOYupRuvdJUt_izOeSNOrV3k,54155
587
+ evalscope/third_party/longbench_write/resources/longbench_write_en.jsonl,sha256=h4AJJ3YfNA5IiZ5N9dR_tyEa1JNqY0INv6l5ZgQUJZ8,24235
588
+ evalscope/third_party/longbench_write/resources/longwrite_ruler.jsonl,sha256=odTr8N8PoWAFZ2kdEcmlLeMDfEo3KXDtLo9S8oieCmI,5718
589
+ evalscope/third_party/longbench_write/tools/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
590
+ evalscope/third_party/longbench_write/tools/data_etl.py,sha256=T7a-4PwZg5alZQh-oTi1zjMxjGmVVZYVwSR9-diZlF8,5971
591
+ evalscope/third_party/longbench_write/tools/openai_api.py,sha256=PiIvvDYJkn041SJkLoroXwl1B8TtwpB7licVfqNSeuQ,8168
592
+ evalscope/third_party/thinkbench/__init__.py,sha256=C0aSu71_dc1upUVkKmq2VgDd9plpRcYUdCE6BjUWJcA,110
593
+ evalscope/third_party/thinkbench/eval.py,sha256=IyfVTm6arhjBgvGMG5OZwopqQTmWVMJ8zYbbVSLtrvk,19503
594
+ evalscope/third_party/thinkbench/infer.py,sha256=2L4DAJKn3wAhNEKnKudQT60igGOJSKH80FR4nS7DHYk,3952
595
+ evalscope/third_party/thinkbench/resources/critique_template.txt,sha256=d4Egc-qH--4lG8X_EcmgymnuZgiCMbee1M5pt4HrRKA,535
596
+ evalscope/third_party/thinkbench/resources/reformat_template.txt,sha256=zTZyVAzmMBtAwI9lHly9EXsqX471OW-VTg538PDcB30,1775
597
+ evalscope/third_party/thinkbench/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
598
+ evalscope/third_party/thinkbench/tools/llm.py,sha256=HCFh58_THsVrFVzvGoThwWRu8EbPXD0DotLQEj5u4Tg,1353
599
+ evalscope/third_party/thinkbench/tools/utils.py,sha256=rDu2GVTK4ji9Yh9RLVksZqrfurQsSuN9GW3QCKJ60ng,401
600
+ evalscope/third_party/toolbench_static/README.md,sha256=Osdnt0_K-setbmYwDPCPRp2LXxamGp2mE8KsOByPPOY,3944
601
+ evalscope/third_party/toolbench_static/__init__.py,sha256=BO936RxwodHr4OEpV6W3S_keC91OfOd41_msIJ2d0fs,128
602
+ evalscope/third_party/toolbench_static/config_default.json,sha256=KrUzeHL2DNiM5FwY7cH3KZlxTwELCQZ6e39nilfUi0M,368
603
+ evalscope/third_party/toolbench_static/config_default.yaml,sha256=-6n6Zyg9eHN2eexlehSi9LI4F3EPk-3JacrAb6ZoyxI,451
604
+ evalscope/third_party/toolbench_static/eval.py,sha256=do_-lVi_vEoljeLYvt3b_AYSMqpdKzgYnTek9WLSKe8,8236
605
+ evalscope/third_party/toolbench_static/infer.py,sha256=rsADLhEd2IBcC6EI9aD7hSJmo6Oo5b22mnHWBCZLDPs,9010
606
+ evalscope/third_party/toolbench_static/requirements.txt,sha256=OW91Z8hfzh7yQUYgP1Di_E6DgNgGoGP1UcvnqrdCR68,22
607
+ evalscope/third_party/toolbench_static/toolbench_static.py,sha256=xE__eXvSwHmmSh1tXNvyBo6MCO4mDlYTbIYl9OGEfNI,2120
608
+ evalscope/third_party/toolbench_static/llm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
609
+ evalscope/third_party/toolbench_static/llm/swift_infer.py,sha256=hy0JpjSEkCJh3z5ZnY8gGfdJ2ajkS5zRl-2ZQq6Gu8A,2527
610
+ evalscope/utils/__init__.py,sha256=5OH8cOoX3YKMKUu0dMRvwzckXligIbUV-1jjJNXlpGI,2231
611
+ evalscope/utils/argument_utils.py,sha256=zYsqWLFlVeiLSQdFruBI_seTUEVTJ87r8MKwL2JNGb0,1951
612
+ evalscope/utils/chat_service.py,sha256=sSki2pKGQP3UjcIf_lbO06afI-vsaUAqglwX__wUDEw,8766
613
+ evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C_m2i34Y,1610
614
+ evalscope/utils/function_utils.py,sha256=Zu3njXZl6U5AhTyPUfhGXrdCRUCgY1Kvy6gtBpOrvHA,10380
615
+ evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
616
+ evalscope/utils/io_utils.py,sha256=LSPYaIEYv8oj4ozAcbxtSCbsl4edWrr2aI5CP161DvM,14133
617
+ evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
618
+ evalscope/utils/logger.py,sha256=su2D4d3apydmjiYrEBX0p2m8A6tPOlAupmnSfo4jttI,6807
619
+ evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
620
+ evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
621
+ evalscope/utils/ner.py,sha256=gxvUURZVLJqZUrIqCy892rAAJ2ydYiGG5ZKPW_mpHsM,14148
622
+ evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
623
+ evalscope-1.2.0.dist-info/licenses/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
624
+ evalscope-1.2.0.dist-info/METADATA,sha256=uERC07rUVf9mGqWR3b4-t4XyJW1OUmW8waA5CHFclHo,35423
625
+ evalscope-1.2.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
626
+ evalscope-1.2.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
627
+ evalscope-1.2.0.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
628
+ evalscope-1.2.0.dist-info/RECORD,,