opencompass 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (414)
  1. {opencompass-0.2.4 → opencompass-0.2.5}/PKG-INFO +12 -13
  2. {opencompass-0.2.4 → opencompass-0.2.5}/README.md +11 -12
  3. opencompass-0.2.5/opencompass/__init__.py +1 -0
  4. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/GaokaoBench.py +23 -6
  5. opencompass-0.2.5/opencompass/datasets/MMLUArabic.py +33 -0
  6. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/__init__.py +6 -0
  7. opencompass-0.2.5/opencompass/datasets/charm.py +55 -0
  8. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cibench.py +178 -149
  9. opencompass-0.2.5/opencompass/datasets/drop_simple_eval.py +80 -0
  10. opencompass-0.2.5/opencompass/datasets/flames.py +57 -0
  11. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/gpqa.py +53 -1
  12. opencompass-0.2.5/opencompass/datasets/llm_compression.py +36 -0
  13. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/math.py +15 -0
  14. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/mbpp.py +12 -8
  15. opencompass-0.2.5/opencompass/datasets/mgsm.py +78 -0
  16. opencompass-0.2.5/opencompass/datasets/s3eval.py +169 -0
  17. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/__init__.py +2 -0
  18. opencompass-0.2.5/opencompass/datasets/subjective/arena_hard.py +35 -0
  19. opencompass-0.2.5/opencompass/datasets/subjective/compassbench.py +101 -0
  20. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/taco.py +4 -3
  21. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/winogrande.py +9 -0
  22. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/__init__.py +21 -13
  23. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/ai360_api.py +23 -21
  24. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/baichuan_api.py +1 -1
  25. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/baidu_api.py +26 -9
  26. opencompass-0.2.4/opencompass/models/minimax_api.py → opencompass-0.2.5/opencompass/models/deepseek_api.py +62 -66
  27. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/gemini_api.py +0 -63
  28. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/huggingface.py +9 -2
  29. opencompass-0.2.5/opencompass/models/huggingface_above_v4_33.py +440 -0
  30. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/lagent.py +4 -3
  31. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/lightllm_api.py +169 -4
  32. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/lmdeploy_pytorch.py +12 -3
  33. opencompass-0.2.5/opencompass/models/minimax_api.py +352 -0
  34. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/openai_api.py +14 -141
  35. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/qwen_api.py +1 -2
  36. opencompass-0.2.5/opencompass/models/stepfun_api.py +182 -0
  37. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/turbomind.py +29 -12
  38. opencompass-0.2.5/opencompass/models/turbomind_with_tf_above_v4_33.py +195 -0
  39. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/vllm.py +32 -20
  40. opencompass-0.2.5/opencompass/models/vllm_with_tf_above_v4_33.py +127 -0
  41. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/xunfei_api.py +149 -0
  42. opencompass-0.2.5/opencompass/models/yi_api.py +178 -0
  43. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/__init__.py +1 -0
  44. opencompass-0.2.5/opencompass/openicl/icl_evaluator/icl_bpc_evaluator.py +32 -0
  45. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py +26 -0
  46. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/lm_evaluator.py +8 -5
  47. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/__init__.py +1 -0
  48. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_base_inferencer.py +2 -0
  49. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py +27 -49
  50. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +35 -67
  51. opencompass-0.2.5/opencompass/openicl/icl_inferencer/icl_sw_ce_loss_inferencer.py +352 -0
  52. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/__init__.py +0 -1
  53. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/num_worker.py +5 -3
  54. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/registry.py +0 -8
  55. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/dlc.py +30 -22
  56. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/local.py +10 -8
  57. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/__init__.py +2 -0
  58. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/default.py +11 -9
  59. opencompass-0.2.5/opencompass/summarizers/llm_compression.py +200 -0
  60. opencompass-0.2.5/opencompass/summarizers/multi_faceted.py +46 -0
  61. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/needlebench.py +1 -1
  62. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/__init__.py +4 -0
  63. opencompass-0.2.5/opencompass/summarizers/subjective/all_obj.py +123 -0
  64. opencompass-0.2.5/opencompass/summarizers/subjective/arenahard.py +309 -0
  65. opencompass-0.2.5/opencompass/summarizers/subjective/compassbench.py +241 -0
  66. opencompass-0.2.5/opencompass/summarizers/subjective/flames.py +93 -0
  67. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/__init__.py +0 -1
  68. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/openicl_eval.py +6 -2
  69. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/openicl_infer.py +4 -2
  70. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/subjective_eval.py +8 -3
  71. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/build.py +0 -1
  72. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/file.py +3 -3
  73. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/prompt.py +9 -2
  74. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/run.py +99 -89
  75. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/text_postprocessors.py +21 -15
  76. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/PKG-INFO +12 -13
  77. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/SOURCES.txt +23 -2
  78. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/requires.txt +2 -0
  79. opencompass-0.2.4/opencompass/__init__.py +0 -1
  80. opencompass-0.2.4/opencompass/partitioners/mm_naive.py +0 -119
  81. opencompass-0.2.4/opencompass/tasks/mm_infer.py +0 -160
  82. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/FinanceIQ.py +0 -0
  83. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/__init__.py +0 -0
  84. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/cmp_GCP_D.py +0 -0
  85. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/cmp_KSP.py +0 -0
  86. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/cmp_TSP_D.py +0 -0
  87. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/hard_GCP.py +0 -0
  88. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/hard_MSP.py +0 -0
  89. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/hard_TSP.py +0 -0
  90. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/p_BSP.py +0 -0
  91. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/p_EDP.py +0 -0
  92. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/p_SPP.py +0 -0
  93. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/prompts.py +0 -0
  94. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/NPHardEval/utils.py +0 -0
  95. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/OpenFinData.py +0 -0
  96. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/QuALITY.py +0 -0
  97. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/TheoremQA/__init__.py +0 -0
  98. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/TheoremQA/legacy.py +0 -0
  99. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/TheoremQA/main.py +0 -0
  100. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/TheoremQA/number_utils.py +0 -0
  101. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/TheoremQA/utils.py +0 -0
  102. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/advglue.py +0 -0
  103. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/afqmcd.py +0 -0
  104. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/__init__.py +0 -0
  105. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/agieval.py +0 -0
  106. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/constructions.py +0 -0
  107. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/dataset_loader.py +0 -0
  108. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/evaluation.py +0 -0
  109. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/math_equivalence.py +0 -0
  110. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/post_process.py +0 -0
  111. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/agieval/utils.py +0 -0
  112. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/anli.py +0 -0
  113. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/anthropics_evals.py +0 -0
  114. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/apps.py +0 -0
  115. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/arc.py +0 -0
  116. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/ax.py +0 -0
  117. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/base.py +0 -0
  118. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/bbh.py +0 -0
  119. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/boolq.py +0 -0
  120. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/bustum.py +0 -0
  121. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/c3.py +0 -0
  122. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cb.py +0 -0
  123. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/ceval.py +0 -0
  124. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/chembench.py +0 -0
  125. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/chid.py +0 -0
  126. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/circular.py +0 -0
  127. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/civilcomments.py +0 -0
  128. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/clozeTest_maxmin.py +0 -0
  129. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cluewsc.py +0 -0
  130. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cmb.py +1 -1
  131. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cmmlu.py +0 -0
  132. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cmnli.py +0 -0
  133. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cmrc.py +0 -0
  134. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/commonsenseqa.py +0 -0
  135. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/commonsenseqa_cn.py +0 -0
  136. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/copa.py +0 -0
  137. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/crowspairs.py +0 -0
  138. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/crowspairs_cn.py +0 -0
  139. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/csl.py +0 -0
  140. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/custom.py +0 -0
  141. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/cvalues.py +0 -0
  142. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/drcd.py +0 -0
  143. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/drop.py +0 -0
  144. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/ds1000.py +0 -0
  145. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/ds1000_interpreter.py +0 -0
  146. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/eprstmt.py +0 -0
  147. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/flores.py +0 -0
  148. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/game24.py +0 -0
  149. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/govrepcrs.py +0 -0
  150. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/gsm8k.py +0 -0
  151. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/gsm_hard.py +0 -0
  152. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/hellaswag.py +0 -0
  153. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/huggingface.py +0 -0
  154. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/humaneval.py +0 -0
  155. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/humaneval_multi.py +0 -0
  156. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/humanevalx.py +0 -0
  157. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/hungarian_math.py +0 -0
  158. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/__init__.py +0 -0
  159. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_codedebug.py +0 -0
  160. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_coderun.py +0 -0
  161. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_endia.py +0 -0
  162. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_enmc.py +0 -0
  163. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_enqa.py +0 -0
  164. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_ensum.py +0 -0
  165. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_mathcalc.py +0 -0
  166. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_mathfind.py +0 -0
  167. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_retrievekv.py +0 -0
  168. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_retrievenumber.py +0 -0
  169. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_retrievepasskey.py +0 -0
  170. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/infinitebench_zhqa.py +0 -0
  171. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/infinitebench/utils.py +0 -0
  172. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/iwslt2017.py +0 -0
  173. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/jigsawmultilingual.py +0 -0
  174. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/jsonl.py +0 -0
  175. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/kaoshi.py +0 -0
  176. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lambada.py +0 -0
  177. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lawbench/__init__.py +0 -0
  178. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lawbench/lawbench.py +0 -0
  179. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lcsts.py +0 -0
  180. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/__init__.py +0 -0
  181. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/evaluators.py +0 -0
  182. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_coursera.py +0 -0
  183. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_financial_qa.py +0 -0
  184. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_gov_report_summ.py +0 -0
  185. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_gsm100.py +0 -0
  186. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_legal_contract_qa.py +0 -0
  187. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_meeting_summ.py +0 -0
  188. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_multidoc_qa.py +0 -0
  189. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_narrattive_qa.py +0 -0
  190. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_natural_question.py +0 -0
  191. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_news_summ.py +0 -0
  192. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_paper_assistant.py +0 -0
  193. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_patent_summ.py +0 -0
  194. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_quality.py +0 -0
  195. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_review_summ.py +0 -0
  196. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_scientific_qa.py +0 -0
  197. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_topic_retrieval.py +0 -0
  198. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_tpo.py +0 -0
  199. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/leval/leval_tvshow_summ.py +0 -0
  200. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lmeval.py +0 -0
  201. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/__init__.py +0 -0
  202. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/evaluators.py +0 -0
  203. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_2wikim_qa.py +0 -0
  204. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_dureader.py +0 -0
  205. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_gov_report.py +0 -0
  206. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_hotpot_qa.py +0 -0
  207. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_lcc.py +0 -0
  208. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_lsht.py +0 -0
  209. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_multi_news.py +0 -0
  210. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_multifieldqa_en.py +0 -0
  211. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_multifieldqa_zh.py +0 -0
  212. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_musique.py +0 -0
  213. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_narrative_qa.py +0 -0
  214. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_passage_count.py +0 -0
  215. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_passage_retrieval_en.py +0 -0
  216. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py +0 -0
  217. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_qasper.py +0 -0
  218. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_qmsum.py +0 -0
  219. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_repobench.py +0 -0
  220. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_samsum.py +0 -0
  221. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_trec.py +0 -0
  222. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_trivia_qa.py +0 -0
  223. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/longbench/longbench_vcsum.py +0 -0
  224. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/__init__.py +0 -0
  225. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/evaluators.py +0 -0
  226. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_cmrc_mixup.py +0 -0
  227. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_dureader_mixup.py +0 -0
  228. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_factrecall_en.py +0 -0
  229. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_factrecall_zh.py +0 -0
  230. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_hotpotwikiqa_mixup.py +0 -0
  231. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_lic_mixup.py +0 -0
  232. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_loogle_CR_mixup.py +0 -0
  233. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_loogle_MIR_mixup.py +0 -0
  234. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_loogle_SD_mixup.py +0 -0
  235. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_multifieldqa_en_mixup.py +0 -0
  236. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/lveval/lveval_multifieldqa_zh_mixup.py +0 -0
  237. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/mastermath2024v1.py +0 -0
  238. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/math401.py +0 -0
  239. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/math_intern.py +0 -0
  240. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/mathbench.py +0 -0
  241. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/__init__.py +0 -0
  242. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/constructions.py +0 -0
  243. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/dataset_loader.py +0 -0
  244. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/evaluation.py +0 -0
  245. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/math_equivalence.py +0 -0
  246. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/medbench.py +0 -0
  247. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/post_process.py +0 -0
  248. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/medbench/utils.py +0 -0
  249. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/mmlu.py +0 -0
  250. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/multirc.py +0 -0
  251. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/narrativeqa.py +0 -0
  252. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/natural_question.py +0 -0
  253. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/natural_question_cn.py +0 -0
  254. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/obqa.py +0 -0
  255. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/piqa.py +0 -0
  256. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/py150.py +0 -0
  257. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/qasper.py +0 -0
  258. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/qaspercut.py +0 -0
  259. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/race.py +0 -0
  260. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/realtoxicprompts.py +0 -0
  261. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/reasonbench/ReasonBenchDataset.py +0 -0
  262. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/reasonbench/__init__.py +0 -0
  263. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/record.py +0 -0
  264. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/rolebench.py +0 -0
  265. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/safety.py +0 -0
  266. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/scibench.py +0 -0
  267. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/siqa.py +0 -0
  268. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/squad20.py +0 -0
  269. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/storycloze.py +0 -0
  270. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/strategyqa.py +0 -0
  271. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/alignbench.py +0 -0
  272. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/compass_arena.py +0 -0
  273. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/corev2.py +0 -0
  274. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/creationbench.py +0 -0
  275. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/information_retrival.py +0 -0
  276. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/mtbench.py +0 -0
  277. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/multiround.py +0 -0
  278. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/subjective/subjective_cmp.py +0 -0
  279. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/summedits.py +0 -0
  280. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/summscreen.py +0 -0
  281. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/svamp.py +0 -0
  282. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/tabmwp.py +0 -0
  283. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/__init__.py +0 -0
  284. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/evaluators/__init__.py +0 -0
  285. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/evaluators/instruct_evaluator.py +0 -0
  286. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/evaluators/planning_evaluator.py +0 -0
  287. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/evaluators/reason_retrieve_understand_evaluator.py +0 -0
  288. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/evaluators/review_evaluator.py +0 -0
  289. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/schema.py +0 -0
  290. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/utils/__init__.py +0 -0
  291. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/utils/convert_results.py +0 -0
  292. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/utils/format_load.py +0 -0
  293. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/utils/meta_template.py +0 -0
  294. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/teval/utils/template.py +0 -0
  295. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/tnews.py +0 -0
  296. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/triviaqa.py +0 -0
  297. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/triviaqarc.py +0 -0
  298. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/truthfulqa.py +0 -0
  299. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/tydiqa.py +0 -0
  300. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/wic.py +0 -0
  301. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/wikibench.py +0 -0
  302. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/winograd.py +0 -0
  303. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/wnli.py +0 -0
  304. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/wsc.py +0 -0
  305. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/xcopa.py +0 -0
  306. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/xiezhi.py +0 -0
  307. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/xlsum.py +0 -0
  308. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/datasets/xsum.py +0 -0
  309. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/metrics/__init__.py +0 -0
  310. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/metrics/dump_results.py +0 -0
  311. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/metrics/mme_score.py +0 -0
  312. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/metrics/seedbench.py +0 -0
  313. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/accessory.py +0 -0
  314. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/alaya.py +0 -0
  315. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/base.py +0 -0
  316. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/base_api.py +0 -0
  317. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/bytedance_api.py +0 -0
  318. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/claude_api/__init__.py +0 -0
  319. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/claude_api/claude_api.py +0 -0
  320. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/claude_api/postprocessors.py +0 -0
  321. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/glm.py +0 -0
  322. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/hunyuan_api.py +0 -0
  323. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/intern_model.py +0 -0
  324. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/krgpt_api.py +0 -0
  325. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/langchain.py +0 -0
  326. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/llama2.py +0 -0
  327. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/lmdeploy_tis.py +0 -0
  328. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/mistral_api.py +0 -0
  329. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/mixtral.py +0 -0
  330. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/modelscope.py +0 -0
  331. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/moonshot_api.py +0 -0
  332. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/nanbeige_api.py +0 -0
  333. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/pangu_api.py +0 -0
  334. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/sensetime_api.py +0 -0
  335. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/turbomind_api.py +0 -0
  336. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/turbomind_tis.py +0 -0
  337. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/unigpt_api.py +0 -0
  338. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/yayi_api.py +0 -0
  339. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/zhipuai_api.py +0 -0
  340. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/models/zhipuai_v2_api.py +0 -0
  341. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/__init__.py +0 -0
  342. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_dataset_reader.py +0 -0
  343. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_agent_evaluator.py +0 -0
  344. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py +0 -0
  345. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_base_evaluator.py +0 -0
  346. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_circular_evaluator.py +0 -0
  347. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_em_evaluator.py +0 -0
  348. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_jieba_rouge_evaluator.py +0 -0
  349. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_misc_evaluator.py +0 -0
  350. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py +0 -0
  351. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_evaluator/icl_toxic_evaluator.py +0 -0
  352. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_agent_inferencer.py +0 -0
  353. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_attack_inferencer.py +0 -0
  354. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_chat_inferencer.py +0 -0
  355. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py +0 -0
  356. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py +0 -0
  357. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_mink_percent_inferencer.py +0 -0
  358. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_ppl_only_inferencer.py +0 -0
  359. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_sc_inferencer.py +0 -0
  360. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_inferencer/icl_tot_inferencer.py +0 -0
  361. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_prompt_template.py +0 -0
  362. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/__init__.py +0 -0
  363. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_base_retriever.py +0 -0
  364. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_bm25_retriever.py +0 -0
  365. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_dpp_retriever.py +0 -0
  366. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_fix_k_retriever.py +0 -0
  367. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_mdl_retriever.py +0 -0
  368. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_random_retriever.py +0 -0
  369. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_topk_retriever.py +0 -0
  370. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_votek_retriever.py +0 -0
  371. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/icl_retriever/icl_zero_retriever.py +0 -0
  372. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/utils/__init__.py +0 -0
  373. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/openicl/utils/logging.py +0 -0
  374. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/base.py +0 -0
  375. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/naive.py +0 -0
  376. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/size.py +0 -0
  377. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/sub_naive.py +0 -0
  378. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/partitioners/sub_size.py +0 -0
  379. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/__init__.py +0 -0
  380. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/base.py +0 -0
  381. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/local_api.py +0 -0
  382. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/slurm.py +0 -0
  383. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/runners/slurm_sequential.py +0 -0
  384. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/circular.py +0 -0
  385. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/multi_model.py +0 -0
  386. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/alignmentbench.py +0 -0
  387. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/alpacaeval.py +0 -0
  388. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/compass_arena.py +0 -0
  389. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/corev2.py +0 -0
  390. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/creationbench.py +0 -0
  391. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/information_retrival.py +0 -0
  392. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/mtbench.py +0 -0
  393. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/multiround.py +0 -0
  394. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/subjective_post_process.py +0 -0
  395. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/subjective/utils.py +0 -0
  396. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/summarizers/summarizer_pretrain.py +0 -0
  397. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/base.py +0 -0
  398. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/llm_eval.py +0 -0
  399. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/tasks/openicl_attack.py +0 -0
  400. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/__init__.py +0 -0
  401. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/abbr.py +0 -0
  402. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/auxiliary.py +0 -0
  403. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/collect_env.py +0 -0
  404. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/dependency.py +0 -0
  405. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/fileio.py +0 -0
  406. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/lark.py +0 -0
  407. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/logging.py +0 -0
  408. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/menu.py +0 -0
  409. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass/utils/types.py +0 -0
  410. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/dependency_links.txt +0 -0
  411. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/entry_points.txt +0 -0
  412. {opencompass-0.2.4 → opencompass-0.2.5}/opencompass.egg-info/top_level.txt +0 -0
  413. {opencompass-0.2.4 → opencompass-0.2.5}/setup.cfg +0 -0
  414. {opencompass-0.2.4 → opencompass-0.2.5}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: opencompass
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: A comprehensive toolkit for large model evaluation
5
5
  Home-page: https://github.com/open-compass/opencompass
6
6
  Author: OpenCompass Contributors
@@ -78,6 +78,11 @@ Description: <div align="center">
78
78
 
79
79
  ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
80
80
 
81
+ - **\[2024.05.08\]** We supported the evaluation of 4 MoE models: [Mixtral-8x22B-v0.1](configs/models/mixtral/hf_mixtral_8x22b_v0_1.py), [Mixtral-8x22B-Instruct-v0.1](configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py), [Qwen1.5-MoE-A2.7B](configs/models/qwen/hf_qwen1_5_moe_a2_7b.py), [Qwen1.5-MoE-A2.7B-Chat](configs/models/qwen/hf_qwen1_5_moe_a2_7b_chat.py). Try them out now!
82
+ - **\[2024.04.30\]** We supported evaluating a model's compression efficiency by calculating its Bits per Character (BPC) metric on [external corpora](configs/datasets/llm_compression/README.md) ([official paper](https://github.com/hkust-nlp/llm-compression-intelligence)). Check out the [llm-compression](configs/eval_llm_compression.py) evaluation config now! 🔥🔥🔥
83
+ - **\[2024.04.29\]** We report the performance of several famous LLMs on the common benchmarks, welcome to [documentation](https://opencompass.readthedocs.io/en/latest/user_guides/corebench.html) for more information! 🔥🔥🔥.
84
+ - **\[2024.04.26\]** We deprecated the multi-modality evaluation function from OpenCompass; the related implementation has moved to [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), welcome to use! 🔥🔥🔥.
85
+ - **\[2024.04.26\]** We supported the evaluation of [ArenaHard](configs/eval_subjective_arena_hard.py) welcome to try!🔥🔥🔥.
81
86
  - **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
82
87
  - **\[2024.02.29\]** We supported the MT-Bench, AlpacaEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
83
88
  - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
@@ -130,7 +135,7 @@ Description: <div align="center">
130
135
  git clone https://github.com/open-compass/opencompass opencompass
131
136
  cd opencompass
132
137
  pip install -e .
133
- # also please install requiresments packages via `pip install -r requirements/api.txt` for API models if needed.
138
+ # also please install requirements packages via `pip install -r requirements/api.txt` for API models if needed.
134
139
  ```
135
140
 
136
141
  ### 📂 Data Preparation
@@ -165,19 +170,13 @@ Description: <div align="center">
165
170
  You can also evaluate other HuggingFace models via command line. Taking LLaMA-7b as an example:
166
171
 
167
172
  ```bash
168
- python run.py --datasets ceval_ppl mmlu_ppl \
169
- --hf-path huggyllama/llama-7b \ # HuggingFace model path
170
- --model-kwargs device_map='auto' \ # Arguments for model construction
171
- --tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # Arguments for tokenizer construction
172
- --max-out-len 100 \ # Maximum number of tokens generated
173
- --max-seq-len 2048 \ # Maximum sequence length the model can accept
174
- --batch-size 8 \ # Batch size
175
- --no-batch-padding \ # Don't enable batch padding, infer through for loop to avoid performance loss
176
- --num-gpus 1 # Number of minimum required GPUs
173
+ python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b
177
174
  ```
178
175
 
179
- > **Note**<br />
180
- > To run the command above, you will need to remove the comments starting from `# ` first.
176
+ > \[!TIP\]
177
+ >
178
+ > Configurations with `_ppl` are typically designed for base models.
179
+ > Configurations with `_gen` can be used for both base models and chat models.
181
180
 
182
181
  Through the command line or configuration files, OpenCompass also supports evaluating APIs or custom models, as well as more diversified evaluation strategies. Please read the [Quick Start](https://opencompass.readthedocs.io/en/latest/get_started/quick_start.html) to learn how to run an evaluation task.
183
182
 
@@ -70,6 +70,11 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
70
70
 
71
71
  ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
72
72
 
73
+ - **\[2024.05.08\]** We supported the evaluation of 4 MoE models: [Mixtral-8x22B-v0.1](configs/models/mixtral/hf_mixtral_8x22b_v0_1.py), [Mixtral-8x22B-Instruct-v0.1](configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py), [Qwen1.5-MoE-A2.7B](configs/models/qwen/hf_qwen1_5_moe_a2_7b.py), [Qwen1.5-MoE-A2.7B-Chat](configs/models/qwen/hf_qwen1_5_moe_a2_7b_chat.py). Try them out now!
74
+ - **\[2024.04.30\]** We supported evaluating a model's compression efficiency by calculating its Bits per Character (BPC) metric on [external corpora](configs/datasets/llm_compression/README.md) ([official paper](https://github.com/hkust-nlp/llm-compression-intelligence)). Check out the [llm-compression](configs/eval_llm_compression.py) evaluation config now! 🔥🔥🔥
75
+ - **\[2024.04.29\]** We report the performance of several famous LLMs on the common benchmarks, welcome to [documentation](https://opencompass.readthedocs.io/en/latest/user_guides/corebench.html) for more information! 🔥🔥🔥.
76
+ - **\[2024.04.26\]** We deprecated the multi-modality evaluation function from OpenCompass; the related implementation has moved to [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), welcome to use! 🔥🔥🔥.
77
+ - **\[2024.04.26\]** We supported the evaluation of [ArenaHard](configs/eval_subjective_arena_hard.py) welcome to try!🔥🔥🔥.
73
78
  - **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
74
79
  - **\[2024.02.29\]** We supported the MT-Bench, AlpacaEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
75
80
  - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
@@ -122,7 +127,7 @@ conda activate opencompass
122
127
  git clone https://github.com/open-compass/opencompass opencompass
123
128
  cd opencompass
124
129
  pip install -e .
125
- # also please install requiresments packages via `pip install -r requirements/api.txt` for API models if needed.
130
+ # also please install requirements packages via `pip install -r requirements/api.txt` for API models if needed.
126
131
  ```
127
132
 
128
133
  ### 📂 Data Preparation
@@ -157,19 +162,13 @@ python tools/list_configs.py llama mmlu
157
162
  You can also evaluate other HuggingFace models via command line. Taking LLaMA-7b as an example:
158
163
 
159
164
  ```bash
160
- python run.py --datasets ceval_ppl mmlu_ppl \
161
- --hf-path huggyllama/llama-7b \ # HuggingFace model path
162
- --model-kwargs device_map='auto' \ # Arguments for model construction
163
- --tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # Arguments for tokenizer construction
164
- --max-out-len 100 \ # Maximum number of tokens generated
165
- --max-seq-len 2048 \ # Maximum sequence length the model can accept
166
- --batch-size 8 \ # Batch size
167
- --no-batch-padding \ # Don't enable batch padding, infer through for loop to avoid performance loss
168
- --num-gpus 1 # Number of minimum required GPUs
165
+ python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b
169
166
  ```
170
167
 
171
- > **Note**<br />
172
- > To run the command above, you will need to remove the comments starting from `# ` first.
168
+ > \[!TIP\]
169
+ >
170
+ > Configurations with `_ppl` are typically designed for base models.
171
+ > Configurations with `_gen` can be used for both base models and chat models.
173
172
 
174
173
  Through the command line or configuration files, OpenCompass also supports evaluating APIs or custom models, as well as more diversified evaluation strategies. Please read the [Quick Start](https://opencompass.readthedocs.io/en/latest/get_started/quick_start.html) to learn how to run an evaluation task.
175
174
 
@@ -0,0 +1 @@
1
+ __version__ = '0.2.5'
@@ -91,34 +91,51 @@ class GaokaoBenchEvaluator(BaseEvaluator):
91
91
  ]:
92
92
  return {'score': 0}
93
93
  elif self.question_type == 'multi_choice':
94
+ details = {}
94
95
  correct_score, total_score = 0, 0
95
- for pred, refr in zip(predictions, references):
96
+ for index, (pred, refr) in enumerate(zip(predictions, references)):
96
97
  pred = self.do_predictions_postprocess(pred)
97
98
  pred = self.ensure_same_length(pred, refr)
99
+ is_corrects = []
98
100
  for p, r in zip(pred, refr):
99
101
  if p == r:
100
102
  correct_score += 2
103
+ is_corrects.append(True)
101
104
  else:
102
105
  for i in p:
103
106
  if i not in r:
104
107
  break
105
108
  else:
106
109
  correct_score += 1
110
+ is_corrects.append(False)
107
111
  total_score += 2
108
- return {'score': correct_score / total_score * 100}
112
+ details[str(index)] = {
113
+ 'pred': pred,
114
+ 'refr': refr,
115
+ 'is_correct': all(is_corrects),
116
+ }
117
+
109
118
  else:
119
+ details = {}
110
120
  correct_score, total_score = 0, 0
111
- for pred, refr in zip(predictions, references):
121
+ for index, (pred, refr) in enumerate(zip(predictions, references)):
112
122
  if self.question_type == 'multi_question_choice':
113
123
  pred = self.do_predictions_postprocess(pred, len(refr))
114
124
  else:
115
125
  pred = self.do_predictions_postprocess(pred)
116
126
  pred = self.ensure_same_length(pred, refr)
127
+ is_corrects = []
117
128
  for p, r in zip(pred, refr):
118
- if p == r:
119
- correct_score += 1
129
+ is_correct = p == r
130
+ correct_score += is_correct
120
131
  total_score += 1
121
- return {'score': correct_score / total_score * 100}
132
+ is_corrects.append(is_correct)
133
+ details[str(index)] = {
134
+ 'pred': pred,
135
+ 'refr': refr,
136
+ 'is_correct': all(is_corrects),
137
+ }
138
+ return {'score': correct_score / total_score * 100, 'details': details}
122
139
 
123
140
 
124
141
  for question_type in valid_gaokao_bench_question_types:
@@ -0,0 +1,33 @@
1
+ import csv
2
+ import os.path as osp
3
+
4
+ from datasets import Dataset, DatasetDict
5
+
6
+ from opencompass.registry import LOAD_DATASET
7
+
8
+ from .base import BaseDataset
9
+
10
+
11
+ @LOAD_DATASET.register_module()
12
+ class MMLUArabicDataset(BaseDataset):
13
+
14
+ @staticmethod
15
+ def load(path: str, name: str):
16
+ dataset = DatasetDict()
17
+ for split in ['dev', 'test']:
18
+ raw_data = []
19
+ filename = osp.join(path, split, f'{name}_{split}.csv')
20
+ with open(filename, encoding='utf-8') as f:
21
+ reader = csv.reader(f)
22
+ for row in reader:
23
+ assert len(row) == 6
24
+ raw_data.append({
25
+ 'input': row[0],
26
+ 'A': row[1],
27
+ 'B': row[2],
28
+ 'C': row[3],
29
+ 'D': row[4],
30
+ 'target': row[5],
31
+ })
32
+ dataset[split] = Dataset.from_list(raw_data)
33
+ return dataset
@@ -12,6 +12,7 @@ from .bustum import * # noqa: F401, F403
12
12
  from .c3 import * # noqa: F401, F403
13
13
  from .cb import * # noqa: F401, F403
14
14
  from .ceval import * # noqa: F401, F403
15
+ from .charm import * # noqa: F401, F403
15
16
  from .chembench import * # noqa: F401, F403
16
17
  from .chid import * # noqa: F401, F403
17
18
  from .cibench import * # noqa: F401, F403
@@ -33,10 +34,12 @@ from .custom import * # noqa: F401, F403
33
34
  from .cvalues import * # noqa: F401, F403
34
35
  from .drcd import * # noqa: F401, F403
35
36
  from .drop import * # noqa: F401, F403
37
+ from .drop_simple_eval import * # noqa: F401, F403
36
38
  from .ds1000 import * # noqa: F401, F403
37
39
  from .ds1000_interpreter import * # noqa: F401, F403
38
40
  from .eprstmt import * # noqa: F401, F403
39
41
  from .FinanceIQ import * # noqa: F401, F403
42
+ from .flames import * # noqa: F401, F403
40
43
  from .flores import * # noqa: F401, F403
41
44
  from .game24 import * # noqa: F401, F403
42
45
  from .GaokaoBench import * # noqa: F401, F403
@@ -59,6 +62,7 @@ from .lambada import * # noqa: F401, F403
59
62
  from .lawbench import * # noqa: F401, F403
60
63
  from .lcsts import * # noqa: F401, F403
61
64
  from .leval import * # noqa: F401, F403
65
+ from .llm_compression import LLMCompressionDataset # noqa: F401, F403
62
66
  from .longbench import * # noqa: F401, F403
63
67
  from .lveval import * # noqa: F401, F403
64
68
  from .mastermath2024v1 import * # noqa: F401, F403
@@ -68,7 +72,9 @@ from .math_intern import * # noqa: F401, F403
68
72
  from .mathbench import * # noqa: F401, F403
69
73
  from .mbpp import * # noqa: F401, F403
70
74
  from .medbench import * # noqa: F401, F403
75
+ from .mgsm import * # noqa: F401, F403
71
76
  from .mmlu import * # noqa: F401, F403
77
+ from .MMLUArabic import * # noqa: F401, F403
72
78
  from .multirc import * # noqa: F401, F403
73
79
  from .narrativeqa import * # noqa: F401, F403
74
80
  from .natural_question import * # noqa: F401, F403
@@ -0,0 +1,55 @@
1
+ import json
2
+ import os.path as osp
3
+ import re
4
+
5
+ from datasets import Dataset
6
+
7
+ from opencompass.openicl.icl_evaluator import BaseEvaluator
8
+ from opencompass.registry import (ICL_EVALUATORS, LOAD_DATASET,
9
+ TEXT_POSTPROCESSORS)
10
+
11
+ from .base import BaseDataset
12
+
13
+
14
+ @TEXT_POSTPROCESSORS.register_module('charm-reason')
15
+ def charm_reason_postprocess(text: str) -> str:
16
+ ans = text
17
+ ans_line = ans.split('answer is ')
18
+ if len(ans_line) != 1:
19
+ ans = ans_line[1].strip()
20
+ match = re.search(r'\(([A-Z])\)*', ans)
21
+ if match:
22
+ return match.group(1)
23
+ match = re.search(r'([A-Z])', ans)
24
+ if match:
25
+ return match.group(1)
26
+ return ans
27
+
28
+
29
+ @ICL_EVALUATORS.register_module()
30
+ class CharmReasonEvaluator(BaseEvaluator):
31
+
32
+ def score(self, predictions, references):
33
+ if len(predictions) != len(references):
34
+ return {'error': 'preds and refrs have different length'}
35
+ details = []
36
+ cnt = 0
37
+ for pred, ref in zip(predictions, references):
38
+ detail = {'pred': pred, 'answer': ref, 'correct': False}
39
+ if pred == ref:
40
+ cnt += 1
41
+ detail['correct'] = True
42
+ details.append(detail)
43
+ score = cnt / len(predictions) * 100
44
+ return {'score': score, 'details': details}
45
+
46
+
47
+ @LOAD_DATASET.register_module()
48
+ class CharmDataset(BaseDataset):
49
+
50
+ @staticmethod
51
+ def load(path: str, name: str):
52
+ with open(osp.join(path, f'{name}.json'), 'r') as f:
53
+ data = json.load(f)['examples']
54
+ dataset = Dataset.from_list(data)
55
+ return dataset