opencompass 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (394)
  1. {opencompass-0.2.3 → opencompass-0.2.4}/PKG-INFO +57 -9
  2. {opencompass-0.2.3 → opencompass-0.2.4}/README.md +56 -8
  3. opencompass-0.2.4/opencompass/__init__.py +1 -0
  4. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_GCP_D.py +5 -1
  5. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_TSP_D.py +5 -1
  6. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_SPP.py +5 -1
  7. opencompass-0.2.4/opencompass/datasets/QuALITY.py +59 -0
  8. opencompass-0.2.4/opencompass/datasets/TheoremQA/__init__.py +4 -0
  9. opencompass-0.2.3/opencompass/datasets/TheoremQA.py → opencompass-0.2.4/opencompass/datasets/TheoremQA/legacy.py +1 -1
  10. opencompass-0.2.4/opencompass/datasets/TheoremQA/main.py +66 -0
  11. opencompass-0.2.4/opencompass/datasets/TheoremQA/number_utils.py +98 -0
  12. opencompass-0.2.4/opencompass/datasets/TheoremQA/utils.py +110 -0
  13. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/__init__.py +4 -0
  14. opencompass-0.2.4/opencompass/datasets/apps.py +877 -0
  15. opencompass-0.2.4/opencompass/datasets/chembench.py +34 -0
  16. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/custom.py +10 -0
  17. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/humanevalx.py +4 -1
  18. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/math.py +19 -6
  19. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/mathbench.py +1 -1
  20. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/mbpp.py +41 -36
  21. opencompass-0.2.4/opencompass/datasets/taco.py +823 -0
  22. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/__init__.py +6 -1
  23. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/ai360_api.py +4 -4
  24. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/baichuan_api.py +128 -4
  25. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/baidu_api.py +4 -4
  26. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/base.py +2 -2
  27. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/base_api.py +4 -4
  28. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/bytedance_api.py +4 -4
  29. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/claude_api/claude_api.py +4 -4
  30. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/gemini_api.py +6 -6
  31. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/huggingface.py +1 -1
  32. opencompass-0.2.4/opencompass/models/hunyuan_api.py +121 -0
  33. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/llama2.py +1 -1
  34. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/lmdeploy_pytorch.py +2 -2
  35. opencompass-0.2.4/opencompass/models/lmdeploy_tis.py +200 -0
  36. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/minimax_api.py +4 -4
  37. opencompass-0.2.4/opencompass/models/mistral_api.py +123 -0
  38. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/moonshot_api.py +24 -26
  39. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/nanbeige_api.py +4 -4
  40. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/openai_api.py +41 -17
  41. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/pangu_api.py +4 -4
  42. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/qwen_api.py +28 -14
  43. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/sensetime_api.py +14 -9
  44. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/turbomind.py +36 -8
  45. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/turbomind_api.py +4 -4
  46. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/turbomind_tis.py +10 -4
  47. opencompass-0.2.4/opencompass/models/unigpt_api.py +147 -0
  48. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/vllm.py +6 -0
  49. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/xunfei_api.py +4 -4
  50. opencompass-0.2.4/opencompass/models/yayi_api.py +261 -0
  51. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/zhipuai_api.py +4 -4
  52. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/zhipuai_v2_api.py +12 -6
  53. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/lm_evaluator.py +57 -23
  54. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_chat_inferencer.py +2 -14
  55. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_prompt_template.py +4 -4
  56. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/base.py +18 -7
  57. opencompass-0.2.4/opencompass/partitioners/sub_naive.py +220 -0
  58. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/sub_size.py +29 -6
  59. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/registry.py +15 -1
  60. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/base.py +2 -1
  61. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/dlc.py +37 -10
  62. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/local.py +21 -7
  63. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/default.py +1 -1
  64. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/needlebench.py +234 -173
  65. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/alignmentbench.py +43 -33
  66. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/alpacaeval.py +2 -1
  67. opencompass-0.2.4/opencompass/summarizers/subjective/compass_arena.py +240 -0
  68. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/mtbench.py +55 -48
  69. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/multiround.py +2 -1
  70. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/openicl_eval.py +3 -2
  71. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/openicl_infer.py +12 -5
  72. opencompass-0.2.4/opencompass/tasks/subjective_eval.py +438 -0
  73. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/abbr.py +22 -0
  74. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/build.py +1 -0
  75. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/prompt.py +5 -5
  76. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/run.py +140 -12
  77. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/text_postprocessors.py +5 -5
  78. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass.egg-info/PKG-INFO +57 -9
  79. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass.egg-info/SOURCES.txt +15 -1
  80. opencompass-0.2.4/opencompass.egg-info/entry_points.txt +3 -0
  81. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass.egg-info/requires.txt +2 -1
  82. {opencompass-0.2.3 → opencompass-0.2.4}/setup.py +40 -33
  83. opencompass-0.2.3/opencompass/__init__.py +0 -1
  84. opencompass-0.2.3/opencompass/partitioners/sub_naive.py +0 -110
  85. opencompass-0.2.3/opencompass/summarizers/subjective/compass_arena.py +0 -204
  86. opencompass-0.2.3/opencompass/tasks/subjective_eval.py +0 -282
  87. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/FinanceIQ.py +0 -0
  88. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/GaokaoBench.py +0 -0
  89. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/__init__.py +0 -0
  90. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_KSP.py +0 -0
  91. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_GCP.py +0 -0
  92. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_MSP.py +0 -0
  93. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_TSP.py +0 -0
  94. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_BSP.py +0 -0
  95. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_EDP.py +0 -0
  96. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/prompts.py +0 -0
  97. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/utils.py +0 -0
  98. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/OpenFinData.py +0 -0
  99. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/advglue.py +0 -0
  100. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/afqmcd.py +0 -0
  101. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/__init__.py +0 -0
  102. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/agieval.py +0 -0
  103. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/constructions.py +0 -0
  104. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/dataset_loader.py +0 -0
  105. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/evaluation.py +0 -0
  106. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/math_equivalence.py +0 -0
  107. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/post_process.py +0 -0
  108. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/agieval/utils.py +0 -0
  109. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/anli.py +0 -0
  110. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/anthropics_evals.py +0 -0
  111. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/arc.py +0 -0
  112. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/ax.py +0 -0
  113. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/base.py +0 -0
  114. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/bbh.py +0 -0
  115. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/boolq.py +0 -0
  116. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/bustum.py +0 -0
  117. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/c3.py +0 -0
  118. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cb.py +0 -0
  119. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/ceval.py +0 -0
  120. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/chid.py +0 -0
  121. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cibench.py +0 -0
  122. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/circular.py +0 -0
  123. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/civilcomments.py +0 -0
  124. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/clozeTest_maxmin.py +0 -0
  125. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cluewsc.py +0 -0
  126. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cmb.py +0 -0
  127. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cmmlu.py +0 -0
  128. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cmnli.py +0 -0
  129. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cmrc.py +0 -0
  130. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/commonsenseqa.py +0 -0
  131. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/commonsenseqa_cn.py +0 -0
  132. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/copa.py +0 -0
  133. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/crowspairs.py +0 -0
  134. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/crowspairs_cn.py +0 -0
  135. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/csl.py +0 -0
  136. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/cvalues.py +0 -0
  137. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/drcd.py +0 -0
  138. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/drop.py +0 -0
  139. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/ds1000.py +0 -0
  140. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/ds1000_interpreter.py +0 -0
  141. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/eprstmt.py +0 -0
  142. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/flores.py +0 -0
  143. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/game24.py +0 -0
  144. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/govrepcrs.py +0 -0
  145. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/gpqa.py +0 -0
  146. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/gsm8k.py +0 -0
  147. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/gsm_hard.py +0 -0
  148. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/hellaswag.py +0 -0
  149. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/huggingface.py +0 -0
  150. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/humaneval.py +0 -0
  151. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/humaneval_multi.py +0 -0
  152. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/hungarian_math.py +0 -0
  153. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/__init__.py +0 -0
  154. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_codedebug.py +0 -0
  155. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_coderun.py +0 -0
  156. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_endia.py +0 -0
  157. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_enmc.py +0 -0
  158. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_enqa.py +0 -0
  159. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_ensum.py +0 -0
  160. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_mathcalc.py +0 -0
  161. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_mathfind.py +0 -0
  162. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievekv.py +0 -0
  163. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievenumber.py +0 -0
  164. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievepasskey.py +0 -0
  165. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_zhqa.py +0 -0
  166. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/infinitebench/utils.py +0 -0
  167. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/iwslt2017.py +0 -0
  168. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/jigsawmultilingual.py +0 -0
  169. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/jsonl.py +0 -0
  170. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/kaoshi.py +0 -0
  171. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lambada.py +0 -0
  172. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lawbench/__init__.py +0 -0
  173. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lawbench/lawbench.py +0 -0
  174. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lcsts.py +0 -0
  175. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/__init__.py +0 -0
  176. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/evaluators.py +0 -0
  177. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_coursera.py +0 -0
  178. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_financial_qa.py +0 -0
  179. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_gov_report_summ.py +0 -0
  180. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_gsm100.py +0 -0
  181. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_legal_contract_qa.py +0 -0
  182. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_meeting_summ.py +0 -0
  183. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_multidoc_qa.py +0 -0
  184. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_narrattive_qa.py +0 -0
  185. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_natural_question.py +0 -0
  186. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_news_summ.py +0 -0
  187. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_paper_assistant.py +0 -0
  188. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_patent_summ.py +0 -0
  189. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_quality.py +0 -0
  190. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_review_summ.py +0 -0
  191. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_scientific_qa.py +0 -0
  192. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_topic_retrieval.py +0 -0
  193. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_tpo.py +0 -0
  194. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/leval/leval_tvshow_summ.py +0 -0
  195. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lmeval.py +0 -0
  196. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/__init__.py +0 -0
  197. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/evaluators.py +0 -0
  198. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_2wikim_qa.py +0 -0
  199. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_dureader.py +0 -0
  200. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_gov_report.py +0 -0
  201. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_hotpot_qa.py +0 -0
  202. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_lcc.py +0 -0
  203. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_lsht.py +0 -0
  204. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multi_news.py +0 -0
  205. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multifieldqa_en.py +0 -0
  206. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multifieldqa_zh.py +0 -0
  207. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_musique.py +0 -0
  208. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_narrative_qa.py +0 -0
  209. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_count.py +0 -0
  210. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_retrieval_en.py +0 -0
  211. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py +0 -0
  212. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_qasper.py +0 -0
  213. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_qmsum.py +0 -0
  214. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_repobench.py +0 -0
  215. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_samsum.py +0 -0
  216. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_trec.py +0 -0
  217. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_trivia_qa.py +0 -0
  218. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_vcsum.py +0 -0
  219. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/__init__.py +0 -0
  220. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/evaluators.py +0 -0
  221. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_cmrc_mixup.py +0 -0
  222. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_dureader_mixup.py +0 -0
  223. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_factrecall_en.py +0 -0
  224. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_factrecall_zh.py +0 -0
  225. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_hotpotwikiqa_mixup.py +0 -0
  226. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_lic_mixup.py +0 -0
  227. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_loogle_CR_mixup.py +0 -0
  228. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_loogle_MIR_mixup.py +0 -0
  229. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_loogle_SD_mixup.py +0 -0
  230. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_multifieldqa_en_mixup.py +0 -0
  231. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/lveval/lveval_multifieldqa_zh_mixup.py +0 -0
  232. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/mastermath2024v1.py +0 -0
  233. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/math401.py +0 -0
  234. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/math_intern.py +0 -0
  235. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/__init__.py +0 -0
  236. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/constructions.py +0 -0
  237. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/dataset_loader.py +0 -0
  238. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/evaluation.py +0 -0
  239. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/math_equivalence.py +0 -0
  240. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/medbench.py +0 -0
  241. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/post_process.py +0 -0
  242. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/medbench/utils.py +0 -0
  243. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/mmlu.py +0 -0
  244. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/multirc.py +0 -0
  245. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/narrativeqa.py +0 -0
  246. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/natural_question.py +0 -0
  247. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/natural_question_cn.py +0 -0
  248. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/obqa.py +0 -0
  249. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/piqa.py +0 -0
  250. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/py150.py +0 -0
  251. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/qasper.py +0 -0
  252. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/qaspercut.py +0 -0
  253. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/race.py +0 -0
  254. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/realtoxicprompts.py +0 -0
  255. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/reasonbench/ReasonBenchDataset.py +0 -0
  256. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/reasonbench/__init__.py +0 -0
  257. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/record.py +0 -0
  258. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/rolebench.py +0 -0
  259. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/safety.py +0 -0
  260. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/scibench.py +0 -0
  261. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/siqa.py +0 -0
  262. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/squad20.py +0 -0
  263. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/storycloze.py +0 -0
  264. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/strategyqa.py +0 -0
  265. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/__init__.py +0 -0
  266. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/alignbench.py +0 -0
  267. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/compass_arena.py +0 -0
  268. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/corev2.py +0 -0
  269. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/creationbench.py +0 -0
  270. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/information_retrival.py +0 -0
  271. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/mtbench.py +0 -0
  272. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/multiround.py +0 -0
  273. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/subjective/subjective_cmp.py +0 -0
  274. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/summedits.py +0 -0
  275. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/summscreen.py +0 -0
  276. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/svamp.py +0 -0
  277. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/tabmwp.py +0 -0
  278. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/__init__.py +0 -0
  279. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/__init__.py +0 -0
  280. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/instruct_evaluator.py +0 -0
  281. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/planning_evaluator.py +0 -0
  282. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/reason_retrieve_understand_evaluator.py +0 -0
  283. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/review_evaluator.py +0 -0
  284. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/schema.py +0 -0
  285. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/utils/__init__.py +0 -0
  286. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/utils/convert_results.py +0 -0
  287. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/utils/format_load.py +0 -0
  288. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/utils/meta_template.py +0 -0
  289. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/teval/utils/template.py +0 -0
  290. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/tnews.py +0 -0
  291. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/triviaqa.py +0 -0
  292. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/triviaqarc.py +0 -0
  293. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/truthfulqa.py +0 -0
  294. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/tydiqa.py +0 -0
  295. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/wic.py +0 -0
  296. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/wikibench.py +0 -0
  297. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/winograd.py +0 -0
  298. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/winogrande.py +0 -0
  299. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/wnli.py +0 -0
  300. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/wsc.py +0 -0
  301. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/xcopa.py +0 -0
  302. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/xiezhi.py +0 -0
  303. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/xlsum.py +0 -0
  304. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/datasets/xsum.py +0 -0
  305. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/metrics/__init__.py +0 -0
  306. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/metrics/dump_results.py +0 -0
  307. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/metrics/mme_score.py +0 -0
  308. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/metrics/seedbench.py +0 -0
  309. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/accessory.py +0 -0
  310. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/alaya.py +0 -0
  311. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/claude_api/__init__.py +0 -0
  312. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/claude_api/postprocessors.py +0 -0
  313. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/glm.py +0 -0
  314. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/intern_model.py +0 -0
  315. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/krgpt_api.py +0 -0
  316. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/lagent.py +0 -0
  317. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/langchain.py +0 -0
  318. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/lightllm_api.py +0 -0
  319. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/mixtral.py +0 -0
  320. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/models/modelscope.py +0 -0
  321. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/__init__.py +0 -0
  322. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_dataset_reader.py +0 -0
  323. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/__init__.py +0 -0
  324. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_agent_evaluator.py +0 -0
  325. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py +0 -0
  326. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_base_evaluator.py +0 -0
  327. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_circular_evaluator.py +0 -0
  328. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_em_evaluator.py +0 -0
  329. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py +0 -0
  330. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_jieba_rouge_evaluator.py +0 -0
  331. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_misc_evaluator.py +0 -0
  332. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py +0 -0
  333. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_toxic_evaluator.py +0 -0
  334. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/__init__.py +0 -0
  335. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_agent_inferencer.py +0 -0
  336. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_attack_inferencer.py +0 -0
  337. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_base_inferencer.py +0 -0
  338. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py +0 -0
  339. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py +0 -0
  340. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py +0 -0
  341. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_mink_percent_inferencer.py +0 -0
  342. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +0 -0
  343. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ppl_only_inferencer.py +0 -0
  344. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_sc_inferencer.py +0 -0
  345. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_tot_inferencer.py +0 -0
  346. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/__init__.py +0 -0
  347. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_base_retriever.py +0 -0
  348. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_bm25_retriever.py +0 -0
  349. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_dpp_retriever.py +0 -0
  350. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_fix_k_retriever.py +0 -0
  351. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_mdl_retriever.py +0 -0
  352. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_random_retriever.py +0 -0
  353. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_topk_retriever.py +0 -0
  354. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_votek_retriever.py +0 -0
  355. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_zero_retriever.py +0 -0
  356. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/utils/__init__.py +0 -0
  357. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/openicl/utils/logging.py +0 -0
  358. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/__init__.py +0 -0
  359. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/mm_naive.py +0 -0
  360. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/naive.py +0 -0
  361. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/num_worker.py +0 -0
  362. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/partitioners/size.py +0 -0
  363. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/__init__.py +0 -0
  364. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/local_api.py +0 -0
  365. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/slurm.py +0 -0
  366. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/runners/slurm_sequential.py +0 -0
  367. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/__init__.py +0 -0
  368. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/circular.py +0 -0
  369. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/multi_model.py +0 -0
  370. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/__init__.py +0 -0
  371. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/corev2.py +0 -0
  372. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/creationbench.py +0 -0
  373. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/information_retrival.py +0 -0
  374. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/subjective_post_process.py +0 -0
  375. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/subjective/utils.py +0 -0
  376. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/summarizers/summarizer_pretrain.py +0 -0
  377. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/__init__.py +0 -0
  378. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/base.py +0 -0
  379. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/llm_eval.py +0 -0
  380. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/mm_infer.py +0 -0
  381. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/tasks/openicl_attack.py +0 -0
  382. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/__init__.py +0 -0
  383. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/auxiliary.py +0 -0
  384. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/collect_env.py +0 -0
  385. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/dependency.py +0 -0
  386. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/file.py +0 -0
  387. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/fileio.py +0 -0
  388. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/lark.py +0 -0
  389. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/logging.py +0 -0
  390. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/menu.py +0 -0
  391. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass/utils/types.py +0 -0
  392. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass.egg-info/dependency_links.txt +0 -0
  393. {opencompass-0.2.3 → opencompass-0.2.4}/opencompass.egg-info/top_level.txt +0 -0
  394. {opencompass-0.2.3 → opencompass-0.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: opencompass
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: A comprehensive toolkit for large model evaluation
5
5
  Home-page: https://github.com/open-compass/opencompass
6
6
  Author: OpenCompass Contributors
@@ -11,8 +11,13 @@ Description: <div align="center">
11
11
  <br />
12
12
  <br />
13
13
 
14
- [![docs](https://readthedocs.org/projects/opencompass/badge)](https://opencompass.readthedocs.io/en)
15
- [![license](https://img.shields.io/github/license/InternLM/opencompass.svg)](https://github.com/open-compass/opencompass/blob/main/LICENSE)
14
+ [![][github-release-shield]][github-release-link]
15
+ [![][github-releasedate-shield]][github-releasedate-link]
16
+ [![][github-contributors-shield]][github-contributors-link]<br>
17
+ [![][github-forks-shield]][github-forks-link]
18
+ [![][github-stars-shield]][github-stars-link]
19
+ [![][github-issues-shield]][github-issues-link]
20
+ [![][github-license-shield]][github-license-link]
16
21
 
17
22
  <!-- [![PyPI](https://badge.fury.io/py/opencompass.svg)](https://pypi.org/project/opencompass/) -->
18
23
 
@@ -25,12 +30,18 @@ Description: <div align="center">
25
30
 
26
31
  English | [简体中文](README_zh-CN.md)
27
32
 
33
+ [![][github-trending-shield]][github-trending-url]
34
+
28
35
  </div>
29
36
 
30
37
  <p align="center">
31
38
  👋 join us on <a href="https://discord.gg/KKwfEbFj7U" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=opencompass" target="_blank">WeChat</a>
32
39
  </p>
33
40
 
41
+ > \[!IMPORTANT\]
42
+ >
43
+ > **Star Us**, You will receive all release notifications from GitHub without any delay ~ ⭐️
44
+
34
45
  ## 📣 OpenCompass 2.0
35
46
 
36
47
  We are thrilled to introduce OpenCompass 2.0, an advanced suite featuring three key components: [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home).
@@ -42,6 +53,14 @@ Description: <div align="center">
42
53
 
43
54
  **CompassKit** is a powerful collection of evaluation toolkits specifically tailored for Large Language Models and Large Vision-language Models. It provides an extensive set of tools to assess and measure the performance of these complex models effectively. You are welcome to try our toolkits in your research and products.
44
55
 
56
+ <details>
57
+ <summary><kbd>Star History</kbd></summary>
58
+ <picture>
59
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&theme=dark&type=Date">
60
+ <img width="100%" src="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&type=Date">
61
+ </picture>
62
+ </details>
63
+
45
64
  ## 🧭 Welcome
46
65
 
47
66
  to **OpenCompass**!
@@ -59,12 +78,9 @@ Description: <div align="center">
59
78
 
60
79
  ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
61
80
 
62
- - **\[2024.02.29\]** We supported the MT-Bench, AlpacalEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html) 🔥🔥🔥.
63
- - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information ! 🔥🔥🔥.
64
- - **\[2024.01.17\]** We supported the evaluation of [InternLM2](https://github.com/open-compass/opencompass/blob/main/configs/eval_internlm2_keyset.py) and [InternLM2-Chat](https://github.com/open-compass/opencompass/blob/main/configs/eval_internlm2_chat_keyset.py), InternLM2 showed extremely strong performance in these tests, welcome to try! 🔥🔥🔥.
65
- - **\[2024.01.17\]** We supported the needle in a haystack test with multiple needles, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/needleinahaystack_eval.html#id8) 🔥🔥🔥.
66
- - **\[2023.12.28\]** We have enabled seamless evaluation of all models developed using [LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory), a powerful toolkit for comprehensive LLM development.
67
- - **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details!
81
+ - **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
82
+ - **\[2024.02.29\]** We supported the MT-Bench, AlpacaEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
83
+ - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
68
84
 
69
85
  > [More](docs/en/notes/news.md)
70
86
 
@@ -447,6 +463,7 @@ Description: <div align="center">
447
463
 
448
464
  - [InternLM](https://github.com/InternLM/InternLM)
449
465
  - [LLaMA](https://github.com/facebookresearch/llama)
466
+ - [LLaMA3](https://github.com/meta-llama/llama3)
450
467
  - [Vicuna](https://github.com/lm-sys/FastChat)
451
468
  - [Alpaca](https://github.com/tatsu-lab/stanford_alpaca)
452
469
  - [Baichuan](https://github.com/baichuan-inc)
@@ -505,6 +522,20 @@ Description: <div align="center">
505
522
 
506
523
  We appreciate all contributions to improving OpenCompass. Please refer to the [contributing guideline](https://opencompass.readthedocs.io/en/latest/notes/contribution_guide.html) for the best practice.
507
524
 
525
+ <!-- Copy-paste in your Readme.md file -->
526
+
527
+ <!-- Made with [OSS Insight](https://ossinsight.io/) -->
528
+
529
+ <a href="https://github.com/open-compass/opencompass/graphs/contributors" target="_blank">
530
+ <table>
531
+ <tr>
532
+ <th colspan="2">
533
+ <br><img src="https://contrib.rocks/image?repo=open-compass/opencompass"><br><br>
534
+ </th>
535
+ </tr>
536
+ </table>
537
+ </a>
538
+
508
539
  ## 🤝 Acknowledgements
509
540
 
510
541
  Some code in this project is cited and modified from [OpenICL](https://github.com/Shark-NLP/OpenICL).
@@ -524,6 +555,23 @@ Description: <div align="center">
524
555
 
525
556
  <p align="right"><a href="#top">🔝Back to top</a></p>
526
557
 
558
+ [github-contributors-link]: https://github.com/open-compass/opencompass/graphs/contributors
559
+ [github-contributors-shield]: https://img.shields.io/github/contributors/open-compass/opencompass?color=c4f042&labelColor=black&style=flat-square
560
+ [github-forks-link]: https://github.com/open-compass/opencompass/network/members
561
+ [github-forks-shield]: https://img.shields.io/github/forks/open-compass/opencompass?color=8ae8ff&labelColor=black&style=flat-square
562
+ [github-issues-link]: https://github.com/open-compass/opencompass/issues
563
+ [github-issues-shield]: https://img.shields.io/github/issues/open-compass/opencompass?color=ff80eb&labelColor=black&style=flat-square
564
+ [github-license-link]: https://github.com/open-compass/opencompass/blob/main/LICENSE
565
+ [github-license-shield]: https://img.shields.io/github/license/open-compass/opencompass?color=white&labelColor=black&style=flat-square
566
+ [github-release-link]: https://github.com/open-compass/opencompass/releases
567
+ [github-release-shield]: https://img.shields.io/github/v/release/open-compass/opencompass?color=369eff&labelColor=black&logo=github&style=flat-square
568
+ [github-releasedate-link]: https://github.com/open-compass/opencompass/releases
569
+ [github-releasedate-shield]: https://img.shields.io/github/release-date/open-compass/opencompass?labelColor=black&style=flat-square
570
+ [github-stars-link]: https://github.com/open-compass/opencompass/stargazers
571
+ [github-stars-shield]: https://img.shields.io/github/stars/open-compass/opencompass?color=ffcb47&labelColor=black&style=flat-square
572
+ [github-trending-shield]: https://trendshift.io/api/badge/repositories/6630
573
+ [github-trending-url]: https://trendshift.io/repositories/6630
574
+
527
575
  Keywords: AI,NLP,in-context learning,large language model,evaluation,benchmark,llm
528
576
  Platform: UNKNOWN
529
577
  Classifier: Programming Language :: Python :: 3.8
@@ -3,8 +3,13 @@
3
3
  <br />
4
4
  <br />
5
5
 
6
- [![docs](https://readthedocs.org/projects/opencompass/badge)](https://opencompass.readthedocs.io/en)
7
- [![license](https://img.shields.io/github/license/InternLM/opencompass.svg)](https://github.com/open-compass/opencompass/blob/main/LICENSE)
6
+ [![][github-release-shield]][github-release-link]
7
+ [![][github-releasedate-shield]][github-releasedate-link]
8
+ [![][github-contributors-shield]][github-contributors-link]<br>
9
+ [![][github-forks-shield]][github-forks-link]
10
+ [![][github-stars-shield]][github-stars-link]
11
+ [![][github-issues-shield]][github-issues-link]
12
+ [![][github-license-shield]][github-license-link]
8
13
 
9
14
  <!-- [![PyPI](https://badge.fury.io/py/opencompass.svg)](https://pypi.org/project/opencompass/) -->
10
15
 
@@ -17,12 +22,18 @@
17
22
 
18
23
  English | [简体中文](README_zh-CN.md)
19
24
 
25
+ [![][github-trending-shield]][github-trending-url]
26
+
20
27
  </div>
21
28
 
22
29
  <p align="center">
23
30
  👋 join us on <a href="https://discord.gg/KKwfEbFj7U" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=opencompass" target="_blank">WeChat</a>
24
31
  </p>
25
32
 
33
+ > \[!IMPORTANT\]
34
+ >
35
+ > **Star Us**, You will receive all release notifications from GitHub without any delay ~ ⭐️
36
+
26
37
  ## 📣 OpenCompass 2.0
27
38
 
28
39
  We are thrilled to introduce OpenCompass 2.0, an advanced suite featuring three key components: [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home).
@@ -34,6 +45,14 @@ We are thrilled to introduce OpenCompass 2.0, an advanced suite featuring three
34
45
 
35
46
  **CompassKit** is a powerful collection of evaluation toolkits specifically tailored for Large Language Models and Large Vision-language Models. It provides an extensive set of tools to assess and measure the performance of these complex models effectively. You are welcome to try our toolkits in your research and products.
36
47
 
48
+ <details>
49
+ <summary><kbd>Star History</kbd></summary>
50
+ <picture>
51
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&theme=dark&type=Date">
52
+ <img width="100%" src="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&type=Date">
53
+ </picture>
54
+ </details>
55
+
37
56
  ## 🧭 Welcome
38
57
 
39
58
  to **OpenCompass**!
@@ -51,12 +70,9 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
51
70
 
52
71
  ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
53
72
 
54
- - **\[2024.02.29\]** We supported the MT-Bench, AlpacalEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html) 🔥🔥🔥.
55
- - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information ! 🔥🔥🔥.
56
- - **\[2024.01.17\]** We supported the evaluation of [InternLM2](https://github.com/open-compass/opencompass/blob/main/configs/eval_internlm2_keyset.py) and [InternLM2-Chat](https://github.com/open-compass/opencompass/blob/main/configs/eval_internlm2_chat_keyset.py), InternLM2 showed extremely strong performance in these tests, welcome to try! 🔥🔥🔥.
57
- - **\[2024.01.17\]** We supported the needle in a haystack test with multiple needles, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/needleinahaystack_eval.html#id8) 🔥🔥🔥.
58
- - **\[2023.12.28\]** We have enabled seamless evaluation of all models developed using [LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory), a powerful toolkit for comprehensive LLM development.
59
- - **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details!
73
+ - **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
74
+ - **\[2024.02.29\]** We supported the MT-Bench, AlpacaEval and AlignBench, more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
75
+ - **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
60
76
 
61
77
  > [More](docs/en/notes/news.md)
62
78
 
@@ -439,6 +455,7 @@ Through the command line or configuration files, OpenCompass also supports evalu
439
455
 
440
456
  - [InternLM](https://github.com/InternLM/InternLM)
441
457
  - [LLaMA](https://github.com/facebookresearch/llama)
458
+ - [LLaMA3](https://github.com/meta-llama/llama3)
442
459
  - [Vicuna](https://github.com/lm-sys/FastChat)
443
460
  - [Alpaca](https://github.com/tatsu-lab/stanford_alpaca)
444
461
  - [Baichuan](https://github.com/baichuan-inc)
@@ -497,6 +514,20 @@ Through the command line or configuration files, OpenCompass also supports evalu
497
514
 
498
515
  We appreciate all contributions to improving OpenCompass. Please refer to the [contributing guideline](https://opencompass.readthedocs.io/en/latest/notes/contribution_guide.html) for the best practice.
499
516
 
517
+ <!-- Copy-paste in your Readme.md file -->
518
+
519
+ <!-- Made with [OSS Insight](https://ossinsight.io/) -->
520
+
521
+ <a href="https://github.com/open-compass/opencompass/graphs/contributors" target="_blank">
522
+ <table>
523
+ <tr>
524
+ <th colspan="2">
525
+ <br><img src="https://contrib.rocks/image?repo=open-compass/opencompass"><br><br>
526
+ </th>
527
+ </tr>
528
+ </table>
529
+ </a>
530
+
500
531
  ## 🤝 Acknowledgements
501
532
 
502
533
  Some code in this project is cited and modified from [OpenICL](https://github.com/Shark-NLP/OpenICL).
@@ -515,3 +546,20 @@ Some datasets and prompt implementations are modified from [chain-of-thought-hub
515
546
  ```
516
547
 
517
548
  <p align="right"><a href="#top">🔝Back to top</a></p>
549
+
550
+ [github-contributors-link]: https://github.com/open-compass/opencompass/graphs/contributors
551
+ [github-contributors-shield]: https://img.shields.io/github/contributors/open-compass/opencompass?color=c4f042&labelColor=black&style=flat-square
552
+ [github-forks-link]: https://github.com/open-compass/opencompass/network/members
553
+ [github-forks-shield]: https://img.shields.io/github/forks/open-compass/opencompass?color=8ae8ff&labelColor=black&style=flat-square
554
+ [github-issues-link]: https://github.com/open-compass/opencompass/issues
555
+ [github-issues-shield]: https://img.shields.io/github/issues/open-compass/opencompass?color=ff80eb&labelColor=black&style=flat-square
556
+ [github-license-link]: https://github.com/open-compass/opencompass/blob/main/LICENSE
557
+ [github-license-shield]: https://img.shields.io/github/license/open-compass/opencompass?color=white&labelColor=black&style=flat-square
558
+ [github-release-link]: https://github.com/open-compass/opencompass/releases
559
+ [github-release-shield]: https://img.shields.io/github/v/release/open-compass/opencompass?color=369eff&labelColor=black&logo=github&style=flat-square
560
+ [github-releasedate-link]: https://github.com/open-compass/opencompass/releases
561
+ [github-releasedate-shield]: https://img.shields.io/github/release-date/open-compass/opencompass?labelColor=black&style=flat-square
562
+ [github-stars-link]: https://github.com/open-compass/opencompass/stargazers
563
+ [github-stars-shield]: https://img.shields.io/github/stars/open-compass/opencompass?color=ffcb47&labelColor=black&style=flat-square
564
+ [github-trending-shield]: https://trendshift.io/api/badge/repositories/6630
565
+ [github-trending-url]: https://trendshift.io/repositories/6630
@@ -0,0 +1 @@
1
+ __version__ = '0.2.4'
@@ -1,6 +1,10 @@
1
1
  import ast
2
2
 
3
- import networkx as nx
3
+ try:
4
+ import networkx as nx
5
+ except ImportError:
6
+ nx = None
7
+
4
8
  from datasets import Dataset
5
9
 
6
10
  from opencompass.openicl.icl_evaluator import BaseEvaluator
@@ -1,7 +1,11 @@
1
1
  import ast
2
2
  import json
3
3
 
4
- import networkx as nx
4
+ try:
5
+ import networkx as nx
6
+ except ImportError:
7
+ nx = None
8
+
5
9
  import pandas as pd
6
10
  from datasets import Dataset
7
11
 
@@ -1,7 +1,11 @@
1
1
  import ast
2
2
  import json
3
3
 
4
- import networkx as nx
4
+ try:
5
+ import networkx as nx
6
+ except ImportError:
7
+ nx = None
8
+
5
9
  from datasets import Dataset
6
10
 
7
11
  from opencompass.openicl.icl_evaluator import BaseEvaluator
@@ -0,0 +1,59 @@
1
+ import json
2
+
3
+ from datasets import Dataset
4
+
5
+ from opencompass.openicl.icl_evaluator import BaseEvaluator
6
+ from opencompass.registry import LOAD_DATASET
7
+
8
+ from .base import BaseDataset
9
+
10
+
11
@LOAD_DATASET.register_module()
class QuALITYDataset(BaseDataset):
    """Loader that flattens a QuALITY JSON-lines file to one row per question."""

    @staticmethod
    def load(path: str):
        """Read ``path`` and build a dataset with one entry per question.

        Every line of the file is parsed as a JSON object that provides an
        ``article`` and a list of ``questions``. Each question contributes
        its text, its first four ``options`` (stored under ``A``-``D``), its
        ``gold_label`` converted from a 1-based index to a letter, and its
        ``difficult`` flag.

        Args:
            path: Location of the JSON-lines file; it is read as UTF-8.

        Returns:
            The result of ``Dataset.from_list`` over the flattened rows.
        """
        letters = 'ABCD'
        rows = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                record = json.loads(raw_line)
                for item in record['questions']:
                    row = {
                        'article': record['article'],
                        'question': item['question'],
                    }
                    for position, letter in enumerate(letters):
                        row[letter] = item['options'][position]
                    # gold_label is 1-based in the file; map it to its letter.
                    row['gold_label'] = letters[item['gold_label'] - 1]
                    row['difficult'] = item['difficult']
                    rows.append(row)
        return Dataset.from_list(rows)
40
+
41
+
42
class QuALITYEvaluator(BaseEvaluator):
    """Exact-match accuracy on QuALITY, split by question difficulty."""

    def score(self, predictions, references, test_set):
        """Compute accuracy over easy, hard and all questions.

        Args:
            predictions: Predicted answers, one per sample.
            references: Gold answers, aligned with ``predictions``.
            test_set: Iterable of samples aligned with ``predictions``; each
                sample's ``'difficult'`` field selects the bucket (``0``
                means easy, anything else means hard).

        Returns:
            A dict with ``easy_acc``, ``hard_acc`` and ``all_acc`` as
            percentages. A bucket that received no samples scores ``0.0``
            instead of raising ``ZeroDivisionError``.

        Raises:
            AssertionError: If ``predictions`` and ``references`` differ in
                length.
        """
        assert len(predictions) == len(references)

        easy, hard, overall = [], [], []
        for pred, refer, test in zip(predictions, references, test_set):
            is_right = pred == refer
            overall.append(is_right)
            bucket = easy if test['difficult'] == 0 else hard
            bucket.append(is_right)

        def accuracy(outcomes):
            # Each bucket is normalised by its OWN size; the hard bucket was
            # previously divided by the number of easy questions.
            return sum(outcomes) / len(outcomes) * 100 if outcomes else 0.0

        return dict(easy_acc=accuracy(easy),
                    hard_acc=accuracy(hard),
                    all_acc=accuracy(overall))
@@ -0,0 +1,4 @@
1
+ from .legacy import (TheoremQA_postprocess, TheoremQA_postprocess_v2,
2
+ TheoremQADataset)
3
+ from .main import (TheoremQA_postprocess_v3, TheoremQADatasetV3,
4
+ TheoremQAEvaluatorV3)
@@ -4,7 +4,7 @@ from datasets import load_dataset
4
4
 
5
5
  from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
6
6
 
7
- from .base import BaseDataset
7
+ from ..base import BaseDataset
8
8
 
9
9
 
10
10
  @LOAD_DATASET.register_module()
@@ -0,0 +1,66 @@
1
+ import re
2
+ import json
3
+
4
+ from datasets import Dataset, DatasetDict
5
+
6
+ from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS, ICL_EVALUATORS
7
+
8
+ from opencompass.openicl.icl_evaluator import BaseEvaluator
9
+ from ..base import BaseDataset
10
+ from . import utils
11
+ from tqdm import tqdm
12
+
13
+
14
@LOAD_DATASET.register_module()
class TheoremQADatasetV3(BaseDataset):
    """Loader for the TheoremQA benchmark stored as a single JSON array."""

    @staticmethod
    def load(path: str):
        """Load the JSON file at ``path`` into a dataset.

        The ``'Answer'`` field of every item is converted to ``str`` so the
        column has a single type even when the file mixes numbers, booleans
        and lists.

        Args:
            path: Location of the JSON file. It is decoded as UTF-8 rather
                than with the platform default encoding, so loading does not
                depend on the locale.

        Returns:
            The result of ``Dataset.from_list`` over the loaded items.
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for item in data:
            item['Answer'] = str(item['Answer'])
        return Dataset.from_list(data)
25
+
26
+
27
def TheoremQA_postprocess_v3(text: str) -> str:
    """Extract the final answer from a model completion.

    Delegates to ``utils.answer_clean`` with the answer-trigger phrases used
    by the TheoremQA prompts.

    Args:
        text: Raw model output.

    Returns:
        Whatever ``utils.answer_clean`` returns for ``text``.
    """
    triggers = ["The answer is:", "The answer is", "the answer is"]
    return utils.answer_clean(triggers, text)
30
+
31
+
32
@ICL_EVALUATORS.register_module()
class TheoremQAEvaluatorV3(BaseEvaluator):
    """Scores TheoremQA predictions against string and numeric references."""

    def score(self, predictions, references, test_set):
        """Compare every prediction with its reference.

        Args:
            predictions: Post-processed model answers.
            references: Gold answers as strings, aligned with
                ``predictions``.
            test_set: Indexable collection of samples; the ``'Answer_type'``
                field of sample ``i`` decides whether reference ``i`` is also
                compared numerically.

        Returns:
            ``{'score': <percentage>, 'details': [...]}``, or a dict with an
            ``'error'`` message when the two inputs differ in length. With no
            predictions the score is ``0.0`` instead of raising
            ``ZeroDivisionError``.
        """
        if len(predictions) != len(references):
            return {"error": "preds and refrs have different length"}

        details = []
        correct = 0
        pairs = zip(predictions, references)
        for index, (answer, reference) in enumerate(
                tqdm(pairs, total=len(predictions))):
            answer_type = test_set[index]['Answer_type']
            if answer_type in ('float', 'integer', 'bool'):
                # NOTE(review): eval() runs text taken from the dataset file.
                # Only load datasets from trusted sources, or confirm that
                # ast.literal_eval covers every reference before switching.
                groundtruth = [reference, eval(reference)]
            else:
                groundtruth = [reference, None]

            is_correct = bool(
                utils.compare_answer_with_groundtruth(answer, *groundtruth))
            correct += is_correct

            details.append({
                "correct": groundtruth,
                "pred": answer,
                "is_correct": is_correct,
            })

        total = len(details)
        score = correct / total * 100 if total else 0.0
        return {'score': score, 'details': details}
@@ -0,0 +1,98 @@
1
+ import re
2
+ import math
3
+ from math import sqrt, sin, cos, log, pi, factorial, exp, e
4
+ E = 2.718
5
+
6
+
7
def floatify(num: str):
    """Convert ``num`` to a number, or return ``None`` if that fails.

    Args:
        num: Anything accepted by ``float()``.

    Returns:
        An ``int`` when the parsed value has no fractional part, the
        ``float`` otherwise, and ``None`` when the conversion raises.
    """
    try:
        value = float(num)
        return round(value) if value.is_integer() else value
    except Exception:
        return None
16
+
17
+
18
def within_eps(pred: float, gt: float):
    """Tell whether ``pred`` lies within 4% of ``gt``.

    The tolerance is relative to ``abs(gt)``, so a ground truth of zero only
    accepts an exact match.

    Args:
        pred: Predicted value.
        gt: Ground-truth value.

    Returns:
        ``True`` when ``gt - eps <= pred <= gt + eps``, else ``False``.
    """
    tolerance = abs(gt) * 0.04
    lower, upper = gt - tolerance, gt + tolerance
    return pred >= lower and pred <= upper
24
+
25
+
26
def clean_units(pred_str: str):
    """Replace pi with 3.14 and strip unit symbols from a number string.

    Args:
        pred_str: Text of a numeric answer, possibly containing LaTeX pi, a
            percent sign, currency symbols or degree markers.

    Returns:
        The text with pi written as ``3.14`` (with an explicit ``*`` after a
        leading digit), ``%`` rewritten as ``/100``, and the currency and
        degree markers removed.
    """
    # Normalise the LaTeX spelling first so the patterns below see one form.
    pred_str = pred_str.replace('\\pi', 'π')

    # Applied in order: bare pi, digit-prefixed pi, braced pi, multiplied pi.
    pi_rewrites = (
        (r'(?<![\d}])\\?π', '3.14'),
        (r'(\d)(\\?π)', r'\1*3.14'),
        (r'\{(\\?π)\}', '3.14'),
        (r'\*(\\?π)', '*3.14'),
    )
    for pattern, replacement in pi_rewrites:
        pred_str = re.sub(pattern, replacement, pred_str)

    # Order matters: '°C' has to be removed before the bare '°'.
    unit_rewrites = (
        ('%', '/100'),
        ('$', ''),
        ('¥', ''),
        ('°C', ''),
        (' C', ''),
        ('°', ''),
    )
    for token, substitute in unit_rewrites:
        pred_str = pred_str.replace(token, substitute)
    return pred_str
48
+
49
+
50
def number_it(num):
    """Turn an answer into a Python number when possible.

    Args:
        num: An ``int``/``float`` (returned unchanged) or a string that may
            contain LaTeX, units or an arithmetic expression.

    Returns:
        The numeric value (``int`` for whole numbers, otherwise ``float``),
        or ``None`` when no number can be derived. For an expression that
        evaluates to a list or tuple, the first element is used.
    """
    if isinstance(num, (int, float)):
        return num

    # Imported only once a string actually has to be parsed, so plain
    # numbers keep working when the optional latex2sympy2 package is absent.
    from latex2sympy2 import latex2sympy

    num = clean_units(num)
    try:
        num = str(latex2sympy(num))
    except Exception:
        # Best effort: text that is not valid LaTeX is tried as-is below.
        pass

    value = floatify(num)
    if value is not None:
        return value

    try:
        # NOTE(review): eval() executes text derived from model output; it is
        # kept so expressions such as "sqrt(2)/2" resolve against the names
        # imported at module level, but it must not see untrusted input
        # outside an evaluation sandbox.
        evaluated = eval(num)
        if isinstance(evaluated, (list, tuple)):
            evaluated = evaluated[0]
        return floatify(evaluated)
    except Exception:
        return None
74
+
75
+
76
def compare_two_numbers(p, gt):
    """Check a predicted number against a ground-truth number.

    Args:
        p: Predicted value; anything that is not a usable number yields
            ``False``.
        gt: Ground truth. An ``int`` requires ``round(p)`` to equal it;
            any other value is compared through ``within_eps``.

    Returns:
        ``True`` on a match, ``False`` otherwise, including when ``p`` is
        NaN or when the comparison raises.
    """
    try:
        if math.isnan(p):
            return False
        return round(p) == gt if isinstance(gt, int) else within_eps(pred=p, gt=gt)
    except Exception:
        return False
86
+
87
+
88
def compare_two_list(pred, gt):
    """Compare two lists of numbers without regard to order.

    Args:
        pred: Predicted values; must be a ``list`` of ``int``/``float``.
        gt: Ground-truth values.

    Returns:
        ``True`` when both have the same length and, after sorting, every
        pair passes ``compare_two_numbers``; ``False`` otherwise.
    """
    if not isinstance(pred, list) or len(pred) != len(gt):
        return False
    if not all(isinstance(x, (int, float)) for x in pred):
        return False
    ordered_pairs = zip(sorted(pred), sorted(gt))
    return all(compare_two_numbers(p, g) for p, g in ordered_pairs)
@@ -0,0 +1,110 @@
1
+ import re
2
+ from .number_utils import clean_units, compare_two_numbers, compare_two_list, number_it
3
+ import contextlib
4
+ import signal
5
+
6
@contextlib.contextmanager
def time_limit(seconds: float):
    """Abort the enclosed block with ``ValueError`` after ``seconds``.

    Uses ``SIGALRM`` with the real-time interval timer, so it relies on the
    platform providing both and on ``signal.signal`` being allowed in the
    calling thread.

    Args:
        seconds: Time budget for the ``with`` body.

    Raises:
        ValueError: Raised inside the ``with`` body when the budget expires.
    """
    def signal_handler(signum, frame):
        raise ValueError

    # Install the handler BEFORE arming the timer so an early alarm can never
    # reach a stale handler, and remember what was installed previously.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.setitimer(signal.ITIMER_REAL, seconds)
    try:
        yield
    finally:
        signal.setitimer(signal.ITIMER_REAL, 0)
        # signal.signal() returns None for handlers not installed from
        # Python; those cannot be passed back, so they are left alone.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
17
+
18
+
19
def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
    """Normalise a raw answer string into a comparable form.

    Checks are applied in this order:

    1. ``'yes'`` / ``'true'`` anywhere in the text gives ``'True'``.
    2. ``'no'`` / ``'false'`` anywhere in the text gives ``'False'``.
    3. Text containing an option marker ``(a)``-``(f)`` is returned as is.
    4. Otherwise a number is extracted (see ``answer_flag``).

    Steps 1 and 2 are case-insensitive substring tests, so words that merely
    contain those letters (for example "not") match as well.

    Args:
        pred: Answer text.
        answer_flag: ``True`` when ``pred`` follows an explicit answer
            trigger. The text after the last ``=`` is then parsed as LaTeX
            and evaluated. ``False`` falls back to the last number found in
            the text.

    Returns:
        The normalised answer; ``''`` when ``answer_flag`` is false and the
        text holds no number.
    """
    lowered = pred.lower()
    if any(option in lowered for option in ('yes', 'true')):
        return 'True'
    if any(option in lowered for option in ('no', 'false')):
        return 'False'
    if any(option in lowered
           for option in ('(a)', '(b)', '(c)', '(d)', '(e)', '(f)')):
        return pred

    if not answer_flag:
        # Desperate search: take the last number that appears in the text.
        numbers = re.findall(r'-?\d*\.?\d+', pred)
        return numbers[-1] if numbers else ''

    # Imported only on the path that needs it, so boolean, option and
    # regex-only answers still work when latex2sympy2 is not installed.
    from latex2sympy2 import latex2sympy

    # Keep only what follows the last '=' and drop unit symbols.
    pred = pred.split('=')[-1].strip()
    pred = clean_units(pred)
    try:
        # Parsing and evaluation both run under the time limit so a
        # pathological expression cannot stall the evaluation run.
        with time_limit(1):
            tmp = str(latex2sympy(pred))
            # NOTE(review): eval() executes text derived from model output;
            # run evaluations in a sandboxed environment.
            pred = str(eval(tmp))
    except Exception:
        # "<number> <unit>" style answers: keep only the leading number.
        if (re.match(r'-?[\d\.]+\s\D+$', pred)
                or re.match(r'-?[\d\.]+\s[^\s]+$', pred)):
            pred = pred.split(' ')[0]
    return pred
50
+
51
def answer_clean(direct_answer_trigger_for_fewshot: tuple, pred: str):
    """Pull the final answer out of a model completion.

    Args:
        direct_answer_trigger_for_fewshot: Phrases that introduce the answer
            (for example ``"The answer is"``). They are matched literally, in
            the given order.
        pred: Raw model output.

    Returns:
        The cleaned answer string produced by ``extract_theoremqa_answer``,
        with trailing ``.`` and ``/`` removed.
    """
    pred = pred.strip('\n')

    # A trigger that occurs more than once means the model kept generating
    # further few-shot examples; only the first paragraph belongs to the
    # actual question.
    if any(pred.count(trigger) > 1
           for trigger in direct_answer_trigger_for_fewshot):
        pred = pred.split('\n\n')[0]

    # Escape the triggers so punctuation in them is matched literally rather
    # than interpreted as regular-expression syntax.
    pattern = '|'.join(
        re.escape(trigger) for trigger in direct_answer_trigger_for_fewshot)
    pieces = re.split(pattern, pred)
    answer_flag = len(pieces) > 1
    if answer_flag:
        # Keep what follows the last trigger.
        pred = pieces[-1]

    pred = pred.strip('\n').rstrip('.').rstrip('/').strip(' ')
    pred = extract_theoremqa_answer(pred, answer_flag)

    # Remove a trailing period or slash once more after normalisation.
    return pred.rstrip('.').rstrip('/')
88
+
89
+
90
+
91
def compare_answer_with_groundtruth(answer: str, groundtruth_str: str, groundtruth_num = None):
    """Decide whether ``answer`` matches the ground truth.

    Args:
        answer: Cleaned model answer.
        groundtruth_str: Ground truth as text. When it is an option marker
            ``(a)``-``(f)``, the answer only needs to contain it.
        groundtruth_num: Optional numeric ground truth: a number, or a
            collection of numbers that is compared against a parenthesised
            answer such as ``"(1, 2)"``.

    Returns:
        ``True`` when the answer matches by option, by case-insensitive text
        equality, or numerically; ``False`` otherwise.
    """
    expected = groundtruth_str.lower()
    given = answer.lower()

    if expected in ['(a)', '(b)', '(c)', '(d)', '(e)', '(f)']:
        return expected in given
    if given == expected:
        return True
    if groundtruth_num is None:
        return False
    if isinstance(groundtruth_num, (int, float)):
        return compare_two_numbers(number_it(answer), groundtruth_num)

    # Collection ground truth: only a parenthesised answer can match.
    if not (answer.startswith('(') and answer.endswith(')')):
        return False
    try:
        answer = list(eval(answer))
        answer = [number_it(a) for a in answer]
    except Exception:
        return False
    return compare_two_list(answer, groundtruth_num)