opencompass 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencompass-0.2.2 → opencompass-0.2.4}/PKG-INFO +74 -29
- {opencompass-0.2.2 → opencompass-0.2.4}/README.md +73 -28
- opencompass-0.2.4/opencompass/__init__.py +1 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_GCP_D.py +5 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_TSP_D.py +5 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_SPP.py +5 -1
- opencompass-0.2.4/opencompass/datasets/OpenFinData.py +47 -0
- opencompass-0.2.4/opencompass/datasets/QuALITY.py +59 -0
- opencompass-0.2.4/opencompass/datasets/TheoremQA/__init__.py +4 -0
- opencompass-0.2.2/opencompass/datasets/TheoremQA.py → opencompass-0.2.4/opencompass/datasets/TheoremQA/legacy.py +13 -1
- opencompass-0.2.4/opencompass/datasets/TheoremQA/main.py +66 -0
- opencompass-0.2.4/opencompass/datasets/TheoremQA/number_utils.py +98 -0
- opencompass-0.2.4/opencompass/datasets/TheoremQA/utils.py +110 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/__init__.py +7 -1
- opencompass-0.2.4/opencompass/datasets/apps.py +877 -0
- opencompass-0.2.4/opencompass/datasets/chembench.py +34 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/custom.py +10 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/gpqa.py +10 -32
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/hellaswag.py +27 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/humaneval.py +5 -2
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/humanevalx.py +4 -1
- opencompass-0.2.4/opencompass/datasets/lveval/__init__.py +14 -0
- opencompass-0.2.4/opencompass/datasets/lveval/evaluators.py +409 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_cmrc_mixup.py +28 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_dureader_mixup.py +26 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_factrecall_en.py +28 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_factrecall_zh.py +28 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_hotpotwikiqa_mixup.py +31 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_lic_mixup.py +31 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_loogle_CR_mixup.py +29 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_loogle_MIR_mixup.py +29 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_loogle_SD_mixup.py +29 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_multifieldqa_en_mixup.py +31 -0
- opencompass-0.2.4/opencompass/datasets/lveval/lveval_multifieldqa_zh_mixup.py +31 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/math.py +19 -6
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/mathbench.py +1 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/mbpp.py +45 -40
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/natural_question.py +4 -3
- opencompass-0.2.4/opencompass/datasets/taco.py +823 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/winogrande.py +33 -3
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/__init__.py +9 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/ai360_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/baichuan_api.py +128 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/baidu_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/base.py +2 -2
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/base_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/bytedance_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/claude_api/claude_api.py +4 -4
- opencompass-0.2.4/opencompass/models/gemini_api.py +251 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/huggingface.py +1 -1
- opencompass-0.2.4/opencompass/models/hunyuan_api.py +121 -0
- opencompass-0.2.4/opencompass/models/krgpt_api.py +134 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/lightllm_api.py +38 -5
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/llama2.py +1 -1
- opencompass-0.2.2/opencompass/models/turbomind.py → opencompass-0.2.4/opencompass/models/lmdeploy_pytorch.py +27 -29
- opencompass-0.2.4/opencompass/models/lmdeploy_tis.py +200 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/minimax_api.py +4 -4
- opencompass-0.2.4/opencompass/models/mistral_api.py +123 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/moonshot_api.py +24 -26
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/nanbeige_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/openai_api.py +49 -17
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/pangu_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/qwen_api.py +28 -14
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/sensetime_api.py +14 -9
- opencompass-0.2.4/opencompass/models/turbomind.py +219 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/turbomind_api.py +27 -19
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/turbomind_tis.py +10 -4
- opencompass-0.2.4/opencompass/models/unigpt_api.py +147 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/vllm.py +6 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/xunfei_api.py +4 -4
- opencompass-0.2.4/opencompass/models/yayi_api.py +261 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/zhipuai_api.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/zhipuai_v2_api.py +12 -6
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/__init__.py +1 -0
- opencompass-0.2.4/opencompass/openicl/icl_evaluator/icl_plugin_evaluator.py +97 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/lm_evaluator.py +57 -23
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_chat_inferencer.py +8 -6
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +2 -2
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_prompt_template.py +4 -4
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/base.py +18 -7
- opencompass-0.2.4/opencompass/partitioners/sub_naive.py +220 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/sub_size.py +29 -6
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/registry.py +15 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/base.py +2 -1
- opencompass-0.2.4/opencompass/runners/dlc.py +289 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/local.py +23 -9
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/local_api.py +1 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/slurm.py +1 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/slurm_sequential.py +1 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/default.py +1 -1
- opencompass-0.2.4/opencompass/summarizers/needlebench.py +737 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/alignmentbench.py +43 -33
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/alpacaeval.py +2 -1
- opencompass-0.2.4/opencompass/summarizers/subjective/compass_arena.py +240 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/mtbench.py +55 -44
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/multiround.py +2 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/openicl_eval.py +3 -2
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/openicl_infer.py +12 -5
- opencompass-0.2.4/opencompass/tasks/subjective_eval.py +438 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/abbr.py +22 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/build.py +1 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/prompt.py +5 -5
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/run.py +140 -12
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/text_postprocessors.py +5 -5
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass.egg-info/PKG-INFO +74 -29
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass.egg-info/SOURCES.txt +34 -1
- opencompass-0.2.4/opencompass.egg-info/entry_points.txt +3 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass.egg-info/requires.txt +4 -1
- {opencompass-0.2.2 → opencompass-0.2.4}/setup.py +40 -33
- opencompass-0.2.2/opencompass/__init__.py +0 -1
- opencompass-0.2.2/opencompass/partitioners/sub_naive.py +0 -110
- opencompass-0.2.2/opencompass/runners/dlc.py +0 -229
- opencompass-0.2.2/opencompass/summarizers/subjective/compass_arena.py +0 -204
- opencompass-0.2.2/opencompass/tasks/subjective_eval.py +0 -282
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/FinanceIQ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/GaokaoBench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/cmp_KSP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_GCP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_MSP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/hard_TSP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_BSP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/p_EDP.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/prompts.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/NPHardEval/utils.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/advglue.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/afqmcd.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/agieval.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/constructions.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/dataset_loader.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/evaluation.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/math_equivalence.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/post_process.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/agieval/utils.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/anli.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/anthropics_evals.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/arc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/ax.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/base.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/bbh.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/boolq.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/bustum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/c3.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cb.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/ceval.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/chid.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cibench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/circular.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/civilcomments.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/clozeTest_maxmin.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cluewsc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cmb.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cmmlu.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cmnli.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cmrc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/commonsenseqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/commonsenseqa_cn.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/copa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/crowspairs.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/crowspairs_cn.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/csl.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/cvalues.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/drcd.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/drop.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/ds1000.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/ds1000_interpreter.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/eprstmt.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/flores.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/game24.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/govrepcrs.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/gsm8k.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/gsm_hard.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/huggingface.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/humaneval_multi.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/hungarian_math.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_codedebug.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_coderun.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_endia.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_enmc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_enqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_ensum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_mathcalc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_mathfind.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievekv.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievenumber.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_retrievepasskey.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/infinitebench_zhqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/infinitebench/utils.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/iwslt2017.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/jigsawmultilingual.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/jsonl.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/kaoshi.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/lambada.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/lawbench/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/lawbench/lawbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/lcsts.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/evaluators.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_coursera.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_financial_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_gov_report_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_gsm100.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_legal_contract_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_meeting_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_multidoc_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_narrattive_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_natural_question.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_news_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_paper_assistant.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_patent_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_quality.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_review_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_scientific_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_topic_retrieval.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_tpo.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/leval/leval_tvshow_summ.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/lmeval.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/evaluators.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_2wikim_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_dureader.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_gov_report.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_hotpot_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_lcc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_lsht.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multi_news.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multifieldqa_en.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_multifieldqa_zh.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_musique.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_narrative_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_count.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_retrieval_en.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_passage_retrieval_zh.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_qasper.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_qmsum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_repobench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_samsum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_trec.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_trivia_qa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/longbench/longbench_vcsum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/mastermath2024v1.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/math401.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/math_intern.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/constructions.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/dataset_loader.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/evaluation.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/math_equivalence.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/medbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/post_process.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/medbench/utils.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/mmlu.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/multirc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/narrativeqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/natural_question_cn.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/obqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/piqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/py150.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/qasper.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/qaspercut.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/race.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/realtoxicprompts.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/reasonbench/ReasonBenchDataset.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/reasonbench/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/record.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/rolebench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/safety.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/scibench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/siqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/squad20.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/storycloze.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/strategyqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/alignbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/compass_arena.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/corev2.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/creationbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/information_retrival.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/mtbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/multiround.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/subjective/subjective_cmp.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/summedits.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/summscreen.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/svamp.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/tabmwp.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/instruct_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/planning_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/reason_retrieve_understand_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/evaluators/review_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/schema.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/utils/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/utils/convert_results.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/utils/format_load.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/utils/meta_template.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/teval/utils/template.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/tnews.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/triviaqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/triviaqarc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/truthfulqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/tydiqa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/wic.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/wikibench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/winograd.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/wnli.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/wsc.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/xcopa.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/xiezhi.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/xlsum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/datasets/xsum.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/metrics/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/metrics/dump_results.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/metrics/mme_score.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/metrics/seedbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/accessory.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/alaya.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/claude_api/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/claude_api/postprocessors.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/glm.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/intern_model.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/lagent.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/langchain.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/mixtral.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/models/modelscope.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_dataset_reader.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_agent_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_base_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_circular_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_em_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_jieba_rouge_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_misc_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_evaluator/icl_toxic_evaluator.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_agent_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_attack_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_base_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_mink_percent_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_ppl_only_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_sc_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_inferencer/icl_tot_inferencer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_base_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_bm25_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_dpp_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_fix_k_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_mdl_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_random_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_topk_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_votek_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/icl_retriever/icl_zero_retriever.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/utils/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/openicl/utils/logging.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/mm_naive.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/naive.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/num_worker.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/partitioners/size.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/runners/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/circular.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/multi_model.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/corev2.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/creationbench.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/information_retrival.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/subjective_post_process.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/subjective/utils.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/summarizers/summarizer_pretrain.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/base.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/llm_eval.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/mm_infer.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/tasks/openicl_attack.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/__init__.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/auxiliary.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/collect_env.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/dependency.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/file.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/fileio.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/lark.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/logging.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/menu.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass/utils/types.py +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass.egg-info/dependency_links.txt +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/opencompass.egg-info/top_level.txt +0 -0
- {opencompass-0.2.2 → opencompass-0.2.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: opencompass
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A comprehensive toolkit for large model evaluation
|
|
5
5
|
Home-page: https://github.com/open-compass/opencompass
|
|
6
6
|
Author: OpenCompass Contributors
|
|
@@ -11,37 +11,55 @@ Description: <div align="center">
|
|
|
11
11
|
<br />
|
|
12
12
|
<br />
|
|
13
13
|
|
|
14
|
-
[![
|
|
15
|
-
[![
|
|
14
|
+
[![][github-release-shield]][github-release-link]
|
|
15
|
+
[![][github-releasedate-shield]][github-releasedate-link]
|
|
16
|
+
[![][github-contributors-shield]][github-contributors-link]<br>
|
|
17
|
+
[![][github-forks-shield]][github-forks-link]
|
|
18
|
+
[![][github-stars-shield]][github-stars-link]
|
|
19
|
+
[![][github-issues-shield]][github-issues-link]
|
|
20
|
+
[![][github-license-shield]][github-license-link]
|
|
16
21
|
|
|
17
22
|
<!-- [](https://pypi.org/project/opencompass/) -->
|
|
18
23
|
|
|
19
24
|
[🌐Website](https://opencompass.org.cn/) |
|
|
25
|
+
[📖CompassHub](https://hub.opencompass.org.cn/home) |
|
|
26
|
+
[📊CompassRank](https://rank.opencompass.org.cn/home) |
|
|
20
27
|
[📘Documentation](https://opencompass.readthedocs.io/en/latest/) |
|
|
21
28
|
[🛠️Installation](https://opencompass.readthedocs.io/en/latest/get_started/installation.html) |
|
|
22
29
|
[🤔Reporting Issues](https://github.com/open-compass/opencompass/issues/new/choose)
|
|
23
30
|
|
|
24
31
|
English | [简体中文](README_zh-CN.md)
|
|
25
32
|
|
|
33
|
+
[![][github-trending-shield]][github-trending-url]
|
|
34
|
+
|
|
26
35
|
</div>
|
|
27
36
|
|
|
28
37
|
<p align="center">
|
|
29
38
|
👋 join us on <a href="https://discord.gg/KKwfEbFj7U" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=opencompass" target="_blank">WeChat</a>
|
|
30
39
|
</p>
|
|
31
40
|
|
|
32
|
-
|
|
41
|
+
> \[!IMPORTANT\]
|
|
42
|
+
>
|
|
43
|
+
> **Star Us**, You will receive all release notifications from GitHub without any delay ~ ⭐️
|
|
33
44
|
|
|
34
|
-
|
|
45
|
+
## 📣 OpenCompass 2.0
|
|
35
46
|
|
|
36
|
-
We
|
|
47
|
+
We are thrilled to introduce OpenCompass 2.0, an advanced suite featuring three key components: [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home).
|
|
48
|
+

|
|
37
49
|
|
|
38
|
-
|
|
50
|
+
**CompassRank** has been significantly enhanced into a set of leaderboards that now incorporate both open-source benchmarks and proprietary benchmarks. This upgrade allows for a more comprehensive evaluation of models across the industry.
|
|
39
51
|
|
|
40
|
-
|
|
52
|
+
**CompassHub** presents a pioneering benchmark browser interface, designed to simplify and expedite the exploration and utilization of an extensive array of benchmarks for researchers and practitioners alike. To enhance the visibility of your own benchmark within the community, we warmly invite you to contribute it to CompassHub. You may initiate the submission process by clicking [here](https://hub.opencompass.org.cn/dataset-submit).
|
|
41
53
|
|
|
42
|
-
|
|
54
|
+
**CompassKit** is a powerful collection of evaluation toolkits specifically tailored for Large Language Models and Large Vision-language Models. It provides an extensive set of tools to assess and measure the performance of these complex models effectively. You are welcome to try our toolkits in your research and products.
|
|
43
55
|
|
|
44
|
-
|
|
56
|
+
<details>
|
|
57
|
+
<summary><kbd>Star History</kbd></summary>
|
|
58
|
+
<picture>
|
|
59
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&theme=dark&type=Date">
|
|
60
|
+
<img width="100%" src="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&type=Date">
|
|
61
|
+
</picture>
|
|
62
|
+
</details>
|
|
45
63
|
|
|
46
64
|
## 🧭 Welcome
|
|
47
65
|
|
|
@@ -60,12 +78,9 @@ Description: <div align="center">
|
|
|
60
78
|
|
|
61
79
|
## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
|
62
80
|
|
|
63
|
-
- **\[2024.
|
|
64
|
-
- **\[2024.
|
|
65
|
-
- **\[
|
|
66
|
-
- **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details! 🔥🔥🔥.
|
|
67
|
-
- **\[2023.12.10\]** We have released [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), a toolkit for evaluating vision-language models (VLMs), currently support 20+ VLMs and 7 multi-modal benchmarks (including MMBench series).
|
|
68
|
-
- **\[2023.12.10\]** We have supported Mistral AI's MoE LLM: **Mixtral-8x7B-32K**. Welcome to [MixtralKit](https://github.com/open-compass/MixtralKit) for more details about inference and evaluation.
|
|
81
|
+
- **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
|
|
82
|
+
- **\[2024.02.29\]** We supported MT-Bench, AlpacaEval and AlignBench; more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
|
|
83
|
+
- **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
|
|
69
84
|
|
|
70
85
|
> [More](docs/en/notes/news.md)
|
|
71
86
|
|
|
@@ -87,7 +102,7 @@ Description: <div align="center">
|
|
|
87
102
|
|
|
88
103
|
## 📊 Leaderboard
|
|
89
104
|
|
|
90
|
-
We provide [OpenCompass Leaderboard](https://opencompass.org.cn/
|
|
105
|
+
We provide [OpenCompass Leaderboard](https://rank.opencompass.org.cn/home) for the community to rank all public models and API models. If you would like to join the evaluation, please provide the model repository URL or a standard API interface to the email address `opencompass@pjlab.org.cn`.
|
|
91
106
|
|
|
92
107
|
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
93
108
|
|
|
@@ -122,8 +137,8 @@ Description: <div align="center">
|
|
|
122
137
|
|
|
123
138
|
```bash
|
|
124
139
|
# Download dataset to data/ folder
|
|
125
|
-
wget https://github.com/open-compass/opencompass/releases/download/0.
|
|
126
|
-
unzip OpenCompassData-core-
|
|
140
|
+
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
|
|
141
|
+
unzip OpenCompassData-core-20240207.zip
|
|
127
142
|
```
|
|
128
143
|
|
|
129
144
|
Some third-party features, like Humaneval and Llama, may require additional steps to work properly, for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).
|
|
@@ -428,10 +443,6 @@ Description: <div align="center">
|
|
|
428
443
|
</tbody>
|
|
429
444
|
</table>
|
|
430
445
|
|
|
431
|
-
## OpenCompass Ecosystem
|
|
432
|
-
|
|
433
|
-
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
434
|
-
|
|
435
446
|
## 📖 Model Support
|
|
436
447
|
|
|
437
448
|
<table align="center">
|
|
@@ -452,6 +463,7 @@ Description: <div align="center">
|
|
|
452
463
|
|
|
453
464
|
- [InternLM](https://github.com/InternLM/InternLM)
|
|
454
465
|
- [LLaMA](https://github.com/facebookresearch/llama)
|
|
466
|
+
- [LLaMA3](https://github.com/meta-llama/llama3)
|
|
455
467
|
- [Vicuna](https://github.com/lm-sys/FastChat)
|
|
456
468
|
- [Alpaca](https://github.com/tatsu-lab/stanford_alpaca)
|
|
457
469
|
- [Baichuan](https://github.com/baichuan-inc)
|
|
@@ -461,12 +473,14 @@ Description: <div align="center">
|
|
|
461
473
|
- [TigerBot](https://github.com/TigerResearch/TigerBot)
|
|
462
474
|
- [Qwen](https://github.com/QwenLM/Qwen)
|
|
463
475
|
- [BlueLM](https://github.com/vivo-ai-lab/BlueLM)
|
|
476
|
+
- [Gemma](https://huggingface.co/google/gemma-7b)
|
|
464
477
|
- ...
|
|
465
478
|
|
|
466
479
|
</td>
|
|
467
480
|
<td>
|
|
468
481
|
|
|
469
482
|
- OpenAI
|
|
483
|
+
- Gemini
|
|
470
484
|
- Claude
|
|
471
485
|
- ZhipuAI(ChatGLM)
|
|
472
486
|
- Baichuan
|
|
@@ -489,18 +503,18 @@ Description: <div align="center">
|
|
|
489
503
|
|
|
490
504
|
## 🔜 Roadmap
|
|
491
505
|
|
|
492
|
-
- [
|
|
506
|
+
- [x] Subjective Evaluation
|
|
493
507
|
- [ ] Release CompassArena
|
|
494
|
-
- [
|
|
508
|
+
- [x] Subjective evaluation.
|
|
495
509
|
- [x] Long-context
|
|
496
|
-
- [
|
|
510
|
+
- [x] Long-context evaluation with extensive datasets.
|
|
497
511
|
- [ ] Long-context leaderboard.
|
|
498
|
-
- [
|
|
512
|
+
- [x] Coding
|
|
499
513
|
- [ ] Coding evaluation leaderboard.
|
|
500
514
|
- [x] Non-python language evaluation service.
|
|
501
|
-
- [
|
|
515
|
+
- [x] Agent
|
|
502
516
|
- [ ] Support various agent frameworks.
|
|
503
|
-
- [
|
|
517
|
+
- [x] Evaluation of tool use of the LLMs.
|
|
504
518
|
- [x] Robustness
|
|
505
519
|
- [x] Support various attack method
|
|
506
520
|
|
|
@@ -508,6 +522,20 @@ Description: <div align="center">
|
|
|
508
522
|
|
|
509
523
|
We appreciate all contributions to improving OpenCompass. Please refer to the [contributing guideline](https://opencompass.readthedocs.io/en/latest/notes/contribution_guide.html) for the best practice.
|
|
510
524
|
|
|
525
|
+
<!-- Copy-paste in your Readme.md file -->
|
|
526
|
+
|
|
527
|
+
<!-- Made with [OSS Insight](https://ossinsight.io/) -->
|
|
528
|
+
|
|
529
|
+
<a href="https://github.com/open-compass/opencompass/graphs/contributors" target="_blank">
|
|
530
|
+
<table>
|
|
531
|
+
<tr>
|
|
532
|
+
<th colspan="2">
|
|
533
|
+
<br><img src="https://contrib.rocks/image?repo=open-compass/opencompass"><br><br>
|
|
534
|
+
</th>
|
|
535
|
+
</tr>
|
|
536
|
+
</table>
|
|
537
|
+
</a>
|
|
538
|
+
|
|
511
539
|
## 🤝 Acknowledgements
|
|
512
540
|
|
|
513
541
|
Some code in this project is cited and modified from [OpenICL](https://github.com/Shark-NLP/OpenICL).
|
|
@@ -527,6 +555,23 @@ Description: <div align="center">
|
|
|
527
555
|
|
|
528
556
|
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
529
557
|
|
|
558
|
+
[github-contributors-link]: https://github.com/open-compass/opencompass/graphs/contributors
|
|
559
|
+
[github-contributors-shield]: https://img.shields.io/github/contributors/open-compass/opencompass?color=c4f042&labelColor=black&style=flat-square
|
|
560
|
+
[github-forks-link]: https://github.com/open-compass/opencompass/network/members
|
|
561
|
+
[github-forks-shield]: https://img.shields.io/github/forks/open-compass/opencompass?color=8ae8ff&labelColor=black&style=flat-square
|
|
562
|
+
[github-issues-link]: https://github.com/open-compass/opencompass/issues
|
|
563
|
+
[github-issues-shield]: https://img.shields.io/github/issues/open-compass/opencompass?color=ff80eb&labelColor=black&style=flat-square
|
|
564
|
+
[github-license-link]: https://github.com/open-compass/opencompass/blob/main/LICENSE
|
|
565
|
+
[github-license-shield]: https://img.shields.io/github/license/open-compass/opencompass?color=white&labelColor=black&style=flat-square
|
|
566
|
+
[github-release-link]: https://github.com/open-compass/opencompass/releases
|
|
567
|
+
[github-release-shield]: https://img.shields.io/github/v/release/open-compass/opencompass?color=369eff&labelColor=black&logo=github&style=flat-square
|
|
568
|
+
[github-releasedate-link]: https://github.com/open-compass/opencompass/releases
|
|
569
|
+
[github-releasedate-shield]: https://img.shields.io/github/release-date/open-compass/opencompass?labelColor=black&style=flat-square
|
|
570
|
+
[github-stars-link]: https://github.com/open-compass/opencompass/stargazers
|
|
571
|
+
[github-stars-shield]: https://img.shields.io/github/stars/open-compass/opencompass?color=ffcb47&labelColor=black&style=flat-square
|
|
572
|
+
[github-trending-shield]: https://trendshift.io/api/badge/repositories/6630
|
|
573
|
+
[github-trending-url]: https://trendshift.io/repositories/6630
|
|
574
|
+
|
|
530
575
|
Keywords: AI,NLP,in-context learning,large language model,evaluation,benchmark,llm
|
|
531
576
|
Platform: UNKNOWN
|
|
532
577
|
Classifier: Programming Language :: Python :: 3.8
|
|
@@ -3,37 +3,55 @@
|
|
|
3
3
|
<br />
|
|
4
4
|
<br />
|
|
5
5
|
|
|
6
|
-
[![
|
|
7
|
-
[![
|
|
6
|
+
[![][github-release-shield]][github-release-link]
|
|
7
|
+
[![][github-releasedate-shield]][github-releasedate-link]
|
|
8
|
+
[![][github-contributors-shield]][github-contributors-link]<br>
|
|
9
|
+
[![][github-forks-shield]][github-forks-link]
|
|
10
|
+
[![][github-stars-shield]][github-stars-link]
|
|
11
|
+
[![][github-issues-shield]][github-issues-link]
|
|
12
|
+
[![][github-license-shield]][github-license-link]
|
|
8
13
|
|
|
9
14
|
<!-- [](https://pypi.org/project/opencompass/) -->
|
|
10
15
|
|
|
11
16
|
[🌐Website](https://opencompass.org.cn/) |
|
|
17
|
+
[📖CompassHub](https://hub.opencompass.org.cn/home) |
|
|
18
|
+
[📊CompassRank](https://rank.opencompass.org.cn/home) |
|
|
12
19
|
[📘Documentation](https://opencompass.readthedocs.io/en/latest/) |
|
|
13
20
|
[🛠️Installation](https://opencompass.readthedocs.io/en/latest/get_started/installation.html) |
|
|
14
21
|
[🤔Reporting Issues](https://github.com/open-compass/opencompass/issues/new/choose)
|
|
15
22
|
|
|
16
23
|
English | [简体中文](README_zh-CN.md)
|
|
17
24
|
|
|
25
|
+
[![][github-trending-shield]][github-trending-url]
|
|
26
|
+
|
|
18
27
|
</div>
|
|
19
28
|
|
|
20
29
|
<p align="center">
|
|
21
30
|
👋 join us on <a href="https://discord.gg/KKwfEbFj7U" target="_blank">Discord</a> and <a href="https://r.vansin.top/?r=opencompass" target="_blank">WeChat</a>
|
|
22
31
|
</p>
|
|
23
32
|
|
|
24
|
-
|
|
33
|
+
> \[!IMPORTANT\]
|
|
34
|
+
>
|
|
35
|
+
> **Star Us**, You will receive all release notifications from GitHub without any delay ~ ⭐️
|
|
25
36
|
|
|
26
|
-
|
|
37
|
+
## 📣 OpenCompass 2.0
|
|
27
38
|
|
|
28
|
-
We
|
|
39
|
+
We are thrilled to introduce OpenCompass 2.0, an advanced suite featuring three key components: [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home).
|
|
40
|
+

|
|
29
41
|
|
|
30
|
-
|
|
42
|
+
**CompassRank** has been significantly enhanced into a set of leaderboards that now incorporate both open-source benchmarks and proprietary benchmarks. This upgrade allows for a more comprehensive evaluation of models across the industry.
|
|
31
43
|
|
|
32
|
-
|
|
44
|
+
**CompassHub** presents a pioneering benchmark browser interface, designed to simplify and expedite the exploration and utilization of an extensive array of benchmarks for researchers and practitioners alike. To enhance the visibility of your own benchmark within the community, we warmly invite you to contribute it to CompassHub. You may initiate the submission process by clicking [here](https://hub.opencompass.org.cn/dataset-submit).
|
|
33
45
|
|
|
34
|
-
|
|
46
|
+
**CompassKit** is a powerful collection of evaluation toolkits specifically tailored for Large Language Models and Large Vision-language Models. It provides an extensive set of tools to assess and measure the performance of these complex models effectively. You are welcome to try our toolkits in your research and products.
|
|
35
47
|
|
|
36
|
-
|
|
48
|
+
<details>
|
|
49
|
+
<summary><kbd>Star History</kbd></summary>
|
|
50
|
+
<picture>
|
|
51
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&theme=dark&type=Date">
|
|
52
|
+
<img width="100%" src="https://api.star-history.com/svg?repos=open-compass%2Fopencompass&type=Date">
|
|
53
|
+
</picture>
|
|
54
|
+
</details>
|
|
37
55
|
|
|
38
56
|
## 🧭 Welcome
|
|
39
57
|
|
|
@@ -52,12 +70,9 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
|
|
|
52
70
|
|
|
53
71
|
## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
|
54
72
|
|
|
55
|
-
- **\[2024.
|
|
56
|
-
- **\[2024.
|
|
57
|
-
- **\[
|
|
58
|
-
- **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details! 🔥🔥🔥.
|
|
59
|
-
- **\[2023.12.10\]** We have released [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), a toolkit for evaluating vision-language models (VLMs), currently support 20+ VLMs and 7 multi-modal benchmarks (including MMBench series).
|
|
60
|
-
- **\[2023.12.10\]** We have supported Mistral AI's MoE LLM: **Mixtral-8x7B-32K**. Welcome to [MixtralKit](https://github.com/open-compass/MixtralKit) for more details about inference and evaluation.
|
|
73
|
+
- **\[2024.04.22\]** We supported the evaluation of [LLaMA3](configs/models/hf_llama/hf_llama3_8b.py) and [LLaMA3-Instruct](configs/models/hf_llama/hf_llama3_8b_instruct.py), welcome to try! 🔥🔥🔥
|
|
74
|
+
- **\[2024.02.29\]** We supported MT-Bench, AlpacaEval and AlignBench; more information can be found [here](https://opencompass.readthedocs.io/en/latest/advanced_guides/subjective_evaluation.html)
|
|
75
|
+
- **\[2024.01.30\]** We release OpenCompass 2.0. Click [CompassKit](https://github.com/open-compass), [CompassHub](https://hub.opencompass.org.cn/home), and [CompassRank](https://rank.opencompass.org.cn/home) for more information !
|
|
61
76
|
|
|
62
77
|
> [More](docs/en/notes/news.md)
|
|
63
78
|
|
|
@@ -79,7 +94,7 @@ OpenCompass is a one-stop platform for large model evaluation, aiming to provide
|
|
|
79
94
|
|
|
80
95
|
## 📊 Leaderboard
|
|
81
96
|
|
|
82
|
-
We provide [OpenCompass Leaderboard](https://opencompass.org.cn/
|
|
97
|
+
We provide [OpenCompass Leaderboard](https://rank.opencompass.org.cn/home) for the community to rank all public models and API models. If you would like to join the evaluation, please provide the model repository URL or a standard API interface to the email address `opencompass@pjlab.org.cn`.
|
|
83
98
|
|
|
84
99
|
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
85
100
|
|
|
@@ -114,8 +129,8 @@ pip install -e .
|
|
|
114
129
|
|
|
115
130
|
```bash
|
|
116
131
|
# Download dataset to data/ folder
|
|
117
|
-
wget https://github.com/open-compass/opencompass/releases/download/0.
|
|
118
|
-
unzip OpenCompassData-core-
|
|
132
|
+
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
|
|
133
|
+
unzip OpenCompassData-core-20240207.zip
|
|
119
134
|
```
|
|
120
135
|
|
|
121
136
|
Some third-party features, like Humaneval and Llama, may require additional steps to work properly, for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).
|
|
@@ -420,10 +435,6 @@ Through the command line or configuration files, OpenCompass also supports evalu
|
|
|
420
435
|
</tbody>
|
|
421
436
|
</table>
|
|
422
437
|
|
|
423
|
-
## OpenCompass Ecosystem
|
|
424
|
-
|
|
425
|
-
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
426
|
-
|
|
427
438
|
## 📖 Model Support
|
|
428
439
|
|
|
429
440
|
<table align="center">
|
|
@@ -444,6 +455,7 @@ Through the command line or configuration files, OpenCompass also supports evalu
|
|
|
444
455
|
|
|
445
456
|
- [InternLM](https://github.com/InternLM/InternLM)
|
|
446
457
|
- [LLaMA](https://github.com/facebookresearch/llama)
|
|
458
|
+
- [LLaMA3](https://github.com/meta-llama/llama3)
|
|
447
459
|
- [Vicuna](https://github.com/lm-sys/FastChat)
|
|
448
460
|
- [Alpaca](https://github.com/tatsu-lab/stanford_alpaca)
|
|
449
461
|
- [Baichuan](https://github.com/baichuan-inc)
|
|
@@ -453,12 +465,14 @@ Through the command line or configuration files, OpenCompass also supports evalu
|
|
|
453
465
|
- [TigerBot](https://github.com/TigerResearch/TigerBot)
|
|
454
466
|
- [Qwen](https://github.com/QwenLM/Qwen)
|
|
455
467
|
- [BlueLM](https://github.com/vivo-ai-lab/BlueLM)
|
|
468
|
+
- [Gemma](https://huggingface.co/google/gemma-7b)
|
|
456
469
|
- ...
|
|
457
470
|
|
|
458
471
|
</td>
|
|
459
472
|
<td>
|
|
460
473
|
|
|
461
474
|
- OpenAI
|
|
475
|
+
- Gemini
|
|
462
476
|
- Claude
|
|
463
477
|
- ZhipuAI(ChatGLM)
|
|
464
478
|
- Baichuan
|
|
@@ -481,18 +495,18 @@ Through the command line or configuration files, OpenCompass also supports evalu
|
|
|
481
495
|
|
|
482
496
|
## 🔜 Roadmap
|
|
483
497
|
|
|
484
|
-
- [
|
|
498
|
+
- [x] Subjective Evaluation
|
|
485
499
|
- [ ] Release CompassArena
|
|
486
|
-
- [
|
|
500
|
+
- [x] Subjective evaluation.
|
|
487
501
|
- [x] Long-context
|
|
488
|
-
- [
|
|
502
|
+
- [x] Long-context evaluation with extensive datasets.
|
|
489
503
|
- [ ] Long-context leaderboard.
|
|
490
|
-
- [
|
|
504
|
+
- [x] Coding
|
|
491
505
|
- [ ] Coding evaluation leaderboard.
|
|
492
506
|
- [x] Non-python language evaluation service.
|
|
493
|
-
- [
|
|
507
|
+
- [x] Agent
|
|
494
508
|
- [ ] Support various agent frameworks.
|
|
495
|
-
- [
|
|
509
|
+
- [x] Evaluation of tool use of the LLMs.
|
|
496
510
|
- [x] Robustness
|
|
497
511
|
- [x] Support various attack method
|
|
498
512
|
|
|
@@ -500,6 +514,20 @@ Through the command line or configuration files, OpenCompass also supports evalu
|
|
|
500
514
|
|
|
501
515
|
We appreciate all contributions to improving OpenCompass. Please refer to the [contributing guideline](https://opencompass.readthedocs.io/en/latest/notes/contribution_guide.html) for the best practice.
|
|
502
516
|
|
|
517
|
+
<!-- Copy-paste in your Readme.md file -->
|
|
518
|
+
|
|
519
|
+
<!-- Made with [OSS Insight](https://ossinsight.io/) -->
|
|
520
|
+
|
|
521
|
+
<a href="https://github.com/open-compass/opencompass/graphs/contributors" target="_blank">
|
|
522
|
+
<table>
|
|
523
|
+
<tr>
|
|
524
|
+
<th colspan="2">
|
|
525
|
+
<br><img src="https://contrib.rocks/image?repo=open-compass/opencompass"><br><br>
|
|
526
|
+
</th>
|
|
527
|
+
</tr>
|
|
528
|
+
</table>
|
|
529
|
+
</a>
|
|
530
|
+
|
|
503
531
|
## 🤝 Acknowledgements
|
|
504
532
|
|
|
505
533
|
Some code in this project is cited and modified from [OpenICL](https://github.com/Shark-NLP/OpenICL).
|
|
@@ -518,3 +546,20 @@ Some datasets and prompt implementations are modified from [chain-of-thought-hub
|
|
|
518
546
|
```
|
|
519
547
|
|
|
520
548
|
<p align="right"><a href="#top">🔝Back to top</a></p>
|
|
549
|
+
|
|
550
|
+
[github-contributors-link]: https://github.com/open-compass/opencompass/graphs/contributors
|
|
551
|
+
[github-contributors-shield]: https://img.shields.io/github/contributors/open-compass/opencompass?color=c4f042&labelColor=black&style=flat-square
|
|
552
|
+
[github-forks-link]: https://github.com/open-compass/opencompass/network/members
|
|
553
|
+
[github-forks-shield]: https://img.shields.io/github/forks/open-compass/opencompass?color=8ae8ff&labelColor=black&style=flat-square
|
|
554
|
+
[github-issues-link]: https://github.com/open-compass/opencompass/issues
|
|
555
|
+
[github-issues-shield]: https://img.shields.io/github/issues/open-compass/opencompass?color=ff80eb&labelColor=black&style=flat-square
|
|
556
|
+
[github-license-link]: https://github.com/open-compass/opencompass/blob/main/LICENSE
|
|
557
|
+
[github-license-shield]: https://img.shields.io/github/license/open-compass/opencompass?color=white&labelColor=black&style=flat-square
|
|
558
|
+
[github-release-link]: https://github.com/open-compass/opencompass/releases
|
|
559
|
+
[github-release-shield]: https://img.shields.io/github/v/release/open-compass/opencompass?color=369eff&labelColor=black&logo=github&style=flat-square
|
|
560
|
+
[github-releasedate-link]: https://github.com/open-compass/opencompass/releases
|
|
561
|
+
[github-releasedate-shield]: https://img.shields.io/github/release-date/open-compass/opencompass?labelColor=black&style=flat-square
|
|
562
|
+
[github-stars-link]: https://github.com/open-compass/opencompass/stargazers
|
|
563
|
+
[github-stars-shield]: https://img.shields.io/github/stars/open-compass/opencompass?color=ffcb47&labelColor=black&style=flat-square
|
|
564
|
+
[github-trending-shield]: https://trendshift.io/api/badge/repositories/6630
|
|
565
|
+
[github-trending-url]: https://trendshift.io/repositories/6630
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '0.2.4'
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os.path as osp
|
|
3
|
+
|
|
4
|
+
from datasets import Dataset
|
|
5
|
+
|
|
6
|
+
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
|
7
|
+
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
|
|
8
|
+
|
|
9
|
+
from .base import BaseDataset
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@LOAD_DATASET.register_module()
class OpenFinDataDataset(BaseDataset):
    """Loader for one OpenFinData subset stored as ``<path>/<name>.json``."""

    @staticmethod
    def load(path: str, name: str):
        """Read ``<name>.json`` under ``path`` and wrap it in a ``Dataset``.

        Args:
            path (str): Directory that holds the subset files.
            name (str): Subset name; the file ``f'{name}.json'`` is opened.

        Returns:
            Dataset: Built with ``Dataset.from_list`` from the parsed JSON
                content.
        """
        file_path = osp.join(path, f'{name}.json')
        # Decode explicitly as UTF-8 (the JSON interchange encoding) so the
        # result does not depend on the platform's locale default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return Dataset.from_list(data)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@ICL_EVALUATORS.register_module()
class OpenFinDataKWEvaluator(BaseEvaluator):
    """Keyword-coverage evaluator.

    A prediction counts as correct only when every keyword of its reference
    (keywords are separated by the '、' character) occurs in the prediction.
    """

    def __init__(self):
        super().__init__()

    def score(self, predictions, references):
        """Return ``{'accuracy': ...}`` as a percentage of full keyword hits.

        Args:
            predictions: Sequence of predicted texts.
            references: Sequence of reference strings, each a '、'-separated
                list of keywords. Must have the same length as
                ``predictions``.

        Returns:
            dict: Single key ``'accuracy'``: the hit ratio rounded to four
                decimals, then multiplied by 100.
        """
        assert len(predictions) == len(references)

        hits = []
        for idx, reference in enumerate(references):
            keywords = reference.split('、')
            # all() stops at the first missing keyword, like the early break.
            hits.append(all(kw in predictions[idx] for kw in keywords))

        return {'accuracy': round(sum(hits) / len(hits), 4) * 100}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from datasets import Dataset
|
|
4
|
+
|
|
5
|
+
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
|
6
|
+
from opencompass.registry import LOAD_DATASET
|
|
7
|
+
|
|
8
|
+
from .base import BaseDataset
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@LOAD_DATASET.register_module()
class QuALITYDataset(BaseDataset):
    """Loader that flattens a QuALITY JSON-lines file to one row per question."""

    @staticmethod
    def load(path: str):
        """Parse the JSON-lines file at ``path`` into a ``Dataset``.

        Each input line is a JSON object with an ``article`` and a list of
        ``questions``. Every question becomes one row with the keys
        ``article``, ``question``, ``A``-``D`` (the first four options),
        ``gold_label`` (the 1-based label mapped to a letter) and
        ``difficult``.

        Args:
            path (str): Path of the JSON-lines file.

        Returns:
            Dataset: Built with ``Dataset.from_list`` from the flattened rows.
        """
        letters = 'ABCD'
        rows = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw in f:
                record = json.loads(raw)
                for item in record['questions']:
                    row = {
                        'article': record['article'],
                        'question': item['question'],
                    }
                    for pos, letter in enumerate(letters):
                        row[letter] = item['options'][pos]
                    # gold_label is 1-based in the file; shift to an index.
                    row['gold_label'] = letters[item['gold_label'] - 1]
                    row['difficult'] = item['difficult']
                    rows.append(row)
        return Dataset.from_list(rows)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class QuALITYEvaluator(BaseEvaluator):
    """Exact-match accuracy for QuALITY, split by the ``difficult`` flag."""

    def score(self, predictions, references, test_set):
        """Compute easy / hard / overall accuracy in percent.

        Args:
            predictions: Sequence of predicted answers.
            references: Sequence of gold answers; must have the same length
                as ``predictions``.
            test_set: Iterable of rows aligned with the predictions; each row
                is indexed with ``'difficult'`` (``0`` puts the sample in the
                easy bucket, anything else in the hard bucket).

        Returns:
            dict: ``easy_acc``, ``hard_acc`` and ``all_acc``. A bucket with
                no samples is reported as ``0.0``.
        """
        assert len(predictions) == len(references)

        def accuracy(flags):
            # An empty bucket scores 0 instead of raising ZeroDivisionError.
            return sum(flags) / len(flags) * 100 if flags else 0.0

        easy, hard, overall = [], [], []
        for pred, refer, test in zip(predictions, references, test_set):
            correct = bool(pred == refer)
            overall.append(correct)
            if test['difficult'] == 0:
                easy.append(correct)
            else:
                hard.append(correct)

        # hard_acc is normalised by the number of hard samples; it was
        # previously divided by the number of easy samples.
        return dict(easy_acc=accuracy(easy),
                    hard_acc=accuracy(hard),
                    all_acc=accuracy(overall))
|
|
@@ -4,7 +4,7 @@ from datasets import load_dataset
|
|
|
4
4
|
|
|
5
5
|
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from ..base import BaseDataset
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@LOAD_DATASET.register_module()
|
|
@@ -24,3 +24,15 @@ def TheoremQA_postprocess(text: str) -> str:
|
|
|
24
24
|
else:
|
|
25
25
|
text = matches[0].strip().strip('.,?!\"\';:')
|
|
26
26
|
return text
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def TheoremQA_postprocess_v2(text: str) -> str:
    """Extract the trailing answer from the last line of ``text``.

    The last line is scanned word by word from the end. Scanning stops at the
    first word that is exactly ``is``, ``be`` or ``are``, or that ends with a
    colon; the words after that stop word form the answer. Surrounding
    whitespace and leading/trailing periods are removed from the result.

    Args:
        text (str): Raw model output.

    Returns:
        str: The extracted answer; the whole last line (trimmed) when no stop
            word is found.
    """
    last_line = text.strip().strip('\n').split('\n')[-1]
    kept = []
    for word in reversed(last_line.split(' ')):
        if word in ('is', 'be', 'are') or word.endswith(':'):
            break
        kept.append(word)
    # Words were collected back to front; restore their original order.
    return ' '.join(reversed(kept)).strip().strip('.')
|