ScandEval 16.12.0__tar.gz → 16.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +1 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +1 -1
- scandeval-16.13.0/.github/auto_assign.yaml +9 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.gitignore +3 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.pre-commit-config.yaml +2 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/CHANGELOG.md +36 -0
- scandeval-16.13.0/PKG-INFO +334 -0
- scandeval-16.13.0/README.md +254 -0
- scandeval-16.13.0/docs/datasets/belarusian.md +385 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/danish.md +2 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/python-package.md +61 -9
- {scandeval-16.12.0 → scandeval-16.13.0}/pyproject.toml +1 -1
- scandeval-16.13.0/src/scandeval/async_utils.py +46 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_config_factory.py +26 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/fresh.py +2 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/hf.py +50 -12
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/litellm.py +25 -15
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/vllm.py +3 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmarker.py +15 -33
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/cli.py +2 -4
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/constants.py +5 -0
- scandeval-16.13.0/src/scandeval/custom_dataset_configs.py +152 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/data_loading.py +87 -31
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/data_models.py +396 -225
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/__init__.py +51 -25
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/albanian.py +1 -1
- scandeval-16.13.0/src/scandeval/dataset_configs/belarusian.py +47 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/bulgarian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/catalan.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/croatian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/danish.py +3 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/dutch.py +7 -6
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/english.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/estonian.py +8 -7
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/faroese.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/finnish.py +5 -4
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/french.py +6 -5
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/german.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/greek.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/hungarian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/icelandic.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/italian.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/latvian.py +2 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/lithuanian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/norwegian.py +6 -5
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/polish.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/portuguese.py +5 -4
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/romanian.py +2 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/serbian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/slovene.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/spanish.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/swedish.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/ukrainian.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/generation_utils.py +6 -6
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/llm_as_a_judge.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/pipeline.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_cache.py +34 -4
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/linguistic_acceptability.py +9 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/multiple_choice.py +9 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/named_entity_recognition.py +21 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/reading_comprehension.py +10 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/sentiment_classification.py +11 -0
- scandeval-16.13.0/src/scandeval/string_utils.py +157 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/sequence_classification.py +2 -5
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/token_classification.py +2 -4
- scandeval-16.13.0/src/scandeval/utils.py +231 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/constants.py +1 -0
- scandeval-16.13.0/src/scripts/create_be_wsc.py +463 -0
- scandeval-16.13.0/src/scripts/create_besls.py +146 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_duidelijke_taal.py +2 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_exam_et.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_goldenswag.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag.py +5 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag_cs.py +4 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag_fi.py +6 -4
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_multi_wiki_qa.py +1 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wikiann.py +1 -1
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/load_ud_pos.py +18 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/conftest.py +12 -3
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_config_factory.py +15 -2
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmarker.py +17 -43
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_data_loading.py +16 -7
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_data_models.py +1 -27
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_dataset_configs.py +8 -22
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_model_loading.py +23 -10
- scandeval-16.13.0/tests/test_string_utils.py +55 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_utils.py +1 -52
- {scandeval-16.12.0 → scandeval-16.13.0}/uv.lock +10 -10
- scandeval-16.12.0/.github/auto_assign.yaml +0 -29
- scandeval-16.12.0/PKG-INFO +0 -667
- scandeval-16.12.0/README.md +0 -587
- scandeval-16.12.0/src/scandeval/utils.py +0 -548
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/workflows/auto_assign_reviewers.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.github/workflows/ci.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/.markdownlint.jsonc +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/CITATION.cff +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/CODE_OF_CONDUCT.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/CONTRIBUTING.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/Dockerfile.cuda +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/LICENSE +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/NEW_DATASET_GUIDE.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/CNAME +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/README.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/README.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/albanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/bosnian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/bulgarian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/catalan.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/croatian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/czech.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/dutch.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/english.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/estonian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/faroese.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/finnish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/french.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/german.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/greek.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/hungarian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/icelandic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/italian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/latvian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/lithuanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/norwegian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/polish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/portuguese.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/romanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/serbian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/slovak.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/slovene.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/spanish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/swedish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/ukrainian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/extras/radial_plotter.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/faq.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/gfx/favicon.png +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/albanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/bosnian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/bulgarian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/catalan.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/croatian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/czech.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/danish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/dutch.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/english.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/estonian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/faroese.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/finnish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/french.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/german.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/greek.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/hungarian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/icelandic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/italian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/latvian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/lithuanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/norwegian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/polish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/portuguese.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/romanian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/serbian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/slovak.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/slovene.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/spanish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/swedish.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/ukrainian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/baltic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/european.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/finnic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/germanic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/romance.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/slavic.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/README.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/methodology.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/README.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/bias-detection.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/common-sense-reasoning.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/european-values.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/knowledge.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/linguistic-acceptability.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/named-entity-recognition.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/reading-comprehension.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/sentiment-classification.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/simplification.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/speed.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/summarization.md +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/gfx/euroeval.png +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/gfx/euroeval.xcf +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/gfx/scandeval.png +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/makefile +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/mkdocs.yaml +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/base.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/caching_utils.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/callbacks.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/bosnian.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/czech.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/slovak.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/enums.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/exceptions.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/finetuning.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/generation.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/languages.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/logging_utils.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/base.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/bias.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/huggingface.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/speed.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_config.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_loading.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/classification.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/simplification.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/summarization.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/token_classification.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/scores.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/speed_benchmark.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/multiple_choice_classification.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/question_answering.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/text_to_text.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/tasks.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/tokenisation_utils.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/types.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_allocine.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_angry_tweets.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_arc.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_arc_is.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_atsiliepimai.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_belebele.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_bg_ner_bsnlp.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_boolq_pt.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cinexio.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cnn_dailymail.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_en.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_es.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_nl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_copa_lv.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_copa_nl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cross_domain_uk_reviews.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cs_gec.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_csfd_sentiment.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_csfd_sentiment_sk.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_czech_news.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dacsa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dane.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danish_citizen_tests.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dansk.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danske_talemaader.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danske_talemaader_old.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dbrd.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dutch_cola.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_elner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_eltec.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_err_news.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_estner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_estonian_valence.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_european_values.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_exams_bg.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fone.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_foqa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fosent.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fquad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fullstack_ner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_germanquad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_germeval.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_global_mmlu.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_grammar_et.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_greek_sa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_greek_wikipedia.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_guia_cat.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_harem.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hun_sum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_husst.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ice_linguistic.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_knowledge.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_qa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icesum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_idioms_no.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ilpost_sum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_jentoft.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_kpwr_ner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_latvian_lsm_summary.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_latvian_twitter_sentiment.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_life_in_the_uk.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lithuanian_lrytas_summarization.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_llmzszl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lr_sum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lt_emotions.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lt_history.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mbbq_nl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mim_gold_ner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlqa_es.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlsum_de.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlsum_es.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_et.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_hr.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_lv.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mms.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_multinerd-it.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ner_uk.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_no_cola.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_no_sammendrag.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nordjylland_news.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norec.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norglm_multiqa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norglm_multisum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norne.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norquad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nqii.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_orange_sum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_personal_sum.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_polemo2.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_poner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_poquad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_psc.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_publico.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ronec.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_rosent.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_rrn.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sb10k.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scala.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scandiqa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scandisent_fi.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_schibsted.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentiment_headlines_es.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentinews.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentipolc16.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_skolprov.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sqad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_it.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_nl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_nl_old.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ssj500k_ner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sst2_pt.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sst5.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_suc3.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sumo_ro.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swedish_facts.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swedn.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swerec.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_szeged_ner.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_trivia_et.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_turku_ner_fi.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_tydiqa_fi.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_umimeto_qa.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_uner_sk.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_uner_sr.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wikineural-it.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande_et.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande_is.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_xlsum_fi.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_xquad.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/fix_dot_env_file.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/versioning.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_modules/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_modules/test_hf.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_bias_metrics.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_callbacks.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_cli.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_constants.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_enums.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_exceptions.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_finetuning.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_languages.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_model_config.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scores.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/__init__.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_create_scala.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_speed_benchmark.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_tokenisation_utils.py +0 -0
- {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_types.py +0 -0
{scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml
RENAMED
|
@@ -22,7 +22,7 @@ body:
|
|
|
22
22
|
- label: Finnic languages (Estonian, Finnish)
|
|
23
23
|
- label: Romance languages (Catalan, French, Italian, Portuguese, Romanian, Spanish)
|
|
24
24
|
- label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
|
|
25
|
-
- label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
|
|
25
|
+
- label: Slavic languages (Belarusian, Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
|
|
26
26
|
- label: West Germanic languages (Dutch, English, German)
|
|
27
27
|
- label: Albanian
|
|
28
28
|
- label: Greek
|
|
@@ -10,7 +10,7 @@ repos:
|
|
|
10
10
|
- id: trailing-whitespace
|
|
11
11
|
- id: debug-statements
|
|
12
12
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
-
rev: v0.
|
|
13
|
+
rev: v0.15.0
|
|
14
14
|
hooks:
|
|
15
15
|
- id: ruff
|
|
16
16
|
args:
|
|
@@ -34,7 +34,7 @@ repos:
|
|
|
34
34
|
hooks:
|
|
35
35
|
- id: nbstripout
|
|
36
36
|
- repo: https://github.com/facebook/pyrefly-pre-commit
|
|
37
|
-
rev: 0.
|
|
37
|
+
rev: 0.51.1
|
|
38
38
|
hooks:
|
|
39
39
|
- id: pyrefly-check
|
|
40
40
|
name: Pyrefly (type checking)
|
|
@@ -7,6 +7,42 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [v16.13.0] - 2026-02-06
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added support for Belarusian 🇧🇾! This includes the sentiment classification dataset,
|
|
15
|
+
the linguistic acceptability dataset ScaLA-be, the named entity recognition dataset
|
|
16
|
+
WikiANN-be, the reading comprehension dataset MultiWikiQA-be, and the common-sense
|
|
17
|
+
reasoning dataset BE-WSC. This was added by @oliverkinch ✨
|
|
18
|
+
- Added support for evaluating Hugging Face dataset repos directly, if they have a
|
|
19
|
+
`euroeval_config.py` file. We plan to allow support for a JSON/YAML config file in the
|
|
20
|
+
future, for simpler datasets.
|
|
21
|
+
|
|
22
|
+
### Changed
|
|
23
|
+
|
|
24
|
+
- Replaced all `DatasetConfig` arguments starting with underscores with their
|
|
25
|
+
non-underscored version (e.g., `_labels` -> `labels`), as this caused some confusion
|
|
26
|
+
when defining custom datasets. We still maintain the underscored versions for
|
|
27
|
+
backwards compatibility, but raise a warning when using them.
|
|
28
|
+
- Now logs when the model inference service is temporarily unavailable, even when the
|
|
29
|
+
verbose flag is not set.
|
|
30
|
+
- When evaluating local models, we now automatically add the "/v1" suffix to the API
|
|
31
|
+
base URL if not present and required by the API.
|
|
32
|
+
|
|
33
|
+
### Fixed
|
|
34
|
+
|
|
35
|
+
- Now allows all attention backends compatible with vLLM to be used, through the
|
|
36
|
+
`--attention-backend` CLI option. This was already possible through the Python API,
|
|
37
|
+
but was artificially restricted in the CLI.
|
|
38
|
+
- When initialising a custom `Task` object, we now default the `default_labels` argument
|
|
39
|
+
to an empty list.
|
|
40
|
+
|
|
41
|
+
### Deprecated
|
|
42
|
+
|
|
43
|
+
- All underscored versions of `DatasetConfig` arguments are deprecated. Please use their
|
|
44
|
+
non-underscored version instead.
|
|
45
|
+
|
|
10
46
|
## [v16.12.0] - 2026-02-02
|
|
11
47
|
|
|
12
48
|
### Added
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ScandEval
|
|
3
|
+
Version: 16.13.0
|
|
4
|
+
Summary: The robust European language model benchmark.
|
|
5
|
+
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
|
+
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
7
|
+
Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
8
|
+
Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2022-2026 Dan Saattrup Smart
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Python: <4.0,>=3.12
|
|
32
|
+
Requires-Dist: accelerate>=1.9.0
|
|
33
|
+
Requires-Dist: bert-score>=0.3.13
|
|
34
|
+
Requires-Dist: click>=8.1.3
|
|
35
|
+
Requires-Dist: cloudpickle>=3.1.1
|
|
36
|
+
Requires-Dist: datasets>=3.5.0
|
|
37
|
+
Requires-Dist: demjson3>=3.0.6
|
|
38
|
+
Requires-Dist: evaluate>=0.4.1
|
|
39
|
+
Requires-Dist: huggingface-hub>=0.30.1
|
|
40
|
+
Requires-Dist: levenshtein>=0.24.0
|
|
41
|
+
Requires-Dist: litellm>=1.75.6
|
|
42
|
+
Requires-Dist: mistral-common[soundfile]
|
|
43
|
+
Requires-Dist: more-itertools>=10.5.0
|
|
44
|
+
Requires-Dist: numpy>=2.0.0
|
|
45
|
+
Requires-Dist: ollama>=0.5.1
|
|
46
|
+
Requires-Dist: pandas>=2.2.0
|
|
47
|
+
Requires-Dist: peft>=0.15.0
|
|
48
|
+
Requires-Dist: protobuf>=2.0.0
|
|
49
|
+
Requires-Dist: pydantic>=2.6.0
|
|
50
|
+
Requires-Dist: pyinfer>=0.0.3
|
|
51
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
52
|
+
Requires-Dist: rouge-score>=0.1.2
|
|
53
|
+
Requires-Dist: sacrebleu>=2.5.1
|
|
54
|
+
Requires-Dist: sacremoses>=0.1.1
|
|
55
|
+
Requires-Dist: scikit-learn==1.6.1
|
|
56
|
+
Requires-Dist: sentencepiece>=0.1.96
|
|
57
|
+
Requires-Dist: seqeval>=1.2.2
|
|
58
|
+
Requires-Dist: setuptools>=75.8.2
|
|
59
|
+
Requires-Dist: tenacity>=9.0.0
|
|
60
|
+
Requires-Dist: termcolor>=2.0.0
|
|
61
|
+
Requires-Dist: torch>=2.6.0
|
|
62
|
+
Requires-Dist: transformers[mistral-common]<5.0.0,>=4.56.0
|
|
63
|
+
Provides-Extra: all
|
|
64
|
+
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
|
|
65
|
+
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
66
|
+
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
|
|
67
|
+
Requires-Dist: timm>=1.0.19; extra == 'all'
|
|
68
|
+
Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'all'
|
|
69
|
+
Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'all'
|
|
70
|
+
Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'all'
|
|
71
|
+
Provides-Extra: generative
|
|
72
|
+
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
73
|
+
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
74
|
+
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
|
|
75
|
+
Requires-Dist: timm>=1.0.19; extra == 'generative'
|
|
76
|
+
Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'generative'
|
|
77
|
+
Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'generative'
|
|
78
|
+
Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'generative'
|
|
79
|
+
Description-Content-Type: text/markdown
|
|
80
|
+
|
|
81
|
+
<!-- This disables the requirement that the first line is a top-level heading -->
|
|
82
|
+
<!-- markdownlint-configure-file { "MD041": false } -->
|
|
83
|
+
|
|
84
|
+
<div align='center'>
|
|
85
|
+
<img
|
|
86
|
+
src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
|
|
87
|
+
height="500"
|
|
88
|
+
width="372"
|
|
89
|
+
>
|
|
90
|
+
</div>
|
|
91
|
+
|
|
92
|
+
### The robust European language model benchmark
|
|
93
|
+
|
|
94
|
+
(formerly known as ScandEval)
|
|
95
|
+
|
|
96
|
+
______________________________________________________________________
|
|
97
|
+
[](https://euroeval.com)
|
|
98
|
+
[](https://pypi.org/project/euroeval/)
|
|
99
|
+
[](https://arxiv.org/abs/2304.00906)
|
|
100
|
+
[](https://arxiv.org/abs/2406.13469)
|
|
101
|
+
[](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
|
|
102
|
+
[](https://github.com/EuroEval/EuroEval/commits/main)
|
|
103
|
+
[](https://github.com/EuroEval/EuroEval/tree/main/tests)
|
|
104
|
+
[](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
|
|
105
|
+
|
|
106
|
+
## Maintainer
|
|
107
|
+
|
|
108
|
+
- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
|
|
109
|
+
|
|
110
|
+
## Installation and usage
|
|
111
|
+
|
|
112
|
+
See the [documentation](https://euroeval.com/python-package/) for more information.
|
|
113
|
+
|
|
114
|
+
## Reproducing the evaluation datasets
|
|
115
|
+
|
|
116
|
+
All datasets used in this project are generated using the scripts located in the
|
|
117
|
+
[src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
|
|
118
|
+
with the following command:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
uv run src/scripts/<name-of-script>.py
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Replace `<name-of-script>` with the specific script you wish to execute, e.g.,
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
uv run src/scripts/create_allocine.py
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Contributors :pray:
|
|
131
|
+
|
|
132
|
+
A huge thank you to all the contributors who have helped make this project a success!
|
|
133
|
+
|
|
134
|
+
<a href="https://github.com/peter-sk">
|
|
135
|
+
<img
|
|
136
|
+
src="https://avatars.githubusercontent.com/u/6168908"
|
|
137
|
+
width=50
|
|
138
|
+
alt="Contributor avatar for peter-sk"
|
|
139
|
+
/>
|
|
140
|
+
</a>
|
|
141
|
+
<a href="https://github.com/AJDERS">
|
|
142
|
+
<img
|
|
143
|
+
src="https://avatars.githubusercontent.com/u/38854604"
|
|
144
|
+
width=50
|
|
145
|
+
alt="Contributor avatar for AJDERS"
|
|
146
|
+
/>
|
|
147
|
+
</a>
|
|
148
|
+
<a href="https://github.com/oliverkinch">
|
|
149
|
+
<img
|
|
150
|
+
src="https://avatars.githubusercontent.com/u/71556498"
|
|
151
|
+
width=50
|
|
152
|
+
alt="Contributor avatar for oliverkinch"
|
|
153
|
+
/>
|
|
154
|
+
</a>
|
|
155
|
+
<a href="https://github.com/versae">
|
|
156
|
+
<img
|
|
157
|
+
src="https://avatars.githubusercontent.com/u/173537"
|
|
158
|
+
width=50
|
|
159
|
+
alt="Contributor avatar for versae"
|
|
160
|
+
/>
|
|
161
|
+
</a>
|
|
162
|
+
<a href="https://github.com/KennethEnevoldsen">
|
|
163
|
+
<img
|
|
164
|
+
src="https://avatars.githubusercontent.com/u/23721977"
|
|
165
|
+
width=50
|
|
166
|
+
alt="Contributor avatar for KennethEnevoldsen"
|
|
167
|
+
/>
|
|
168
|
+
</a>
|
|
169
|
+
<a href="https://github.com/viggo-gascou">
|
|
170
|
+
<img
|
|
171
|
+
src="https://avatars.githubusercontent.com/u/94069687"
|
|
172
|
+
width=50
|
|
173
|
+
alt="Contributor avatar for viggo-gascou"
|
|
174
|
+
/>
|
|
175
|
+
</a>
|
|
176
|
+
<a href="https://github.com/mathiasesn">
|
|
177
|
+
<img
|
|
178
|
+
src="https://avatars.githubusercontent.com/u/27091759"
|
|
179
|
+
width=50
|
|
180
|
+
alt="Contributor avatar for mathiasesn"
|
|
181
|
+
/>
|
|
182
|
+
</a>
|
|
183
|
+
<a href="https://github.com/Alkarex">
|
|
184
|
+
<img
|
|
185
|
+
src="https://avatars.githubusercontent.com/u/1008324"
|
|
186
|
+
width=50
|
|
187
|
+
alt="Contributor avatar for Alkarex"
|
|
188
|
+
/>
|
|
189
|
+
</a>
|
|
190
|
+
<a href="https://github.com/marksverdhei">
|
|
191
|
+
<img
|
|
192
|
+
src="https://avatars.githubusercontent.com/u/46672778"
|
|
193
|
+
width=50
|
|
194
|
+
alt="Contributor avatar for marksverdhei"
|
|
195
|
+
/>
|
|
196
|
+
</a>
|
|
197
|
+
<a href="https://github.com/Mikeriess">
|
|
198
|
+
<img
|
|
199
|
+
src="https://avatars.githubusercontent.com/u/19728563"
|
|
200
|
+
width=50
|
|
201
|
+
alt="Contributor avatar for Mikeriess"
|
|
202
|
+
/>
|
|
203
|
+
</a>
|
|
204
|
+
<a href="https://github.com/ThomasKluiters">
|
|
205
|
+
<img
|
|
206
|
+
src="https://avatars.githubusercontent.com/u/8137941"
|
|
207
|
+
width=50
|
|
208
|
+
alt="Contributor avatar for ThomasKluiters"
|
|
209
|
+
/>
|
|
210
|
+
</a>
|
|
211
|
+
<a href="https://github.com/BramVanroy">
|
|
212
|
+
<img
|
|
213
|
+
src="https://avatars.githubusercontent.com/u/2779410"
|
|
214
|
+
width=50
|
|
215
|
+
alt="Contributor avatar for BramVanroy"
|
|
216
|
+
/>
|
|
217
|
+
</a>
|
|
218
|
+
<a href="https://github.com/peregilk">
|
|
219
|
+
<img
|
|
220
|
+
src="https://avatars.githubusercontent.com/u/9079808"
|
|
221
|
+
width=50
|
|
222
|
+
alt="Contributor avatar for peregilk"
|
|
223
|
+
/>
|
|
224
|
+
</a>
|
|
225
|
+
<a href="https://github.com/Rijgersberg">
|
|
226
|
+
<img
|
|
227
|
+
src="https://avatars.githubusercontent.com/u/8604946"
|
|
228
|
+
width=50
|
|
229
|
+
alt="Contributor avatar for Rijgersberg"
|
|
230
|
+
/>
|
|
231
|
+
</a>
|
|
232
|
+
<a href="https://github.com/duarteocarmo">
|
|
233
|
+
<img
|
|
234
|
+
src="https://avatars.githubusercontent.com/u/26342344"
|
|
235
|
+
width=50
|
|
236
|
+
alt="Contributor avatar for duarteocarmo"
|
|
237
|
+
/>
|
|
238
|
+
</a>
|
|
239
|
+
<a href="https://github.com/slowwavesleep">
|
|
240
|
+
<img
|
|
241
|
+
src="https://avatars.githubusercontent.com/u/44175589"
|
|
242
|
+
width=50
|
|
243
|
+
alt="Contributor avatar for slowwavesleep"
|
|
244
|
+
/>
|
|
245
|
+
</a>
|
|
246
|
+
<a href="https://github.com/mrkowalski">
|
|
247
|
+
<img
|
|
248
|
+
src="https://avatars.githubusercontent.com/u/6357044"
|
|
249
|
+
width=50
|
|
250
|
+
alt="Contributor avatar for mrkowalski"
|
|
251
|
+
/>
|
|
252
|
+
</a>
|
|
253
|
+
<a href="https://github.com/simonevanbruggen">
|
|
254
|
+
<img
|
|
255
|
+
src="https://avatars.githubusercontent.com/u/24842609"
|
|
256
|
+
width=50
|
|
257
|
+
alt="Contributor avatar for simonevanbruggen"
|
|
258
|
+
/>
|
|
259
|
+
</a>
|
|
260
|
+
<a href="https://github.com/tvosch">
|
|
261
|
+
<img
|
|
262
|
+
src="https://avatars.githubusercontent.com/u/110661769"
|
|
263
|
+
width=50
|
|
264
|
+
alt="Contributor avatar for tvosch"
|
|
265
|
+
/>
|
|
266
|
+
</a>
|
|
267
|
+
<a href="https://github.com/Touzen">
|
|
268
|
+
<img
|
|
269
|
+
src="https://avatars.githubusercontent.com/u/1416265"
|
|
270
|
+
width=50
|
|
271
|
+
alt="Contributor avatar for Touzen"
|
|
272
|
+
/>
|
|
273
|
+
</a>
|
|
274
|
+
<a href="https://github.com/caldaibis">
|
|
275
|
+
<img
|
|
276
|
+
src="https://avatars.githubusercontent.com/u/16032437"
|
|
277
|
+
width=50
|
|
278
|
+
alt="Contributor avatar for caldaibis"
|
|
279
|
+
/>
|
|
280
|
+
</a>
|
|
281
|
+
<a href="https://github.com/SwekeR-463">
|
|
282
|
+
<img
|
|
283
|
+
src="https://avatars.githubusercontent.com/u/114919896?v=4"
|
|
284
|
+
width=50
|
|
285
|
+
alt="Contributor avatar for SwekeR-463"
|
|
286
|
+
/>
|
|
287
|
+
</a>
|
|
288
|
+
|
|
289
|
+
### Contribute to EuroEval
|
|
290
|
+
|
|
291
|
+
We welcome contributions to EuroEval! Whether you're fixing bugs, adding features, or
|
|
292
|
+
contributing new datasets, your help makes this project better for everyone.
|
|
293
|
+
|
|
294
|
+
- **General contributions**: Check out our [contribution guidelines](CONTRIBUTING.md)
|
|
295
|
+
for information on how to get started.
|
|
296
|
+
- **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
|
|
297
|
+
a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
|
|
298
|
+
|
|
299
|
+
### Special thanks
|
|
300
|
+
|
|
301
|
+
- Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
|
|
302
|
+
[Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
|
|
303
|
+
- Thanks to [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
|
|
304
|
+
models on the leaderboards.
|
|
305
|
+
- Thanks to [OpenAI](https://openai.com/) for sponsoring OpenAI credits as part of their
|
|
306
|
+
[Researcher Access Program](https://openai.com/form/researcher-access-program/).
|
|
307
|
+
- Thanks to [UWV](https://www.uwv.nl/) and [KU
|
|
308
|
+
Leuven](https://www.arts.kuleuven.be/ling/ccl) for sponsoring the Azure OpenAI
|
|
309
|
+
credits used to evaluate GPT-4-turbo in Dutch.
|
|
310
|
+
- Thanks to [Miðeind](https://mideind.is/en) for sponsoring the OpenAI
|
|
311
|
+
credits used to evaluate GPT-4-turbo in Icelandic and Faroese.
|
|
312
|
+
- Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
|
|
313
|
+
evaluate GPT-4-turbo in German.
|
|
314
|
+
|
|
315
|
+
## Citing EuroEval
|
|
316
|
+
|
|
317
|
+
If you want to cite the framework then feel free to use this:
|
|
318
|
+
|
|
319
|
+
```bibtex
|
|
320
|
+
@article{smart2024encoder,
|
|
321
|
+
title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
|
|
322
|
+
author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
|
|
323
|
+
journal={arXiv preprint arXiv:2406.13469},
|
|
324
|
+
year={2024}
|
|
325
|
+
}
|
|
326
|
+
@inproceedings{smart2023scandeval,
|
|
327
|
+
author = {Smart, Dan Saattrup},
|
|
328
|
+
booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
|
|
329
|
+
month = may,
|
|
330
|
+
pages = {185--201},
|
|
331
|
+
title = {{ScandEval: A Benchmark for Scandinavian Natural Language Processing}},
|
|
332
|
+
year = {2023}
|
|
333
|
+
}
|
|
334
|
+
```
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
<!-- This disables the requirement that the first line is a top-level heading -->
|
|
2
|
+
<!-- markdownlint-configure-file { "MD041": false } -->
|
|
3
|
+
|
|
4
|
+
<div align='center'>
|
|
5
|
+
<img
|
|
6
|
+
src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
|
|
7
|
+
height="500"
|
|
8
|
+
width="372"
|
|
9
|
+
>
|
|
10
|
+
</div>
|
|
11
|
+
|
|
12
|
+
### The robust European language model benchmark
|
|
13
|
+
|
|
14
|
+
(formerly known as ScandEval)
|
|
15
|
+
|
|
16
|
+
______________________________________________________________________
|
|
17
|
+
[](https://euroeval.com)
|
|
18
|
+
[](https://pypi.org/project/euroeval/)
|
|
19
|
+
[](https://arxiv.org/abs/2304.00906)
|
|
20
|
+
[](https://arxiv.org/abs/2406.13469)
|
|
21
|
+
[](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
|
|
22
|
+
[](https://github.com/EuroEval/EuroEval/commits/main)
|
|
23
|
+
[](https://github.com/EuroEval/EuroEval/tree/main/tests)
|
|
24
|
+
[](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
|
|
25
|
+
|
|
26
|
+
## Maintainer
|
|
27
|
+
|
|
28
|
+
- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
|
|
29
|
+
|
|
30
|
+
## Installation and usage
|
|
31
|
+
|
|
32
|
+
See the [documentation](https://euroeval.com/python-package/) for more information.
|
|
33
|
+
|
|
34
|
+
## Reproducing the evaluation datasets
|
|
35
|
+
|
|
36
|
+
All datasets used in this project are generated using the scripts located in the
|
|
37
|
+
[src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
|
|
38
|
+
with the following command:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv run src/scripts/<name-of-script>.py
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Replace `<name-of-script>` with the specific script you wish to execute, e.g.,
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
uv run src/scripts/create_allocine.py
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Contributors :pray:
|
|
51
|
+
|
|
52
|
+
A huge thank you to all the contributors who have helped make this project a success!
|
|
53
|
+
|
|
54
|
+
<a href="https://github.com/peter-sk">
|
|
55
|
+
<img
|
|
56
|
+
src="https://avatars.githubusercontent.com/u/6168908"
|
|
57
|
+
width=50
|
|
58
|
+
alt="Contributor avatar for peter-sk"
|
|
59
|
+
/>
|
|
60
|
+
</a>
|
|
61
|
+
<a href="https://github.com/AJDERS">
|
|
62
|
+
<img
|
|
63
|
+
src="https://avatars.githubusercontent.com/u/38854604"
|
|
64
|
+
width=50
|
|
65
|
+
alt="Contributor avatar for AJDERS"
|
|
66
|
+
/>
|
|
67
|
+
</a>
|
|
68
|
+
<a href="https://github.com/oliverkinch">
|
|
69
|
+
<img
|
|
70
|
+
src="https://avatars.githubusercontent.com/u/71556498"
|
|
71
|
+
width=50
|
|
72
|
+
alt="Contributor avatar for oliverkinch"
|
|
73
|
+
/>
|
|
74
|
+
</a>
|
|
75
|
+
<a href="https://github.com/versae">
|
|
76
|
+
<img
|
|
77
|
+
src="https://avatars.githubusercontent.com/u/173537"
|
|
78
|
+
width=50
|
|
79
|
+
alt="Contributor avatar for versae"
|
|
80
|
+
/>
|
|
81
|
+
</a>
|
|
82
|
+
<a href="https://github.com/KennethEnevoldsen">
|
|
83
|
+
<img
|
|
84
|
+
src="https://avatars.githubusercontent.com/u/23721977"
|
|
85
|
+
width=50
|
|
86
|
+
alt="Contributor avatar for KennethEnevoldsen"
|
|
87
|
+
/>
|
|
88
|
+
</a>
|
|
89
|
+
<a href="https://github.com/viggo-gascou">
|
|
90
|
+
<img
|
|
91
|
+
src="https://avatars.githubusercontent.com/u/94069687"
|
|
92
|
+
width=50
|
|
93
|
+
alt="Contributor avatar for viggo-gascou"
|
|
94
|
+
/>
|
|
95
|
+
</a>
|
|
96
|
+
<a href="https://github.com/mathiasesn">
|
|
97
|
+
<img
|
|
98
|
+
src="https://avatars.githubusercontent.com/u/27091759"
|
|
99
|
+
width=50
|
|
100
|
+
alt="Contributor avatar for mathiasesn"
|
|
101
|
+
/>
|
|
102
|
+
</a>
|
|
103
|
+
<a href="https://github.com/Alkarex">
|
|
104
|
+
<img
|
|
105
|
+
src="https://avatars.githubusercontent.com/u/1008324"
|
|
106
|
+
width=50
|
|
107
|
+
alt="Contributor avatar for Alkarex"
|
|
108
|
+
/>
|
|
109
|
+
</a>
|
|
110
|
+
<a href="https://github.com/marksverdhei">
|
|
111
|
+
<img
|
|
112
|
+
src="https://avatars.githubusercontent.com/u/46672778"
|
|
113
|
+
width=50
|
|
114
|
+
alt="Contributor avatar for marksverdhei"
|
|
115
|
+
/>
|
|
116
|
+
</a>
|
|
117
|
+
<a href="https://github.com/Mikeriess">
|
|
118
|
+
<img
|
|
119
|
+
src="https://avatars.githubusercontent.com/u/19728563"
|
|
120
|
+
width=50
|
|
121
|
+
alt="Contributor avatar for Mikeriess"
|
|
122
|
+
/>
|
|
123
|
+
</a>
|
|
124
|
+
<a href="https://github.com/ThomasKluiters">
|
|
125
|
+
<img
|
|
126
|
+
src="https://avatars.githubusercontent.com/u/8137941"
|
|
127
|
+
width=50
|
|
128
|
+
alt="Contributor avatar for ThomasKluiters"
|
|
129
|
+
/>
|
|
130
|
+
</a>
|
|
131
|
+
<a href="https://github.com/BramVanroy">
|
|
132
|
+
<img
|
|
133
|
+
src="https://avatars.githubusercontent.com/u/2779410"
|
|
134
|
+
width=50
|
|
135
|
+
alt="Contributor avatar for BramVanroy"
|
|
136
|
+
/>
|
|
137
|
+
</a>
|
|
138
|
+
<a href="https://github.com/peregilk">
|
|
139
|
+
<img
|
|
140
|
+
src="https://avatars.githubusercontent.com/u/9079808"
|
|
141
|
+
width=50
|
|
142
|
+
alt="Contributor avatar for peregilk"
|
|
143
|
+
/>
|
|
144
|
+
</a>
|
|
145
|
+
<a href="https://github.com/Rijgersberg">
|
|
146
|
+
<img
|
|
147
|
+
src="https://avatars.githubusercontent.com/u/8604946"
|
|
148
|
+
width=50
|
|
149
|
+
alt="Contributor avatar for Rijgersberg"
|
|
150
|
+
/>
|
|
151
|
+
</a>
|
|
152
|
+
<a href="https://github.com/duarteocarmo">
|
|
153
|
+
<img
|
|
154
|
+
src="https://avatars.githubusercontent.com/u/26342344"
|
|
155
|
+
width=50
|
|
156
|
+
alt="Contributor avatar for duarteocarmo"
|
|
157
|
+
/>
|
|
158
|
+
</a>
|
|
159
|
+
<a href="https://github.com/slowwavesleep">
|
|
160
|
+
<img
|
|
161
|
+
src="https://avatars.githubusercontent.com/u/44175589"
|
|
162
|
+
width=50
|
|
163
|
+
alt="Contributor avatar for slowwavesleep"
|
|
164
|
+
/>
|
|
165
|
+
</a>
|
|
166
|
+
<a href="https://github.com/mrkowalski">
|
|
167
|
+
<img
|
|
168
|
+
src="https://avatars.githubusercontent.com/u/6357044"
|
|
169
|
+
width=50
|
|
170
|
+
alt="Contributor avatar for mrkowalski"
|
|
171
|
+
/>
|
|
172
|
+
</a>
|
|
173
|
+
<a href="https://github.com/simonevanbruggen">
|
|
174
|
+
<img
|
|
175
|
+
src="https://avatars.githubusercontent.com/u/24842609"
|
|
176
|
+
width=50
|
|
177
|
+
alt="Contributor avatar for simonevanbruggen"
|
|
178
|
+
/>
|
|
179
|
+
</a>
|
|
180
|
+
<a href="https://github.com/tvosch">
|
|
181
|
+
<img
|
|
182
|
+
src="https://avatars.githubusercontent.com/u/110661769"
|
|
183
|
+
width=50
|
|
184
|
+
alt="Contributor avatar for tvosch"
|
|
185
|
+
/>
|
|
186
|
+
</a>
|
|
187
|
+
<a href="https://github.com/Touzen">
|
|
188
|
+
<img
|
|
189
|
+
src="https://avatars.githubusercontent.com/u/1416265"
|
|
190
|
+
width=50
|
|
191
|
+
alt="Contributor avatar for Touzen"
|
|
192
|
+
/>
|
|
193
|
+
</a>
|
|
194
|
+
<a href="https://github.com/caldaibis">
|
|
195
|
+
<img
|
|
196
|
+
src="https://avatars.githubusercontent.com/u/16032437"
|
|
197
|
+
width=50
|
|
198
|
+
alt="Contributor avatar for caldaibis"
|
|
199
|
+
/>
|
|
200
|
+
</a>
|
|
201
|
+
<a href="https://github.com/SwekeR-463">
|
|
202
|
+
<img
|
|
203
|
+
src="https://avatars.githubusercontent.com/u/114919896?v=4"
|
|
204
|
+
width=50
|
|
205
|
+
alt="Contributor avatar for SwekeR-463"
|
|
206
|
+
/>
|
|
207
|
+
</a>
|
|
208
|
+
|
|
209
|
+
### Contribute to EuroEval
|
|
210
|
+
|
|
211
|
+
We welcome contributions to EuroEval! Whether you're fixing bugs, adding features, or
|
|
212
|
+
contributing new datasets, your help makes this project better for everyone.
|
|
213
|
+
|
|
214
|
+
- **General contributions**: Check out our [contribution guidelines](CONTRIBUTING.md)
|
|
215
|
+
for information on how to get started.
|
|
216
|
+
- **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
|
|
217
|
+
a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
|
|
218
|
+
|
|
219
|
+
### Special thanks
|
|
220
|
+
|
|
221
|
+
- Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
|
|
222
|
+
[Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
|
|
223
|
+
- Thanks to [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
|
|
224
|
+
models on the leaderboards.
|
|
225
|
+
- Thanks to [OpenAI](https://openai.com/) for sponsoring OpenAI credits as part of their
|
|
226
|
+
[Researcher Access Program](https://openai.com/form/researcher-access-program/).
|
|
227
|
+
- Thanks to [UWV](https://www.uwv.nl/) and [KU
|
|
228
|
+
Leuven](https://www.arts.kuleuven.be/ling/ccl) for sponsoring the Azure OpenAI
|
|
229
|
+
credits used to evaluate GPT-4-turbo in Dutch.
|
|
230
|
+
- Thanks to [Miðeind](https://mideind.is/en) for sponsoring the OpenAI
|
|
231
|
+
credits used to evaluate GPT-4-turbo in Icelandic and Faroese.
|
|
232
|
+
- Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
|
|
233
|
+
evaluate GPT-4-turbo in German.
|
|
234
|
+
|
|
235
|
+
## Citing EuroEval
|
|
236
|
+
|
|
237
|
+
If you want to cite the framework then feel free to use this:
|
|
238
|
+
|
|
239
|
+
```bibtex
|
|
240
|
+
@article{smart2024encoder,
|
|
241
|
+
title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
|
|
242
|
+
author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
|
|
243
|
+
journal={arXiv preprint arXiv:2406.13469},
|
|
244
|
+
year={2024}
|
|
245
|
+
}
|
|
246
|
+
@inproceedings{smart2023scandeval,
|
|
247
|
+
author = {Smart, Dan Saattrup},
|
|
248
|
+
booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
|
|
249
|
+
month = may,
|
|
250
|
+
pages = {185--201},
|
|
251
|
+
title = {{ScandEval: A Benchmark for Scandinavian Natural Language Processing}},
|
|
252
|
+
year = {2023}
|
|
253
|
+
}
|
|
254
|
+
```
|