EuroEval 15.6.0__tar.gz → 15.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- {euroeval-15.6.0 → euroeval-15.6.1}/CHANGELOG.md +12 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/PKG-INFO +2 -1
- {euroeval-15.6.0 → euroeval-15.6.1}/README.md +1 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/dutch.md +8 -6
- {euroeval-15.6.0 → euroeval-15.6.1}/makefile +2 -15
- {euroeval-15.6.0 → euroeval-15.6.1}/pyproject.toml +1 -1
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/data_models.py +8 -4
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/faroese.py +1 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/norwegian.py +1 -1
- {euroeval-15.6.0 → euroeval-15.6.1}/uv.lock +1 -1
- {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.github/workflows/ci.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.gitignore +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/.pre-commit-config.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/CITATION.cff +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/CODE_OF_CONDUCT.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/CONTRIBUTING.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/Dockerfile.cuda +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/LICENSE +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/CNAME +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/README.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/README.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/danish.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/english.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/faroese.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/french.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/german.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/icelandic.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/italian.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/norwegian.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/spanish.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/swedish.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/extras/radial_plotter.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/faq.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/gfx/favicon.png +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/danish.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/dutch.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/english.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/faroese.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/french.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/german.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/icelandic.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/italian.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/norwegian.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/swedish.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/european.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/germanic.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/romance.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/README.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/methodology.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/python-package.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/README.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/common-sense-reasoning.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/knowledge.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/linguistic-acceptability.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/named-entity-recognition.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/reading-comprehension.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/sentiment-classification.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/speed.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/summarization.md +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/gfx/euroeval.png +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/gfx/euroeval.xcf +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/gfx/scandeval.png +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/mkdocs.yaml +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_config_factory.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/base.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/fresh.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/hf.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/litellm.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/vllm.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmarker.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/callbacks.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/cli.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/constants.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/data_loading.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/danish.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/dutch.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/english.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/french.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/german.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/icelandic.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/italian.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/spanish.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/swedish.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/enums.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/exceptions.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/finetuning.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/generation.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/human_evaluation.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/languages.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_cache.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_config.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_loading.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/linguistic_acceptability.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/multiple_choice.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/named_entity_recognition.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/reading_comprehension.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/sentiment_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/summarization.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/scores.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/speed_benchmark.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/question_answering.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/sequence_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/text_to_text.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/token_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/tasks.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/tokenization_utils.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/types.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/utils.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/constants.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_allocine.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_angry_tweets.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_arc.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_arc_is.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_belebele.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_cnn_dailymail.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_en.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_es.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_nl.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dane.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danish_citizen_tests.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dansk.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danske_talemaader.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danske_talemaader_old.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dbrd.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dutch_cola.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dutch_social.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_eltec.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fone.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_foqa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fosent.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fquad.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_germanquad.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_germeval.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_hellaswag.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_ice_linguistic.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_knowledge.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_qa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icesum.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_ilpost_sum.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_jentoft.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mim_gold_ner.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlqa_es.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlsum_de.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlsum_es.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mmlu.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_multinerd-it.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_no_cola.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_no_sammendrag.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nordjylland_news.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norec.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norglm_multiqa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norglm_multisum.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norne.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norquad.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nqii.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_orange_sum.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_personal_sum.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_rrn.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sb10k.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_scala.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_scandiqa.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_schibsted.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sentiment_headlines_es.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sentipolc16.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_it.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_nl.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_nl_old.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sst5.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_suc3.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_swedn.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_swerec.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wikiann_fo.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wikineural-it.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_winogrande_is.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_xquad_es.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/fix_dot_env_file.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/load_ud_pos.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/versioning.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/conftest.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_config_factory.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_base.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_fresh.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_hf.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_litellm.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_vllm.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmarker.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_callbacks.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_cli.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_constants.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_data_loading.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_data_models.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_dataset_configs.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_enums.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_exceptions.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_finetuning.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_generation.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_human_evaluation.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_languages.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_cache.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_config.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_loading.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_scores.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_speed_benchmark.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/__init__.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_question_answering.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_sequence_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_text_to_text.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_token_classification.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_tasks.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_tokenization_utils.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_types.py +0 -0
- {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_utils.py +0 -0
|
@@ -10,6 +10,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
## [v15.6.1] - 2025-04-14
|
|
14
|
+
### Changed
|
|
15
|
+
- Added more info about SQuAD-nl in the documentation. This was contributed by
|
|
16
|
+
[@Rijgersberg](https://github.com/Rijgersberg) ✨
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
- The "E" option for the Norwegian NorCommonSenseQA dataset was not included in the
|
|
20
|
+
refactor in v15.6.0, leading to evaluation errors. This has been fixed now.
|
|
21
|
+
- The number of few-shot examples for FoSent was not reduced to 5 again during the
|
|
22
|
+
refactor in v15.6.0, leading to evaluation errors. This has been fixed now.
|
|
23
|
+
|
|
24
|
+
|
|
13
25
|
## [v15.6.0] - 2025-04-13
|
|
14
26
|
### Added
|
|
15
27
|
- We now support specifying custom inference providers when benchmarking via the Hugging
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.6.0
|
|
3
|
+
Version: 15.6.1
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -237,6 +237,7 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
237
237
|
<a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
|
|
238
238
|
<a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
|
|
239
239
|
<a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
|
|
240
|
+
<a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
|
|
240
241
|
|
|
241
242
|
### Special Thanks
|
|
242
243
|
- Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
|
|
@@ -161,6 +161,7 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
161
161
|
<a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
|
|
162
162
|
<a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
|
|
163
163
|
<a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
|
|
164
|
+
<a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
|
|
164
165
|
|
|
165
166
|
### Special Thanks
|
|
166
167
|
- Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
|
|
@@ -310,12 +310,14 @@ Here are a few examples from the training split:
|
|
|
310
310
|
This dataset is published
|
|
311
311
|
[here](https://huggingface.co/datasets/GroNLP/squad-nl-v2.0) and is a machine translated
|
|
312
312
|
dataset of the English [SQuAD](https://aclanthology.org/D16-1264/) and
|
|
313
|
-
[XQuAD](https://aclanthology.org/2020.acl-main.421/) datasets
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
313
|
+
[XQuAD](https://aclanthology.org/2020.acl-main.421/) datasets, created for the
|
|
314
|
+
Dutch-language [DUMB](https://dumbench.nl/) benchmark. Google Translate was used to
|
|
315
|
+
translate the original datasets to Dutch. The test data
|
|
316
|
+
[was manually corrected](https://aclanthology.org/2023.emnlp-main.447/) by eight BSc
|
|
317
|
+
students as part of their thesis work.
|
|
318
|
+
|
|
319
|
+
The original SQuAD and XQuAD datasets are based on English Wikipedia articles and the
|
|
320
|
+
questions and answers are written by crowdworkers.
|
|
319
321
|
|
|
320
322
|
Here are a few examples from the training split:
|
|
321
323
|
|
|
@@ -81,21 +81,8 @@ test: ## Run tests
|
|
|
81
81
|
tree: ## Print directory tree
|
|
82
82
|
@tree -a --gitignore -I .git .
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
uv run
|
|
86
|
-
|
|
87
|
-
format: ## Format the project
|
|
88
|
-
uv run ruff format .
|
|
89
|
-
|
|
90
|
-
type-check: ## Type-check the project
|
|
91
|
-
@uv run mypy . \
|
|
92
|
-
--install-types \
|
|
93
|
-
--non-interactive \
|
|
94
|
-
--ignore-missing-imports \
|
|
95
|
-
--show-error-codes \
|
|
96
|
-
--check-untyped-defs
|
|
97
|
-
|
|
98
|
-
check: lint format type-check ## Lint, format, and type-check the code
|
|
84
|
+
check: ## Lint, format, and type-check the code
|
|
85
|
+
@uv run pre-commit run --all-files
|
|
99
86
|
|
|
100
87
|
bump-major:
|
|
101
88
|
@uv run python -m src.scripts.versioning --major
|
|
@@ -388,8 +388,10 @@ class DatasetConfig:
|
|
|
388
388
|
language.
|
|
389
389
|
_prompt_label_mapping (optional):
|
|
390
390
|
A mapping from the labels to another phrase which is used as a substitute
|
|
391
|
-
for the label in few-shot evaluation.
|
|
392
|
-
and
|
|
391
|
+
for the label in few-shot evaluation. If "auto" then the mapping will be set
|
|
392
|
+
to a 1:1 mapping between the labels and themselves. If None then the mapping
|
|
393
|
+
will be set to the default mapping for the task and language. Defaults to
|
|
394
|
+
None.
|
|
393
395
|
unofficial (optional):
|
|
394
396
|
Whether the dataset is unofficial. Defaults to False.
|
|
395
397
|
"""
|
|
@@ -405,7 +407,7 @@ class DatasetConfig:
|
|
|
405
407
|
_num_few_shot_examples: int | None = None
|
|
406
408
|
_max_generated_tokens: int | None = None
|
|
407
409
|
_labels: list[str] | None = None
|
|
408
|
-
_prompt_label_mapping: dict[str, str] | None = None
|
|
410
|
+
_prompt_label_mapping: dict[str, str] | t.Literal["auto"] | None = None
|
|
409
411
|
unofficial: bool = False
|
|
410
412
|
|
|
411
413
|
@property
|
|
@@ -475,7 +477,9 @@ class DatasetConfig:
|
|
|
475
477
|
@property
|
|
476
478
|
def prompt_label_mapping(self) -> dict[str, str]:
|
|
477
479
|
"""Mapping from English labels to localised labels."""
|
|
478
|
-
if self._prompt_label_mapping is not None:
|
|
480
|
+
if self._prompt_label_mapping == "auto":
|
|
481
|
+
return {label: label for label in self.labels}
|
|
482
|
+
elif self._prompt_label_mapping is not None:
|
|
479
483
|
return self._prompt_label_mapping
|
|
480
484
|
|
|
481
485
|
main_language = self.languages[0]
|
|
@@ -83,6 +83,7 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
|
|
|
83
83
|
huggingface_id="EuroEval/nor-common-sense-qa",
|
|
84
84
|
task=COMMON_SENSE,
|
|
85
85
|
languages=[NB, NN, NO],
|
|
86
|
+
_labels=["a", "b", "c", "d", "e"],
|
|
86
87
|
)
|
|
87
88
|
|
|
88
89
|
|
|
@@ -105,7 +106,6 @@ NORGLM_MULTI_QA = DatasetConfig(
|
|
|
105
106
|
huggingface_id="EuroEval/norglm-multi-qa",
|
|
106
107
|
task=RC,
|
|
107
108
|
languages=[NB, NN, NO],
|
|
108
|
-
_num_few_shot_examples=2,
|
|
109
109
|
unofficial=True,
|
|
110
110
|
)
|
|
111
111
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/linguistic_acceptability.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/named_entity_recognition.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/sentiment_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/multiple_choice_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/sequence_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|