EuroEval 15.8.1__tar.gz → 15.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- {euroeval-15.8.1 → euroeval-15.8.2}/.github/workflows/ci.yaml +4 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/.pre-commit-config.yaml +1 -1
- {euroeval-15.8.1 → euroeval-15.8.2}/CHANGELOG.md +7 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/PKG-INFO +3 -3
- {euroeval-15.8.1 → euroeval-15.8.2}/pyproject.toml +3 -3
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/litellm.py +7 -2
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/model_cache.py +9 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/uv.lock +3 -3
- {euroeval-15.8.1 → euroeval-15.8.2}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/.gitignore +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/CITATION.cff +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/CODE_OF_CONDUCT.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/CONTRIBUTING.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/Dockerfile.cuda +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/LICENSE +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/NEW_DATASET_GUIDE.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/README.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/CNAME +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/README.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/README.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/danish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/dutch.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/english.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/faroese.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/finnish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/french.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/german.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/icelandic.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/italian.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/norwegian.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/spanish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/datasets/swedish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/extras/radial_plotter.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/faq.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/gfx/favicon.png +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/danish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/dutch.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/english.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/faroese.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/french.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/german.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/icelandic.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/italian.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/norwegian.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/spanish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Monolingual/swedish.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Multilingual/european.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Multilingual/germanic.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/Multilingual/romance.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/leaderboards/README.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/methodology.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/python-package.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/README.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/common-sense-reasoning.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/knowledge.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/linguistic-acceptability.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/named-entity-recognition.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/reading-comprehension.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/sentiment-classification.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/speed.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/docs/tasks/summarization.md +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/gfx/euroeval.png +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/gfx/euroeval.xcf +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/gfx/scandeval.png +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/makefile +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/mkdocs.yaml +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_config_factory.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/base.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/fresh.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/hf.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmark_modules/vllm.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/benchmarker.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/callbacks.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/cli.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/constants.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/data_loading.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/data_models.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/danish.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/dutch.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/english.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/faroese.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/finnish.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/french.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/german.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/icelandic.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/italian.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/norwegian.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/spanish.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/dataset_configs/swedish.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/enums.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/exceptions.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/finetuning.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/generation.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/generation_utils.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/human_evaluation.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/languages.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/model_config.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/model_loading.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/linguistic_acceptability.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/multiple_choice.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/named_entity_recognition.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/reading_comprehension.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/sentiment_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/summarization.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/scores.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/speed_benchmark.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/question_answering.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/sequence_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/text_to_text.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/token_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/tasks.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/tokenization_utils.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/types.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/utils.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/constants.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_allocine.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_angry_tweets.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_arc.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_arc_is.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_belebele.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_cnn_dailymail.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_conll_en.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_conll_es.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_conll_nl.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_dane.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_danish_citizen_tests.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_dansk.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_danske_talemaader.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_danske_talemaader_old.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_dbrd.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_dutch_cola.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_eltec.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_fone.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_foqa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_fosent.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_fquad.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_germanquad.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_germeval.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_hellaswag.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_hellaswag_fi.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_ice_linguistic.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_icelandic_knowledge.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_icelandic_qa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_icesum.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_ilpost_sum.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_jentoft.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_mim_gold_ner.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_mlqa_es.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_mlsum_de.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_mlsum_es.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_mmlu.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_multinerd-it.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_no_cola.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_no_sammendrag.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_nordjylland_news.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_norec.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_norglm_multiqa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_norglm_multisum.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_norne.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_norquad.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_nqii.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_orange_sum.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_personal_sum.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_rrn.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_sb10k.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_scala.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_scandiqa.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_scandisent_fi.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_schibsted.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_sentiment_headlines_es.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_sentipolc16.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_squad.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_squad_it.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_squad_nl.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_squad_nl_old.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_sst5.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_suc3.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_swedn.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_swerec.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_turku_ner_fi.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_tydiqa_fi.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_wikiann_fo.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_wikineural-it.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_winogrande_is.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_xlsum_fi.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/create_xquad_es.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/fix_dot_env_file.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/load_ud_pos.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/src/scripts/versioning.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/conftest.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_config_factory.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/test_base.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/test_fresh.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/test_hf.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/test_litellm.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmark_modules/test_vllm.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_benchmarker.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_callbacks.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_cli.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_constants.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_data_loading.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_data_models.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_dataset_configs.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_enums.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_exceptions.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_finetuning.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_generation.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_human_evaluation.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_languages.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_model_cache.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_model_config.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_model_loading.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_scores.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_speed_benchmark.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_task_utils/__init__.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_task_utils/test_question_answering.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_task_utils/test_sequence_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_task_utils/test_text_to_text.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_task_utils/test_token_classification.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_tasks.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_tokenization_utils.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_types.py +0 -0
- {euroeval-15.8.1 → euroeval-15.8.2}/tests/test_utils.py +0 -0
|
@@ -10,6 +10,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
## [v15.8.2] - 2025-05-12
|
|
14
|
+
### Fixed
|
|
15
|
+
- Catch error when caching generative model outputs, when the number of model inputs and
|
|
16
|
+
outputs do not match.
|
|
17
|
+
- Disallow vLLM >=0.8.5, as it breaks generation output for several models.
|
|
18
|
+
|
|
19
|
+
|
|
13
20
|
## [v15.8.1] - 2025-05-08
|
|
14
21
|
### Fixed
|
|
15
22
|
- NER labels were included twice in the prompt templates (which was due to there being
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.8.1
|
|
3
|
+
Version: 15.8.2
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -62,12 +62,12 @@ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == '
|
|
|
62
62
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
63
63
|
Requires-Dist: gradio>=4.26.0; extra == 'all'
|
|
64
64
|
Requires-Dist: outlines>=0.1.11; extra == 'all'
|
|
65
|
-
Requires-Dist: vllm>=0.8.3; (platform_system == 'Linux') and extra == 'all'
|
|
65
|
+
Requires-Dist: vllm<0.8.5,>=0.8.3; (platform_system == 'Linux') and extra == 'all'
|
|
66
66
|
Provides-Extra: generative
|
|
67
67
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
68
68
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
69
69
|
Requires-Dist: outlines>=0.1.11; extra == 'generative'
|
|
70
|
-
Requires-Dist: vllm>=0.8.3; (platform_system == 'Linux') and extra == 'generative'
|
|
70
|
+
Requires-Dist: vllm<0.8.5,>=0.8.3; (platform_system == 'Linux') and extra == 'generative'
|
|
71
71
|
Provides-Extra: human-evaluation
|
|
72
72
|
Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
|
|
73
73
|
Provides-Extra: test
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "EuroEval"
|
|
3
|
-
version = "15.8.1"
|
|
3
|
+
version = "15.8.2"
|
|
4
4
|
description = "The robust European language model benchmark."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -46,7 +46,7 @@ dependencies = [
|
|
|
46
46
|
generative = [
|
|
47
47
|
"outlines>=0.1.11",
|
|
48
48
|
"bitsandbytes>=0.43.1; platform_system == 'Linux'",
|
|
49
|
-
"vllm>=0.8.3; platform_system == 'Linux'",
|
|
49
|
+
"vllm>=0.8.3,<0.8.5; platform_system == 'Linux'",
|
|
50
50
|
"fbgemm-gpu>=1.0.0; platform_system == 'Linux'",
|
|
51
51
|
]
|
|
52
52
|
human_evaluation = [
|
|
@@ -55,7 +55,7 @@ human_evaluation = [
|
|
|
55
55
|
all = [
|
|
56
56
|
"outlines>=0.1.11",
|
|
57
57
|
"bitsandbytes>=0.43.1; platform_system == 'Linux'",
|
|
58
|
-
"vllm>=0.8.3; platform_system == 'Linux'",
|
|
58
|
+
"vllm>=0.8.3,<0.8.5; platform_system == 'Linux'",
|
|
59
59
|
"fbgemm-gpu>=1.0.0; platform_system == 'Linux'",
|
|
60
60
|
"gradio>=4.26.0",
|
|
61
61
|
]
|
|
@@ -401,6 +401,12 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
401
401
|
model_responses=ordered_responses, model_id=self.model_config.model_id
|
|
402
402
|
)
|
|
403
403
|
|
|
404
|
+
if len(messages) != len(model_output.sequences):
|
|
405
|
+
raise InvalidBenchmark(
|
|
406
|
+
f"Number of model inputs ({len(messages):,}) does not match the "
|
|
407
|
+
f"number of model outputs ({len(model_output.sequences):,})."
|
|
408
|
+
)
|
|
409
|
+
|
|
404
410
|
return model_output
|
|
405
411
|
|
|
406
412
|
def _handle_exception(
|
|
@@ -616,8 +622,7 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
616
622
|
scores = []
|
|
617
623
|
for model_response in model_responses:
|
|
618
624
|
if not model_response.choices:
|
|
619
|
-
|
|
620
|
-
# and run out of tokens. Happens quite rarely, but we need to handle it.
|
|
625
|
+
sequences.append("")
|
|
621
626
|
logger.warning(
|
|
622
627
|
f"The model {model_id!r} did not end up "
|
|
623
628
|
"generating any text. This is likely because the model ran "
|
|
@@ -168,6 +168,15 @@ class ModelCache:
|
|
|
168
168
|
input_column = "messages" if "messages" in model_inputs else "text"
|
|
169
169
|
model_inputs = model_inputs[input_column]
|
|
170
170
|
|
|
171
|
+
# Double check that the number of inputs and outputs match
|
|
172
|
+
if not len(model_inputs) == len(model_output.sequences):
|
|
173
|
+
logger.warning(
|
|
174
|
+
f"Number of model inputs ({len(model_inputs)}) does not match the "
|
|
175
|
+
f"number of model outputs ({len(model_output.sequences)}). We will not "
|
|
176
|
+
f"cache the model outputs."
|
|
177
|
+
)
|
|
178
|
+
return
|
|
179
|
+
|
|
171
180
|
# Store the generated sequences in the cache, one by one
|
|
172
181
|
with tqdm(
|
|
173
182
|
iterable=model_inputs,
|
|
@@ -906,7 +906,7 @@ wheels = [
|
|
|
906
906
|
|
|
907
907
|
[[package]]
|
|
908
908
|
name = "euroeval"
|
|
909
|
-
version = "15.8.1"
|
|
909
|
+
version = "15.8.2"
|
|
910
910
|
source = { editable = "." }
|
|
911
911
|
dependencies = [
|
|
912
912
|
{ name = "accelerate" },
|
|
@@ -1034,8 +1034,8 @@ requires-dist = [
|
|
|
1034
1034
|
{ name = "termcolor", specifier = ">=2.0.0" },
|
|
1035
1035
|
{ name = "torch", specifier = ">=2.6.0" },
|
|
1036
1036
|
{ name = "transformers", specifier = ">=4.51.0" },
|
|
1037
|
-
{ name = "vllm", marker = "sys_platform == 'linux' and extra == 'all'", specifier = ">=0.8.3" },
|
|
1038
|
-
{ name = "vllm", marker = "sys_platform == 'linux' and extra == 'generative'", specifier = ">=0.8.3" },
|
|
1037
|
+
{ name = "vllm", marker = "sys_platform == 'linux' and extra == 'all'", specifier = ">=0.8.3,<0.8.5" },
|
|
1038
|
+
{ name = "vllm", marker = "sys_platform == 'linux' and extra == 'generative'", specifier = ">=0.8.3,<0.8.5" },
|
|
1039
1039
|
]
|
|
1040
1040
|
provides-extras = ["generative", "human-evaluation", "all", "test"]
|
|
1041
1041
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/linguistic_acceptability.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/named_entity_recognition.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/prompt_templates/sentiment_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/multiple_choice_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.8.1 → euroeval-15.8.2}/src/euroeval/task_group_utils/sequence_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|