EuroEval 15.8.0__tar.gz → 15.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- {euroeval-15.8.0 → euroeval-15.8.1}/CHANGELOG.md +10 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/PKG-INFO +1 -1
- {euroeval-15.8.0 → euroeval-15.8.1}/pyproject.toml +1 -1
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/data_models.py +9 -5
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/sequence_classification.py +21 -32
- {euroeval-15.8.0 → euroeval-15.8.1}/uv.lock +1 -1
- {euroeval-15.8.0 → euroeval-15.8.1}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.github/workflows/ci.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.gitignore +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/.pre-commit-config.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/CITATION.cff +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/CODE_OF_CONDUCT.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/CONTRIBUTING.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/Dockerfile.cuda +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/LICENSE +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/NEW_DATASET_GUIDE.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/README.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/CNAME +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/README.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/README.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/danish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/dutch.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/english.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/faroese.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/finnish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/french.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/german.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/icelandic.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/italian.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/norwegian.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/spanish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/datasets/swedish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/extras/radial_plotter.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/faq.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/gfx/favicon.png +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/danish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/dutch.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/english.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/faroese.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/french.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/german.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/icelandic.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/italian.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/norwegian.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/spanish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Monolingual/swedish.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Multilingual/european.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Multilingual/germanic.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/Multilingual/romance.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/leaderboards/README.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/methodology.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/python-package.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/README.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/common-sense-reasoning.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/knowledge.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/linguistic-acceptability.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/named-entity-recognition.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/reading-comprehension.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/sentiment-classification.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/speed.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/docs/tasks/summarization.md +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/gfx/euroeval.png +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/gfx/euroeval.xcf +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/gfx/scandeval.png +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/makefile +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/mkdocs.yaml +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_config_factory.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/base.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/fresh.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/hf.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/litellm.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmark_modules/vllm.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/benchmarker.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/callbacks.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/cli.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/constants.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/data_loading.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/danish.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/dutch.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/english.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/faroese.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/finnish.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/french.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/german.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/icelandic.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/italian.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/norwegian.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/spanish.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/dataset_configs/swedish.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/enums.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/exceptions.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/finetuning.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/generation.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/generation_utils.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/human_evaluation.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/languages.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/model_cache.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/model_config.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/model_loading.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/linguistic_acceptability.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/multiple_choice.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/named_entity_recognition.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/reading_comprehension.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/sentiment_classification.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/summarization.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/scores.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/speed_benchmark.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/question_answering.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/text_to_text.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/token_classification.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/tasks.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/tokenization_utils.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/types.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/utils.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/constants.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_allocine.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_angry_tweets.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_arc.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_arc_is.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_belebele.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_cnn_dailymail.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_conll_en.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_conll_es.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_conll_nl.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_dane.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_danish_citizen_tests.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_dansk.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_danske_talemaader.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_danske_talemaader_old.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_dbrd.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_dutch_cola.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_eltec.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_fone.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_foqa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_fosent.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_fquad.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_germanquad.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_germeval.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_hellaswag.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_hellaswag_fi.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_ice_linguistic.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_icelandic_knowledge.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_icelandic_qa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_icesum.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_ilpost_sum.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_jentoft.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_mim_gold_ner.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_mlqa_es.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_mlsum_de.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_mlsum_es.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_mmlu.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_multinerd-it.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_no_cola.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_no_sammendrag.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_nordjylland_news.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_norec.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_norglm_multiqa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_norglm_multisum.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_norne.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_norquad.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_nqii.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_orange_sum.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_personal_sum.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_rrn.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_sb10k.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_scala.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_scandiqa.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_scandisent_fi.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_schibsted.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_sentiment_headlines_es.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_sentipolc16.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_squad.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_squad_it.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_squad_nl.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_squad_nl_old.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_sst5.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_suc3.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_swedn.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_swerec.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_turku_ner_fi.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_tydiqa_fi.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_wikiann_fo.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_wikineural-it.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_winogrande_is.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_xlsum_fi.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/create_xquad_es.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/fix_dot_env_file.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/load_ud_pos.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/src/scripts/versioning.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/conftest.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_config_factory.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/test_base.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/test_fresh.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/test_hf.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/test_litellm.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmark_modules/test_vllm.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_benchmarker.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_callbacks.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_cli.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_constants.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_data_loading.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_data_models.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_dataset_configs.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_enums.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_exceptions.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_finetuning.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_generation.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_human_evaluation.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_languages.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_model_cache.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_model_config.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_model_loading.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_scores.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_speed_benchmark.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_task_utils/__init__.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_task_utils/test_question_answering.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_task_utils/test_sequence_classification.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_task_utils/test_text_to_text.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_task_utils/test_token_classification.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_tasks.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_tokenization_utils.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_types.py +0 -0
- {euroeval-15.8.0 → euroeval-15.8.1}/tests/test_utils.py +0 -0
|
@@ -10,6 +10,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
## [v15.8.1] - 2025-05-08
|
|
14
|
+
### Fixed
|
|
15
|
+
- NER labels were included twice in the prompt templates (which was due to there being
|
|
16
|
+
both, e.g., `B-ORG` and `I-ORG`). This caused models not using structured generation,
|
|
17
|
+
such as reasoning models, to sometimes output the wrong labels. This has been fixed
|
|
18
|
+
now.
|
|
19
|
+
- If a model outputs a `\boxed{}` answer, we now extract and use that, rather than the
|
|
20
|
+
full generated answer.
|
|
21
|
+
|
|
22
|
+
|
|
13
23
|
## [v15.8.0] - 2025-05-07
|
|
14
24
|
### Added
|
|
15
25
|
- Added the BeleBele datasets for Finnish, Italian and Spanish. They are listed as
|
|
@@ -529,12 +529,16 @@ class DatasetConfig:
|
|
|
529
529
|
else:
|
|
530
530
|
sep_word = main_language.or_separator
|
|
531
531
|
|
|
532
|
+
local_labels: list[str] = []
|
|
533
|
+
for label in self.labels:
|
|
534
|
+
if label not in self.prompt_label_mapping:
|
|
535
|
+
continue
|
|
536
|
+
local_label = self.prompt_label_mapping[label]
|
|
537
|
+
if local_label not in local_labels:
|
|
538
|
+
local_labels.append(local_label)
|
|
539
|
+
|
|
532
540
|
# Convert labels to single-quoted labels - and remove duplicates
|
|
533
|
-
quoted_labels = [
|
|
534
|
-
f"'{self.prompt_label_mapping[label]}'"
|
|
535
|
-
for label in set(self.labels)
|
|
536
|
-
if label in self.prompt_label_mapping
|
|
537
|
-
]
|
|
541
|
+
quoted_labels = [f"'{label}'" for label in local_labels]
|
|
538
542
|
|
|
539
543
|
if not quoted_labels:
|
|
540
544
|
return ""
|
{euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/sequence_classification.py
RENAMED
|
@@ -144,9 +144,27 @@ def extract_labels_from_generation(
|
|
|
144
144
|
)
|
|
145
145
|
if labels is not None:
|
|
146
146
|
return labels
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
147
|
+
|
|
148
|
+
candidate_labels = [
|
|
149
|
+
dataset_config.prompt_label_mapping[lbl]
|
|
150
|
+
for lbl in dataset_config.id2label.values()
|
|
151
|
+
]
|
|
152
|
+
new_predicted_labels: list[str] = list()
|
|
153
|
+
for predicted_label in model_output.sequences:
|
|
154
|
+
# If the prediction includes a boxed answer, use that instead of the full
|
|
155
|
+
# generation
|
|
156
|
+
if (m := re.search(r"boxed\{(.*?)\}", predicted_label)) is not None:
|
|
157
|
+
predicted_label = m.group(1)
|
|
158
|
+
|
|
159
|
+
# Pick the label with the smallest character-level edit distance to the predicted label
|
|
160
|
+
edit_distances = [
|
|
161
|
+
Levenshtein.distance(s1=predicted_label.lower(), s2=candidate_label.lower())
|
|
162
|
+
for candidate_label in candidate_labels
|
|
163
|
+
]
|
|
164
|
+
predicted_label = candidate_labels[np.argmin(edit_distances).item()]
|
|
165
|
+
new_predicted_labels.append(predicted_label)
|
|
166
|
+
|
|
167
|
+
return new_predicted_labels
|
|
150
168
|
|
|
151
169
|
|
|
152
170
|
def get_closest_logprobs_labels(
|
|
@@ -305,32 +323,3 @@ def get_closest_logprobs_labels(
|
|
|
305
323
|
|
|
306
324
|
assert len(output_labels) == len(generation_logprobs)
|
|
307
325
|
return output_labels
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
def get_closest_word_edit_labels(
|
|
311
|
-
generated_sequences: list[str], dataset_config: "DatasetConfig"
|
|
312
|
-
) -> list[str]:
|
|
313
|
-
"""Get the labels with the smallest edit distance to the predicted labels.
|
|
314
|
-
|
|
315
|
-
Args:
|
|
316
|
-
generated_sequences:
|
|
317
|
-
The generated sequences from the model.
|
|
318
|
-
dataset_config:
|
|
319
|
-
The configuration of the dataset.
|
|
320
|
-
|
|
321
|
-
Returns:
|
|
322
|
-
The candidate labels with the smallest edit distance to the predicted labels.
|
|
323
|
-
"""
|
|
324
|
-
candidate_labels = [
|
|
325
|
-
dataset_config.prompt_label_mapping[lbl]
|
|
326
|
-
for lbl in dataset_config.id2label.values()
|
|
327
|
-
]
|
|
328
|
-
new_predicted_labels: list[str] = list()
|
|
329
|
-
for predicted_label in generated_sequences:
|
|
330
|
-
edit_distances = [
|
|
331
|
-
Levenshtein.distance(s1=predicted_label.lower(), s2=candidate_label.lower())
|
|
332
|
-
for candidate_label in candidate_labels
|
|
333
|
-
]
|
|
334
|
-
closest_label = candidate_labels[np.argmin(edit_distances).item()]
|
|
335
|
-
new_predicted_labels.append(closest_label)
|
|
336
|
-
return new_predicted_labels
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/linguistic_acceptability.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/named_entity_recognition.py
RENAMED
|
File without changes
|
|
File without changes
|
{euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/prompt_templates/sentiment_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{euroeval-15.8.0 → euroeval-15.8.1}/src/euroeval/task_group_utils/multiple_choice_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|