EuroEval 16.2.2__tar.gz → 16.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +9 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/bug.yaml +6 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +3 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +5 -4
- euroeval-16.4.0/.markdownlint.jsonc +10 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/.pre-commit-config.yaml +8 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/CHANGELOG.md +618 -216
- {euroeval-16.2.2 → euroeval-16.4.0}/CODE_OF_CONDUCT.md +3 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/CONTRIBUTING.md +6 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/NEW_DATASET_GUIDE.md +48 -25
- {euroeval-16.2.2 → euroeval-16.4.0}/PKG-INFO +182 -61
- {euroeval-16.2.2 → euroeval-16.4.0}/README.md +179 -58
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/README.md +9 -4
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/README.md +1 -1
- euroeval-16.4.0/docs/datasets/czech.md +671 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/danish.md +239 -152
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/dutch.md +147 -73
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/english.md +159 -78
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/estonian.md +188 -58
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/faroese.md +94 -54
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/finnish.md +123 -61
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/french.md +130 -65
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/german.md +167 -80
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/icelandic.md +187 -92
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/italian.md +151 -76
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/latvian.md +124 -66
- euroeval-16.4.0/docs/datasets/lithuanian.md +517 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/norwegian.md +288 -142
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/polish.md +136 -77
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/portuguese.md +167 -76
- euroeval-16.4.0/docs/datasets/slovak.md +446 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/spanish.md +154 -67
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/swedish.md +255 -153
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/extras/radial_plotter.md +2 -2
- euroeval-16.4.0/docs/leaderboards/Monolingual/czech.md +26 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/danish.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/dutch.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/english.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/estonian.md +5 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/faroese.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/finnish.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/french.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/german.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/icelandic.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/italian.md +3 -0
- euroeval-16.4.0/docs/leaderboards/Monolingual/latvian.md +26 -0
- euroeval-16.4.0/docs/leaderboards/Monolingual/lithuanian.md +26 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/norwegian.md +3 -0
- euroeval-16.4.0/docs/leaderboards/Monolingual/polish.md +26 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/portuguese.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/spanish.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/swedish.md +3 -0
- euroeval-16.4.0/docs/leaderboards/Multilingual/baltic.md +26 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/european.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/finnic.md +5 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/germanic.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/romance.md +3 -0
- euroeval-16.4.0/docs/leaderboards/Multilingual/slavic.md +26 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/README.md +4 -6
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/methodology.md +2 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/python-package.md +10 -12
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/README.md +0 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/common-sense-reasoning.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/knowledge.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/linguistic-acceptability.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/named-entity-recognition.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/reading-comprehension.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/sentiment-classification.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/speed.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/summarization.md +1 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/makefile +1 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/pyproject.toml +10 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/__init__.py +7 -4
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_config_factory.py +0 -4
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/base.py +3 -16
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/fresh.py +5 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/hf.py +107 -66
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/litellm.py +103 -55
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/vllm.py +155 -82
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmarker.py +184 -129
- euroeval-16.4.0/src/euroeval/caching_utils.py +79 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/callbacks.py +5 -7
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/cli.py +1 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/constants.py +9 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/data_loading.py +14 -11
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/data_models.py +12 -4
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/__init__.py +3 -0
- euroeval-16.4.0/src/euroeval/dataset_configs/czech.py +79 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/danish.py +10 -13
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/dutch.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/english.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/estonian.py +11 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/finnish.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/french.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/german.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/italian.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/latvian.py +2 -4
- euroeval-16.4.0/src/euroeval/dataset_configs/lithuanian.py +68 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/norwegian.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/polish.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/portuguese.py +0 -3
- euroeval-16.4.0/src/euroeval/dataset_configs/slovak.py +60 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/spanish.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/swedish.py +10 -15
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/finetuning.py +21 -15
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/generation.py +10 -10
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/generation_utils.py +2 -3
- euroeval-16.4.0/src/euroeval/logging_utils.py +250 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/base.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/huggingface.py +10 -6
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/llm_as_a_judge.py +5 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/pipeline.py +22 -9
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/speed.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_cache.py +11 -14
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_config.py +4 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_loading.py +3 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/linguistic_acceptability.py +30 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/multiple_choice.py +34 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/named_entity_recognition.py +71 -11
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/reading_comprehension.py +41 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/sentiment_classification.py +34 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/summarization.py +26 -6
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/scores.py +7 -7
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/speed_benchmark.py +3 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/question_answering.py +0 -3
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/sequence_classification.py +43 -31
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/text_to_text.py +17 -8
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/token_classification.py +10 -9
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/tokenisation_utils.py +22 -20
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/utils.py +30 -147
- euroeval-16.4.0/src/scripts/__init__.py +1 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/constants.py +3 -0
- euroeval-16.4.0/src/scripts/create_cs_gec.py +83 -0
- euroeval-16.4.0/src/scripts/create_csfd_sentiment.py +97 -0
- euroeval-16.4.0/src/scripts/create_csfd_sentiment_sk.py +92 -0
- euroeval-16.4.0/src/scripts/create_czech_news.py +75 -0
- euroeval-16.4.0/src/scripts/create_hellaswag_cs.py +120 -0
- euroeval-16.4.0/src/scripts/create_lithuanian_lrytas_summarization.py +87 -0
- euroeval-16.4.0/src/scripts/create_lt_emotions.py +159 -0
- euroeval-16.4.0/src/scripts/create_lt_history.py +154 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mmlu.py +1 -1
- euroeval-16.4.0/src/scripts/create_mmlu_et.py +162 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_multi_wiki_qa.py +2 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norglm_multiqa.py +20 -0
- euroeval-16.2.2/src/scripts/create_wikiann_lv.py → euroeval-16.4.0/src/scripts/create_poner.py +62 -44
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scala.py +6 -0
- euroeval-16.2.2/src/scripts/create_swedish_skolprov.py → euroeval-16.4.0/src/scripts/create_skolprov.py +25 -18
- euroeval-16.4.0/src/scripts/create_sqad.py +137 -0
- euroeval-16.4.0/src/scripts/create_umimeto_qa.py +114 -0
- euroeval-16.4.0/src/scripts/create_uner_sk.py +183 -0
- euroeval-16.4.0/src/scripts/create_wikiann.py +115 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande.py +21 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/load_ud_pos.py +216 -72
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/versioning.py +1 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/conftest.py +28 -12
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_modules/test_hf.py +11 -5
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmarker.py +49 -55
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_constants.py +4 -2
- euroeval-16.4.0/tests/test_data_loading.py +166 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_data_models.py +2 -1
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_dataset_configs.py +36 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_model_config.py +1 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_model_loading.py +3 -0
- euroeval-16.4.0/tests/test_scripts/__init__.py +1 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/__init__.py +1 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_create_scala.py +86 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +12 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +70 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +11 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +14 -0
- euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +16 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_speed_benchmark.py +1 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_tokenisation_utils.py +6 -2
- {euroeval-16.2.2 → euroeval-16.4.0}/uv.lock +142 -152
- euroeval-16.2.2/tests/test_data_loading.py +0 -141
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/.github/workflows/ci.yaml +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/.gitignore +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/CITATION.cff +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/Dockerfile.cuda +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/LICENSE +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/CNAME +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/faq.md +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/docs/gfx/favicon.png +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/gfx/euroeval.png +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/gfx/euroeval.xcf +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/gfx/scandeval.png +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/mkdocs.yaml +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/faroese.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/icelandic.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/enums.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/exceptions.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/languages.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/tasks.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/types.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_allocine.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_angry_tweets.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_arc.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_arc_is.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_belebele.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_boolq_pt.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_cnn_dailymail.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_en.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_es.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_nl.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_copa_lv.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dane.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danish_citizen_tests.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dansk.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danske_talemaader.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danske_talemaader_old.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dbrd.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dutch_cola.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_eltec.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_err_news.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_estner.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_estonian_valence.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_european_values.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_exam_et.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fone.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_foqa.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fosent.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fquad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fullstack_ner.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_germanquad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_germeval.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_goldenswag.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_grammar_et.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_harem.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hellaswag.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hellaswag_fi.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_ice_linguistic.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_knowledge.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_qa.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icesum.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_idioms_no.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_ilpost_sum.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_jentoft.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_kpwr_ner.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_latvian_lsm_summary.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_latvian_twitter_sentiment.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_life_in_the_uk.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_llmzszl.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mim_gold_ner.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlqa_es.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlsum_de.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlsum_es.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mmlu_lv.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_multinerd-it.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_no_cola.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_no_sammendrag.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nordjylland_news.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norec.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norglm_multisum.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norne.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norquad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nqii.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_orange_sum.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_personal_sum.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_polemo2.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_poquad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_psc.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_publico.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_rrn.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sb10k.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scandiqa.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scandisent_fi.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_schibsted.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sentiment_headlines_es.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sentipolc16.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_it.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_nl.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_nl_old.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sst2_pt.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sst5.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_suc3.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_swedn.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_swerec.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_trivia_et.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_turku_ner_fi.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_tydiqa_fi.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_wikineural-it.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande_et.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande_is.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_xlsum_fi.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_xquad.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/fix_dot_env_file.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_config_factory.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_modules/__init__.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_callbacks.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_cli.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_enums.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_exceptions.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_finetuning.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_languages.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_scores.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_tasks.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_types.py +0 -0
- {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_utils.py +0 -0
|
@@ -14,7 +14,9 @@ body:
|
|
|
14
14
|
- type: input
|
|
15
15
|
attributes:
|
|
16
16
|
label: Dataset link
|
|
17
|
-
description:
|
|
17
|
+
description: >
|
|
18
|
+
Please give a link to where the dataset is hosted (doesn't have to be on the
|
|
19
|
+
Hugging Face Hub)
|
|
18
20
|
validations:
|
|
19
21
|
required: true
|
|
20
22
|
- type: checkboxes
|
|
@@ -22,6 +24,7 @@ body:
|
|
|
22
24
|
label: Dataset languages
|
|
23
25
|
description: What languages is the dataset in?
|
|
24
26
|
options:
|
|
27
|
+
- label: Czech
|
|
25
28
|
- label: Danish
|
|
26
29
|
- label: Dutch
|
|
27
30
|
- label: English
|
|
@@ -33,9 +36,11 @@ body:
|
|
|
33
36
|
- label: Icelandic
|
|
34
37
|
- label: Italian
|
|
35
38
|
- label: Latvian
|
|
39
|
+
- label: Lithuanian
|
|
36
40
|
- label: Norwegian (Bokmål or Nynorsk)
|
|
37
41
|
- label: Polish
|
|
38
42
|
- label: Portuguese
|
|
43
|
+
- label: Slovak
|
|
39
44
|
- label: Spanish
|
|
40
45
|
- label: Swedish
|
|
41
46
|
validations:
|
|
@@ -43,7 +48,9 @@ body:
|
|
|
43
48
|
- type: textarea
|
|
44
49
|
attributes:
|
|
45
50
|
label: Describe the dataset
|
|
46
|
-
description:
|
|
51
|
+
description: >
|
|
52
|
+
Describe what the dataset is measuring, and why you think it is important to
|
|
53
|
+
include it as a benchmark dataset in EuroEval.
|
|
47
54
|
validations:
|
|
48
55
|
required: true
|
|
49
56
|
- type: markdown
|
|
@@ -7,12 +7,15 @@ body:
|
|
|
7
7
|
- type: markdown
|
|
8
8
|
attributes:
|
|
9
9
|
value: >
|
|
10
|
-
|
|
10
|
+
Before submitting a bug, please make sure the issue hasn't been already addressed
|
|
11
|
+
by searching through [the existing and past
|
|
12
|
+
issues](https://github.com/EuroEval/EuroEval/issues?q=is%3Aissue).
|
|
11
13
|
- type: textarea
|
|
12
14
|
attributes:
|
|
13
15
|
label: 🐛 Describe the bug
|
|
14
|
-
description:
|
|
15
|
-
Please provide a clear and concise description of what the bug is. If relevant,
|
|
16
|
+
description: >
|
|
17
|
+
Please provide a clear and concise description of what the bug is. If relevant,
|
|
18
|
+
add a minimal example so that we can reproduce the error by running the code.
|
|
16
19
|
validations:
|
|
17
20
|
required: true
|
|
18
21
|
- type: dropdown
|
|
@@ -8,7 +8,9 @@ body:
|
|
|
8
8
|
attributes:
|
|
9
9
|
label: 🚀 The feature, motivation and pitch
|
|
10
10
|
description: >
|
|
11
|
-
A clear and concise description of the feature proposal. Please outline the
|
|
11
|
+
A clear and concise description of the feature proposal. Please outline the
|
|
12
|
+
motivation for the proposal. Is your feature request related to a specific
|
|
13
|
+
problem? e.g., *"I'm working on X and would like Y to be possible"*.
|
|
12
14
|
validations:
|
|
13
15
|
required: true
|
|
14
16
|
- type: markdown
|
|
@@ -18,12 +18,12 @@ body:
|
|
|
18
18
|
What languages should this model be evaluated on? Tick all that apply. If the
|
|
19
19
|
model is multilingual (e.g., Mistral, Llama), then tick all the languages.
|
|
20
20
|
options:
|
|
21
|
+
- label: Baltic languages (Latvian, Lithuanian)
|
|
22
|
+
- label: Finnic languages (Estonian, Finnish)
|
|
21
23
|
- label: Romance languages (French, Italian, Portuguese, Spanish)
|
|
22
24
|
- label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
|
|
25
|
+
- label: Slavic languages (Czech, Polish, Slovak)
|
|
23
26
|
- label: West Germanic languages (Dutch, English, German)
|
|
24
|
-
- label: Finnic languages (Estonian, Finnish)
|
|
25
|
-
- label: Latvian
|
|
26
|
-
- label: Polish
|
|
27
27
|
validations:
|
|
28
28
|
required: true
|
|
29
29
|
- type: dropdown
|
|
@@ -49,7 +49,8 @@ body:
|
|
|
49
49
|
- type: dropdown
|
|
50
50
|
attributes:
|
|
51
51
|
label: Merged model
|
|
52
|
-
description:
|
|
52
|
+
description: >
|
|
53
|
+
Is the model a merge of other models, or built on top of a merged model?
|
|
53
54
|
options:
|
|
54
55
|
- Not a merged model
|
|
55
56
|
- Merged model
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"MD013": {
|
|
3
|
+
"line_length": 88,
|
|
4
|
+
"code_blocks": false,
|
|
5
|
+
"tables": false
|
|
6
|
+
}, // Set maximum line length to 88 characters, except in code blocks and tables
|
|
7
|
+
"MD024": false, // Allow duplicate headings
|
|
8
|
+
"MD033": false, // Allow inline HTML
|
|
9
|
+
"MD059": false // Allow non-descriptive link text
|
|
10
|
+
}
|
|
@@ -10,7 +10,7 @@ repos:
|
|
|
10
10
|
- id: trailing-whitespace
|
|
11
11
|
- id: debug-statements
|
|
12
12
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
-
rev: v0.
|
|
13
|
+
rev: v0.14.1
|
|
14
14
|
hooks:
|
|
15
15
|
- id: ruff
|
|
16
16
|
args:
|
|
@@ -34,7 +34,7 @@ repos:
|
|
|
34
34
|
hooks:
|
|
35
35
|
- id: nbstripout
|
|
36
36
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
37
|
-
rev: v1.18.
|
|
37
|
+
rev: v1.18.2
|
|
38
38
|
hooks:
|
|
39
39
|
- id: mypy
|
|
40
40
|
args:
|
|
@@ -43,3 +43,9 @@ repos:
|
|
|
43
43
|
- --ignore-missing-imports
|
|
44
44
|
- --show-error-codes
|
|
45
45
|
- --check-untyped-defs
|
|
46
|
+
- repo: https://github.com/DavidAnson/markdownlint-cli2
|
|
47
|
+
rev: v0.18.1
|
|
48
|
+
hooks:
|
|
49
|
+
- id: markdownlint-cli2
|
|
50
|
+
args:
|
|
51
|
+
- --fix
|