ScandEval 16.8.0__tar.gz → 16.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +1 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +4 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/.pre-commit-config.yaml +5 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/CHANGELOG.md +56 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/PKG-INFO +21 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/README.md +14 -0
- scandeval-16.10.0/docs/datasets/albanian.md +524 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/bosnian.md +2 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/croatian.md +2 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/czech.md +4 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/dutch.md +154 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/lithuanian.md +3 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/serbian.md +2 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/swedish.md +77 -0
- scandeval-16.10.0/docs/leaderboards/Monolingual/albanian.md +26 -0
- scandeval-16.10.0/docs/leaderboards/Monolingual/bosnian.md +26 -0
- scandeval-16.10.0/docs/leaderboards/Monolingual/catalan.md +26 -0
- scandeval-16.10.0/docs/leaderboards/Monolingual/hungarian.md +26 -0
- scandeval-16.10.0/docs/leaderboards/Monolingual/romanian.md +26 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/romance.md +1 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/slavic.md +1 -1
- scandeval-16.10.0/docs/python-package.md +394 -0
- scandeval-16.10.0/docs/tasks/simplification.md +42 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/makefile +1 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/pyproject.toml +13 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/hf.py +18 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/litellm.py +14 -13
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/vllm.py +127 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmarker.py +0 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/constants.py +9 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/data_models.py +5 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/__init__.py +1 -0
- scandeval-16.10.0/src/scandeval/dataset_configs/albanian.py +64 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/dutch.py +31 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/swedish.py +9 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/logging_utils.py +1 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/huggingface.py +82 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/llm_as_a_judge.py +1 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_config.py +2 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/__init__.py +1 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/linguistic_acceptability.py +9 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/multiple_choice.py +9 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/named_entity_recognition.py +20 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/reading_comprehension.py +9 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/sentiment_classification.py +11 -0
- scandeval-16.10.0/src/scandeval/prompt_templates/simplification.py +23 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/summarization.py +11 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/question_answering.py +30 -19
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/sequence_classification.py +4 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/text_to_text.py +3 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/token_classification.py +6 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/tasks.py +11 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/tokenisation_utils.py +7 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/types.py +7 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/utils.py +5 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/constants.py +1 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_allocine.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_arc.py +13 -10
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_arc_is.py +16 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_atsiliepimai.py +9 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_belebele.py +11 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_bg_ner_bsnlp.py +6 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_boolq_pt.py +12 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cinexio.py +9 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cnn_dailymail.py +10 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_en.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_es.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_nl.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_copa_lv.py +9 -6
- scandeval-16.10.0/src/scripts/create_copa_nl.py +92 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cross_domain_uk_reviews.py +16 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cs_gec.py +16 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_csfd_sentiment.py +8 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_csfd_sentiment_sk.py +6 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_czech_news.py +15 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dacsa.py +10 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dane.py +5 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danish_citizen_tests.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dansk.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danske_talemaader.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danske_talemaader_old.py +10 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dbrd.py +7 -4
- scandeval-16.10.0/src/scripts/create_duidelijke_taal.py +198 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dutch_cola.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_elner.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_eltec.py +9 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_err_news.py +13 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_estner.py +6 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_estonian_valence.py +7 -10
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_european_values.py +5 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_exam_et.py +10 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_exams_bg.py +11 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fone.py +7 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_foqa.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fosent.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fquad.py +11 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fullstack_ner.py +23 -14
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_germanquad.py +13 -10
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_germeval.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_global_mmlu.py +95 -37
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_goldenswag.py +14 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_grammar_et.py +9 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_greek_sa.py +12 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_greek_wikipedia.py +10 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_guia_cat.py +15 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_harem.py +11 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag.py +12 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag_cs.py +12 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag_fi.py +16 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hotter_and_colder_sentiment.py +9 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hun_sum.py +21 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_husst.py +13 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ice_linguistic.py +17 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_error_corpus.py +30 -20
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_knowledge.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_qa.py +21 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icesum.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_idioms_no.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ilpost_sum.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_jentoft.py +14 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_kpwr_ner.py +10 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_latvian_lsm_summary.py +15 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_latvian_twitter_sentiment.py +16 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_life_in_the_uk.py +12 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lithuanian_lrytas_summarization.py +15 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_llmzszl.py +14 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lr_sum.py +20 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lt_emotions.py +12 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lt_history.py +10 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlqa_es.py +9 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlsum_de.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlsum_es.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu.py +17 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_et.py +11 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_hr.py +12 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_lv.py +19 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mms.py +11 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_multi_wiki_qa.py +14 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_multinerd-it.py +9 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ner_uk.py +14 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_no_cola.py +13 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_no_sammendrag.py +12 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nor_common_sense_qa.py +14 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nordjylland_news.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norglm_multiqa.py +18 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norglm_multisum.py +12 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norne.py +14 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norquad.py +12 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nqii.py +17 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nrk_quiz_qa.py +15 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_orange_sum.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_personal_sum.py +8 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_polemo2.py +10 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_poner.py +10 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_poquad.py +19 -10
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_psc.py +15 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_publico.py +2 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ronec.py +11 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_rosent.py +17 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_rrn.py +12 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sb10k.py +11 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scala.py +64 -21
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scandiqa.py +13 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scandisent_fi.py +11 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_schibsted.py +12 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentiment_headlines_es.py +13 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentinews.py +14 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentipolc16.py +11 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_skolprov.py +10 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sqad.py +21 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad.py +19 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_it.py +19 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_nl.py +16 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_nl_old.py +15 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ssj500k_ner.py +12 -6
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sst2_pt.py +25 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sst5.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_suc3.py +13 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sumo_ro.py +14 -7
- scandeval-16.10.0/src/scripts/create_swedish_facts.py +246 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_swedn.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_swerec.py +14 -5
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_szeged_ner.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_trivia_et.py +13 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_turku_ner_fi.py +9 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_tydiqa_fi.py +17 -10
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_umimeto_qa.py +7 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_uner_sk.py +10 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_uner_sr.py +14 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wiki_lingua_nl.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wikiann.py +6 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wikineural-it.py +5 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande.py +14 -9
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande_et.py +17 -12
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande_is.py +11 -7
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_xlsum_fi.py +11 -4
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_xquad.py +15 -8
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/load_ud_pos.py +30 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmarker.py +1 -6
- scandeval-16.10.0/tests/test_cli.py +39 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_data_loading.py +12 -11
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_data_models.py +8 -2
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_model_config.py +0 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_model_loading.py +4 -3
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_speed_benchmark.py +0 -1
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_tokenisation_utils.py +0 -3
- scandeval-16.10.0/uv.lock +6239 -0
- scandeval-16.8.0/AGENTS.md +0 -121
- scandeval-16.8.0/docs/python-package.md +0 -130
- scandeval-16.8.0/tests/test_cli.py +0 -70
- scandeval-16.8.0/uv.lock +0 -5385
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.github/workflows/ci.yaml +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.gitignore +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/.markdownlint.jsonc +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/CITATION.cff +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/CODE_OF_CONDUCT.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/CONTRIBUTING.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/Dockerfile.cuda +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/LICENSE +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/NEW_DATASET_GUIDE.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/CNAME +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/README.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/README.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/bulgarian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/catalan.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/danish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/english.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/estonian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/faroese.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/finnish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/french.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/german.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/greek.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/hungarian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/icelandic.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/italian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/latvian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/norwegian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/polish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/portuguese.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/romanian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/slovak.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/slovene.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/spanish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/ukrainian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/extras/radial_plotter.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/faq.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/gfx/favicon.png +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/bulgarian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/croatian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/czech.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/danish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/dutch.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/english.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/estonian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/faroese.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/finnish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/french.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/german.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/greek.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/icelandic.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/italian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/latvian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/lithuanian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/norwegian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/polish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/portuguese.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/serbian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/slovak.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/slovene.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/spanish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/swedish.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/ukrainian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/baltic.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/european.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/finnic.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/germanic.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/README.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/methodology.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/README.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/common-sense-reasoning.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/knowledge.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/linguistic-acceptability.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/named-entity-recognition.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/reading-comprehension.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/sentiment-classification.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/speed.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/summarization.md +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/gfx/euroeval.png +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/gfx/euroeval.xcf +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/gfx/scandeval.png +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/mkdocs.yaml +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_config_factory.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/base.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/fresh.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/caching_utils.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/callbacks.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/cli.py +39 -39
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/data_loading.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/bosnian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/bulgarian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/catalan.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/croatian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/czech.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/danish.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/english.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/estonian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/faroese.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/finnish.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/french.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/german.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/greek.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/hungarian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/icelandic.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/italian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/latvian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/lithuanian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/norwegian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/polish.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/portuguese.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/romanian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/serbian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/slovak.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/slovene.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/spanish.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/ukrainian.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/enums.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/exceptions.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/finetuning.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/generation.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/generation_utils.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/languages.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/base.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/pipeline.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/speed.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_cache.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_loading.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/classification.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/token_classification.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/scores.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/speed_benchmark.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/multiple_choice_classification.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_angry_tweets.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mim_gold_ner.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norec.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/fix_dot_env_file.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/versioning.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/conftest.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_config_factory.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_modules/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_modules/test_hf.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_callbacks.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_constants.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_dataset_configs.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_enums.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_exceptions.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_finetuning.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_languages.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scores.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/__init__.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_create_scala.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_types.py +0 -0
- {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_utils.py +0 -0
|
@@ -20,11 +20,13 @@ body:
|
|
|
20
20
|
options:
|
|
21
21
|
- label: Baltic languages (Latvian, Lithuanian)
|
|
22
22
|
- label: Finnic languages (Estonian, Finnish)
|
|
23
|
-
- label: Greek
|
|
24
23
|
- label: Romance languages (Catalan, French, Italian, Portuguese, Romanian, Spanish)
|
|
25
24
|
- label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
|
|
26
|
-
- label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech,
|
|
25
|
+
- label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
|
|
27
26
|
- label: West Germanic languages (Dutch, English, German)
|
|
27
|
+
- label: Albanian
|
|
28
|
+
- label: Greek
|
|
29
|
+
- label: Hungarian
|
|
28
30
|
validations:
|
|
29
31
|
required: true
|
|
30
32
|
- type: dropdown
|
|
@@ -8,9 +8,9 @@ repos:
|
|
|
8
8
|
hooks:
|
|
9
9
|
- id: end-of-file-fixer
|
|
10
10
|
- id: trailing-whitespace
|
|
11
|
-
- id: debug-statements
|
|
11
|
+
# - id: debug-statements
|
|
12
12
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
-
rev: v0.14.
|
|
13
|
+
rev: v0.14.10
|
|
14
14
|
hooks:
|
|
15
15
|
- id: ruff
|
|
16
16
|
args:
|
|
@@ -34,13 +34,13 @@ repos:
|
|
|
34
34
|
hooks:
|
|
35
35
|
- id: nbstripout
|
|
36
36
|
- repo: https://github.com/facebook/pyrefly-pre-commit
|
|
37
|
-
rev: 0.
|
|
37
|
+
rev: 0.46.2
|
|
38
38
|
hooks:
|
|
39
|
-
- id: pyrefly-
|
|
39
|
+
- id: pyrefly-check
|
|
40
40
|
name: Pyrefly (type checking)
|
|
41
41
|
pass_filenames: true
|
|
42
42
|
- repo: https://github.com/DavidAnson/markdownlint-cli2
|
|
43
|
-
rev: v0.
|
|
43
|
+
rev: v0.20.0
|
|
44
44
|
hooks:
|
|
45
45
|
- id: markdownlint-cli2
|
|
46
46
|
args:
|
|
@@ -7,6 +7,60 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [v16.10.0] - 2025-12-30
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added support for Albanian 🇦🇱! This includes the sentiment classification dataset
|
|
15
|
+
MMS-sq, the linguistic acceptability dataset ScaLA-sq, the named entity recognition
|
|
16
|
+
dataset WikiANN-sq, the reading comprehension dataset MultiWikiQA-sq, the
|
|
17
|
+
summarisation dataset LR-Sum-sq, the knowledge dataset Global-MMLU-Lite-sq,
|
|
18
|
+
and the common-sense reasoning dataset Winogrande-sq. This was contributed by
|
|
19
|
+
@oliverkinch ✨
|
|
20
|
+
- Added the Dutch common-sense reasoning dataset COPA-NL, which is part of the Dutch
|
|
21
|
+
[DUMB benchmark](https://github.com/wietsedv/dumb). This was contributed by @tvosch ✨
|
|
22
|
+
- Added a new task for simplification and the Dutch simplification dataset [Duidelijke
|
|
23
|
+
Taal](http://hdl.handle.net/10032/tm-a2-y8). This was contributed by
|
|
24
|
+
@simonevanbruggen ✨
|
|
25
|
+
- Added multi-node support, which uses Ray as the backend. This was contributed by
|
|
26
|
+
@tvosch ✨
|
|
27
|
+
- Added metadata for the Gemini-3 models.
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
|
|
31
|
+
- Fixed an issue with evaluations of LiteLLM models where asyncio event loops weren't
|
|
32
|
+
closed properly, leading to a buildup of file descriptors and eventually a "too many
|
|
33
|
+
open files" error.
|
|
34
|
+
|
|
35
|
+
## [v16.9.0] - 2025-12-16
|
|
36
|
+
|
|
37
|
+
### Added
|
|
38
|
+
|
|
39
|
+
- Added the Swedish factual knowledge dataset SwedishFacts, which is based on the
|
|
40
|
+
[liu-nlp/swedish-facts-v1](https://huggingface.co/datasets/liu-nlp/swedish-facts-v1)
|
|
41
|
+
dataset. This was contributed by @oliverkinch ✨
|
|
42
|
+
|
|
43
|
+
### Changed
|
|
44
|
+
|
|
45
|
+
- When benchmarking generative models, we now use their generation parameters as
|
|
46
|
+
specified in the `generation_config.json` file in the model repository on the Hugging
|
|
47
|
+
Face Hub, if it exists. We log this to the user if verbose mode is enabled.
|
|
48
|
+
|
|
49
|
+
### Fixed
|
|
50
|
+
|
|
51
|
+
- When a model has registered the number of parameters wrongly within their safetensors
|
|
52
|
+
files, we collect all the potential parameter counts from the safetensors file and
|
|
53
|
+
pick the largest one.
|
|
54
|
+
- Pinned vLLM to v0.11.0, as all later versions (up to and including v0.12.0)
|
|
55
|
+
have breaking changes regarding loading of Mistral models. We aim to unpin this when a
|
|
56
|
+
new vLLM version fixes this.
|
|
57
|
+
- Removed mentions of `hf_transfer` and the associated environment variable
|
|
58
|
+
`HF_HUB_ENABLE_HF_TRANSFER`, since this has been removed from the `transformers`
|
|
59
|
+
library now.
|
|
60
|
+
- Marked `PleIAs/Pleias-3b-Preview` as requiring the `TRITON_ATTN` backend over the
|
|
61
|
+
default `FLASHINFER` backend, as the model architecture is currently not supported by
|
|
62
|
+
the default backend.
|
|
63
|
+
|
|
10
64
|
## [v16.8.0] - 2025-11-25
|
|
11
65
|
|
|
12
66
|
### Added
|
|
@@ -2735,8 +2789,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
2735
2789
|
|
|
2736
2790
|
### Deprecated
|
|
2737
2791
|
|
|
2738
|
-
- Deprecated support for evaluating finetuned models, as the package was primarily used
|
|
2739
|
-
benchmark pretrained models anyway, and the change in datasets means that many
|
|
2792
|
+
- Deprecated support for evaluating finetuned models, as the package was primarily used
|
|
2793
|
+
to benchmark pretrained models anyway, and the change in datasets means that many
|
|
2740
2794
|
finetuned models would have been trained on (part of) the test sets, resulting in
|
|
2741
2795
|
artificially large scores. For evaluation of finetuned models, please check out the
|
|
2742
2796
|
`aiai_eval` Python package instead (under development).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ScandEval
|
|
3
|
-
Version: 16.
|
|
3
|
+
Version: 16.10.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -39,6 +39,7 @@ Requires-Dist: evaluate>=0.4.1
|
|
|
39
39
|
Requires-Dist: huggingface-hub>=0.30.1
|
|
40
40
|
Requires-Dist: levenshtein>=0.24.0
|
|
41
41
|
Requires-Dist: litellm>=1.75.6
|
|
42
|
+
Requires-Dist: mistral-common[soundfile]
|
|
42
43
|
Requires-Dist: more-itertools>=10.5.0
|
|
43
44
|
Requires-Dist: numpy>=2.0.0
|
|
44
45
|
Requires-Dist: ollama>=0.5.1
|
|
@@ -49,6 +50,7 @@ Requires-Dist: pydantic>=2.6.0
|
|
|
49
50
|
Requires-Dist: pyinfer>=0.0.3
|
|
50
51
|
Requires-Dist: python-dotenv>=1.0.1
|
|
51
52
|
Requires-Dist: rouge-score>=0.1.2
|
|
53
|
+
Requires-Dist: sacrebleu>=2.5.1
|
|
52
54
|
Requires-Dist: sacremoses>=0.1.1
|
|
53
55
|
Requires-Dist: scikit-learn==1.6.1
|
|
54
56
|
Requires-Dist: sentencepiece>=0.1.96
|
|
@@ -61,13 +63,15 @@ Requires-Dist: transformers[mistral-common]>=4.56.0
|
|
|
61
63
|
Provides-Extra: all
|
|
62
64
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
|
|
63
65
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
66
|
+
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
|
|
64
67
|
Requires-Dist: timm>=1.0.19; extra == 'all'
|
|
65
|
-
Requires-Dist: vllm[flashinfer]
|
|
68
|
+
Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'all'
|
|
66
69
|
Provides-Extra: generative
|
|
67
70
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
68
71
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
72
|
+
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
|
|
69
73
|
Requires-Dist: timm>=1.0.19; extra == 'generative'
|
|
70
|
-
Requires-Dist: vllm[flashinfer]
|
|
74
|
+
Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'generative'
|
|
71
75
|
Description-Content-Type: text/markdown
|
|
72
76
|
|
|
73
77
|
<!-- This disables the requirement that the first line is a top-level heading -->
|
|
@@ -574,6 +578,20 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
574
578
|
alt="Contributor avatar for mrkowalski"
|
|
575
579
|
/>
|
|
576
580
|
</a>
|
|
581
|
+
<a href="https://github.com/simonevanbruggen">
|
|
582
|
+
<img
|
|
583
|
+
src="https://avatars.githubusercontent.com/u/24842609"
|
|
584
|
+
width=50
|
|
585
|
+
alt="Contributor avatar for simonevanbruggen"
|
|
586
|
+
/>
|
|
587
|
+
</a>
|
|
588
|
+
<a href="https://github.com/tvosch">
|
|
589
|
+
<img
|
|
590
|
+
src="https://avatars.githubusercontent.com/u/110661769"
|
|
591
|
+
width=50
|
|
592
|
+
alt="Contributor avatar for tvosch"
|
|
593
|
+
/>
|
|
594
|
+
</a>
|
|
577
595
|
|
|
578
596
|
### Contribute to EuroEval
|
|
579
597
|
|
|
@@ -502,6 +502,20 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
502
502
|
alt="Contributor avatar for mrkowalski"
|
|
503
503
|
/>
|
|
504
504
|
</a>
|
|
505
|
+
<a href="https://github.com/simonevanbruggen">
|
|
506
|
+
<img
|
|
507
|
+
src="https://avatars.githubusercontent.com/u/24842609"
|
|
508
|
+
width=50
|
|
509
|
+
alt="Contributor avatar for simonevanbruggen"
|
|
510
|
+
/>
|
|
511
|
+
</a>
|
|
512
|
+
<a href="https://github.com/tvosch">
|
|
513
|
+
<img
|
|
514
|
+
src="https://avatars.githubusercontent.com/u/110661769"
|
|
515
|
+
width=50
|
|
516
|
+
alt="Contributor avatar for tvosch"
|
|
517
|
+
/>
|
|
518
|
+
</a>
|
|
505
519
|
|
|
506
520
|
### Contribute to EuroEval
|
|
507
521
|
|