ScandEval 16.8.0__tar.gz → 16.10.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (380) hide show
  1. {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +1 -0
  2. {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +4 -2
  3. {scandeval-16.8.0 → scandeval-16.10.0}/.pre-commit-config.yaml +5 -5
  4. {scandeval-16.8.0 → scandeval-16.10.0}/CHANGELOG.md +56 -2
  5. {scandeval-16.8.0 → scandeval-16.10.0}/PKG-INFO +21 -3
  6. {scandeval-16.8.0 → scandeval-16.10.0}/README.md +14 -0
  7. scandeval-16.10.0/docs/datasets/albanian.md +524 -0
  8. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/bosnian.md +2 -2
  9. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/croatian.md +2 -2
  10. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/czech.md +4 -4
  11. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/dutch.md +154 -1
  12. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/lithuanian.md +3 -3
  13. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/serbian.md +2 -2
  14. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/swedish.md +77 -0
  15. scandeval-16.10.0/docs/leaderboards/Monolingual/albanian.md +26 -0
  16. scandeval-16.10.0/docs/leaderboards/Monolingual/bosnian.md +26 -0
  17. scandeval-16.10.0/docs/leaderboards/Monolingual/catalan.md +26 -0
  18. scandeval-16.10.0/docs/leaderboards/Monolingual/hungarian.md +26 -0
  19. scandeval-16.10.0/docs/leaderboards/Monolingual/romanian.md +26 -0
  20. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/romance.md +1 -1
  21. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/slavic.md +1 -1
  22. scandeval-16.10.0/docs/python-package.md +394 -0
  23. scandeval-16.10.0/docs/tasks/simplification.md +42 -0
  24. {scandeval-16.8.0 → scandeval-16.10.0}/makefile +1 -1
  25. {scandeval-16.8.0 → scandeval-16.10.0}/pyproject.toml +13 -4
  26. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/hf.py +18 -3
  27. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/litellm.py +14 -13
  28. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/vllm.py +127 -9
  29. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmarker.py +0 -11
  30. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/constants.py +9 -0
  31. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/data_models.py +5 -0
  32. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/__init__.py +1 -0
  33. scandeval-16.10.0/src/scandeval/dataset_configs/albanian.py +64 -0
  34. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/dutch.py +31 -1
  35. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/swedish.py +9 -0
  36. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/logging_utils.py +1 -0
  37. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/huggingface.py +82 -0
  38. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/llm_as_a_judge.py +1 -3
  39. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_config.py +2 -2
  40. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/__init__.py +1 -0
  41. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/linguistic_acceptability.py +9 -0
  42. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/multiple_choice.py +9 -0
  43. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/named_entity_recognition.py +20 -0
  44. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/reading_comprehension.py +9 -0
  45. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/sentiment_classification.py +11 -0
  46. scandeval-16.10.0/src/scandeval/prompt_templates/simplification.py +23 -0
  47. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/summarization.py +11 -0
  48. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/question_answering.py +30 -19
  49. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/sequence_classification.py +4 -4
  50. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/text_to_text.py +3 -4
  51. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/token_classification.py +6 -8
  52. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/tasks.py +11 -0
  53. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/tokenisation_utils.py +7 -1
  54. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/types.py +7 -1
  55. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/utils.py +5 -6
  56. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/constants.py +1 -0
  57. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_allocine.py +7 -4
  58. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_arc.py +13 -10
  59. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_arc_is.py +16 -11
  60. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_atsiliepimai.py +9 -4
  61. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_belebele.py +11 -8
  62. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_bg_ner_bsnlp.py +6 -4
  63. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_boolq_pt.py +12 -6
  64. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cinexio.py +9 -6
  65. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cnn_dailymail.py +10 -7
  66. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_en.py +5 -3
  67. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_es.py +5 -3
  68. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_conll_nl.py +5 -3
  69. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_copa_lv.py +9 -6
  70. scandeval-16.10.0/src/scripts/create_copa_nl.py +92 -0
  71. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cross_domain_uk_reviews.py +16 -8
  72. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_cs_gec.py +16 -4
  73. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_csfd_sentiment.py +8 -4
  74. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_csfd_sentiment_sk.py +6 -4
  75. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_czech_news.py +15 -7
  76. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dacsa.py +10 -6
  77. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dane.py +5 -6
  78. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danish_citizen_tests.py +7 -4
  79. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dansk.py +7 -4
  80. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danske_talemaader.py +7 -4
  81. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_danske_talemaader_old.py +10 -7
  82. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dbrd.py +7 -4
  83. scandeval-16.10.0/src/scripts/create_duidelijke_taal.py +198 -0
  84. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_dutch_cola.py +7 -4
  85. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_elner.py +5 -3
  86. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_eltec.py +9 -7
  87. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_err_news.py +13 -8
  88. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_estner.py +6 -2
  89. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_estonian_valence.py +7 -10
  90. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_european_values.py +5 -2
  91. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_exam_et.py +10 -9
  92. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_exams_bg.py +11 -8
  93. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fone.py +7 -5
  94. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_foqa.py +5 -3
  95. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fosent.py +7 -4
  96. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fquad.py +11 -8
  97. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_fullstack_ner.py +23 -14
  98. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_germanquad.py +13 -10
  99. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_germeval.py +5 -3
  100. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_global_mmlu.py +95 -37
  101. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_goldenswag.py +14 -9
  102. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_grammar_et.py +9 -7
  103. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_greek_sa.py +12 -7
  104. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_greek_wikipedia.py +10 -5
  105. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_guia_cat.py +15 -5
  106. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_harem.py +11 -9
  107. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag.py +12 -9
  108. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag_cs.py +12 -9
  109. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hellaswag_fi.py +16 -11
  110. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hotter_and_colder_sentiment.py +9 -6
  111. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_hun_sum.py +21 -7
  112. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_husst.py +13 -4
  113. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ice_linguistic.py +17 -8
  114. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_error_corpus.py +30 -20
  115. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_knowledge.py +11 -4
  116. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icelandic_qa.py +21 -11
  117. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_icesum.py +7 -4
  118. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_idioms_no.py +11 -4
  119. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ilpost_sum.py +11 -4
  120. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_jentoft.py +14 -9
  121. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_kpwr_ner.py +10 -4
  122. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_latvian_lsm_summary.py +15 -6
  123. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_latvian_twitter_sentiment.py +16 -8
  124. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_life_in_the_uk.py +12 -9
  125. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lithuanian_lrytas_summarization.py +15 -6
  126. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_llmzszl.py +14 -9
  127. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lr_sum.py +20 -11
  128. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lt_emotions.py +12 -5
  129. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_lt_history.py +10 -6
  130. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlqa_es.py +9 -5
  131. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlsum_de.py +11 -4
  132. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mlsum_es.py +11 -4
  133. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu.py +17 -11
  134. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_et.py +11 -8
  135. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_hr.py +12 -6
  136. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mmlu_lv.py +19 -11
  137. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mms.py +11 -5
  138. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_multi_wiki_qa.py +14 -9
  139. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_multinerd-it.py +9 -3
  140. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ner_uk.py +14 -4
  141. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_no_cola.py +13 -8
  142. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_no_sammendrag.py +12 -4
  143. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nor_common_sense_qa.py +14 -7
  144. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nordjylland_news.py +11 -4
  145. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norglm_multiqa.py +18 -8
  146. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norglm_multisum.py +12 -4
  147. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norne.py +14 -4
  148. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norquad.py +12 -8
  149. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nqii.py +17 -9
  150. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_nrk_quiz_qa.py +15 -8
  151. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_orange_sum.py +11 -4
  152. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_personal_sum.py +8 -5
  153. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_polemo2.py +10 -7
  154. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_poner.py +10 -3
  155. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_poquad.py +19 -10
  156. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_psc.py +15 -6
  157. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_publico.py +2 -1
  158. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ronec.py +11 -5
  159. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_rosent.py +17 -5
  160. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_rrn.py +12 -4
  161. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sb10k.py +11 -5
  162. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scala.py +64 -21
  163. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scandiqa.py +13 -9
  164. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_scandisent_fi.py +11 -7
  165. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_schibsted.py +12 -5
  166. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentiment_headlines_es.py +13 -4
  167. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentinews.py +14 -4
  168. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sentipolc16.py +11 -5
  169. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_skolprov.py +10 -7
  170. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sqad.py +21 -7
  171. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad.py +19 -9
  172. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_it.py +19 -9
  173. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_nl.py +16 -9
  174. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_squad_nl_old.py +15 -9
  175. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_ssj500k_ner.py +12 -6
  176. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sst2_pt.py +25 -11
  177. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sst5.py +7 -4
  178. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_suc3.py +13 -7
  179. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_sumo_ro.py +14 -7
  180. scandeval-16.10.0/src/scripts/create_swedish_facts.py +246 -0
  181. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_swedn.py +11 -4
  182. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_swerec.py +14 -5
  183. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_szeged_ner.py +11 -4
  184. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_trivia_et.py +13 -4
  185. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_turku_ner_fi.py +9 -4
  186. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_tydiqa_fi.py +17 -10
  187. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_umimeto_qa.py +7 -4
  188. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_uner_sk.py +10 -4
  189. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_uner_sr.py +14 -4
  190. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wiki_lingua_nl.py +11 -4
  191. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wikiann.py +6 -4
  192. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_wikineural-it.py +5 -3
  193. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande.py +14 -9
  194. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande_et.py +17 -12
  195. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_winogrande_is.py +11 -7
  196. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_xlsum_fi.py +11 -4
  197. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_xquad.py +15 -8
  198. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/load_ud_pos.py +30 -0
  199. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmarker.py +1 -6
  200. scandeval-16.10.0/tests/test_cli.py +39 -0
  201. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_data_loading.py +12 -11
  202. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_data_models.py +8 -2
  203. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_model_config.py +0 -1
  204. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_model_loading.py +4 -3
  205. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_speed_benchmark.py +0 -1
  206. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_tokenisation_utils.py +0 -3
  207. scandeval-16.10.0/uv.lock +6239 -0
  208. scandeval-16.8.0/AGENTS.md +0 -121
  209. scandeval-16.8.0/docs/python-package.md +0 -130
  210. scandeval-16.8.0/tests/test_cli.py +0 -70
  211. scandeval-16.8.0/uv.lock +0 -5385
  212. {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
  213. {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  214. {scandeval-16.8.0 → scandeval-16.10.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
  215. {scandeval-16.8.0 → scandeval-16.10.0}/.github/workflows/ci.yaml +0 -0
  216. {scandeval-16.8.0 → scandeval-16.10.0}/.gitignore +0 -0
  217. {scandeval-16.8.0 → scandeval-16.10.0}/.markdownlint.jsonc +0 -0
  218. {scandeval-16.8.0 → scandeval-16.10.0}/CITATION.cff +0 -0
  219. {scandeval-16.8.0 → scandeval-16.10.0}/CODE_OF_CONDUCT.md +0 -0
  220. {scandeval-16.8.0 → scandeval-16.10.0}/CONTRIBUTING.md +0 -0
  221. {scandeval-16.8.0 → scandeval-16.10.0}/Dockerfile.cuda +0 -0
  222. {scandeval-16.8.0 → scandeval-16.10.0}/LICENSE +0 -0
  223. {scandeval-16.8.0 → scandeval-16.10.0}/NEW_DATASET_GUIDE.md +0 -0
  224. {scandeval-16.8.0 → scandeval-16.10.0}/docs/CNAME +0 -0
  225. {scandeval-16.8.0 → scandeval-16.10.0}/docs/README.md +0 -0
  226. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/README.md +0 -0
  227. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/bulgarian.md +0 -0
  228. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/catalan.md +0 -0
  229. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/danish.md +0 -0
  230. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/english.md +0 -0
  231. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/estonian.md +0 -0
  232. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/faroese.md +0 -0
  233. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/finnish.md +0 -0
  234. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/french.md +0 -0
  235. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/german.md +0 -0
  236. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/greek.md +0 -0
  237. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/hungarian.md +0 -0
  238. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/icelandic.md +0 -0
  239. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/italian.md +0 -0
  240. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/latvian.md +0 -0
  241. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/norwegian.md +0 -0
  242. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/polish.md +0 -0
  243. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/portuguese.md +0 -0
  244. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/romanian.md +0 -0
  245. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/slovak.md +0 -0
  246. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/slovene.md +0 -0
  247. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/spanish.md +0 -0
  248. {scandeval-16.8.0 → scandeval-16.10.0}/docs/datasets/ukrainian.md +0 -0
  249. {scandeval-16.8.0 → scandeval-16.10.0}/docs/extras/radial_plotter.md +0 -0
  250. {scandeval-16.8.0 → scandeval-16.10.0}/docs/faq.md +0 -0
  251. {scandeval-16.8.0 → scandeval-16.10.0}/docs/gfx/favicon.png +0 -0
  252. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/bulgarian.md +0 -0
  253. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/croatian.md +0 -0
  254. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/czech.md +0 -0
  255. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/danish.md +0 -0
  256. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/dutch.md +0 -0
  257. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/english.md +0 -0
  258. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/estonian.md +0 -0
  259. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/faroese.md +0 -0
  260. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/finnish.md +0 -0
  261. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/french.md +0 -0
  262. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/german.md +0 -0
  263. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/greek.md +0 -0
  264. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/icelandic.md +0 -0
  265. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/italian.md +0 -0
  266. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/latvian.md +0 -0
  267. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/lithuanian.md +0 -0
  268. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/norwegian.md +0 -0
  269. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/polish.md +0 -0
  270. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/portuguese.md +0 -0
  271. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/serbian.md +0 -0
  272. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/slovak.md +0 -0
  273. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/slovene.md +0 -0
  274. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/spanish.md +0 -0
  275. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/swedish.md +0 -0
  276. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Monolingual/ukrainian.md +0 -0
  277. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/baltic.md +0 -0
  278. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/european.md +0 -0
  279. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/finnic.md +0 -0
  280. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/germanic.md +0 -0
  281. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
  282. {scandeval-16.8.0 → scandeval-16.10.0}/docs/leaderboards/README.md +0 -0
  283. {scandeval-16.8.0 → scandeval-16.10.0}/docs/methodology.md +0 -0
  284. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/README.md +0 -0
  285. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/common-sense-reasoning.md +0 -0
  286. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/knowledge.md +0 -0
  287. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/linguistic-acceptability.md +0 -0
  288. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/named-entity-recognition.md +0 -0
  289. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/reading-comprehension.md +0 -0
  290. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/sentiment-classification.md +0 -0
  291. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/speed.md +0 -0
  292. {scandeval-16.8.0 → scandeval-16.10.0}/docs/tasks/summarization.md +0 -0
  293. {scandeval-16.8.0 → scandeval-16.10.0}/gfx/euroeval.png +0 -0
  294. {scandeval-16.8.0 → scandeval-16.10.0}/gfx/euroeval.xcf +0 -0
  295. {scandeval-16.8.0 → scandeval-16.10.0}/gfx/scandeval.png +0 -0
  296. {scandeval-16.8.0 → scandeval-16.10.0}/mkdocs.yaml +0 -0
  297. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/__init__.py +0 -0
  298. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_config_factory.py +0 -0
  299. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/__init__.py +0 -0
  300. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/base.py +0 -0
  301. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/benchmark_modules/fresh.py +0 -0
  302. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/caching_utils.py +0 -0
  303. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/callbacks.py +0 -0
  304. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/cli.py +39 -39
  305. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/data_loading.py +0 -0
  306. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/bosnian.py +0 -0
  307. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/bulgarian.py +0 -0
  308. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/catalan.py +0 -0
  309. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/croatian.py +0 -0
  310. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/czech.py +0 -0
  311. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/danish.py +0 -0
  312. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/english.py +0 -0
  313. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/estonian.py +0 -0
  314. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/faroese.py +0 -0
  315. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/finnish.py +0 -0
  316. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/french.py +0 -0
  317. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/german.py +0 -0
  318. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/greek.py +0 -0
  319. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/hungarian.py +0 -0
  320. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/icelandic.py +0 -0
  321. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/italian.py +0 -0
  322. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/latvian.py +0 -0
  323. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/lithuanian.py +0 -0
  324. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/norwegian.py +0 -0
  325. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/polish.py +0 -0
  326. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/portuguese.py +0 -0
  327. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/romanian.py +0 -0
  328. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/serbian.py +0 -0
  329. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/slovak.py +0 -0
  330. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/slovene.py +0 -0
  331. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/spanish.py +0 -0
  332. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/dataset_configs/ukrainian.py +0 -0
  333. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/enums.py +0 -0
  334. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/exceptions.py +0 -0
  335. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/finetuning.py +0 -0
  336. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/generation.py +0 -0
  337. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/generation_utils.py +0 -0
  338. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/languages.py +0 -0
  339. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/__init__.py +0 -0
  340. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/base.py +0 -0
  341. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/pipeline.py +0 -0
  342. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/metrics/speed.py +0 -0
  343. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_cache.py +0 -0
  344. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/model_loading.py +0 -0
  345. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/classification.py +0 -0
  346. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/prompt_templates/token_classification.py +0 -0
  347. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/scores.py +0 -0
  348. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/speed_benchmark.py +0 -0
  349. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/__init__.py +0 -0
  350. {scandeval-16.8.0 → scandeval-16.10.0}/src/scandeval/task_group_utils/multiple_choice_classification.py +0 -0
  351. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/__init__.py +0 -0
  352. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_angry_tweets.py +0 -0
  353. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_mim_gold_ner.py +0 -0
  354. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/create_norec.py +0 -0
  355. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/fix_dot_env_file.py +0 -0
  356. {scandeval-16.8.0 → scandeval-16.10.0}/src/scripts/versioning.py +0 -0
  357. {scandeval-16.8.0 → scandeval-16.10.0}/tests/__init__.py +0 -0
  358. {scandeval-16.8.0 → scandeval-16.10.0}/tests/conftest.py +0 -0
  359. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_config_factory.py +0 -0
  360. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_modules/__init__.py +0 -0
  361. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_benchmark_modules/test_hf.py +0 -0
  362. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_callbacks.py +0 -0
  363. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_constants.py +0 -0
  364. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_dataset_configs.py +0 -0
  365. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_enums.py +0 -0
  366. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_exceptions.py +0 -0
  367. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_finetuning.py +0 -0
  368. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_languages.py +0 -0
  369. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scores.py +0 -0
  370. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/__init__.py +0 -0
  371. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/__init__.py +0 -0
  372. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_create_scala.py +0 -0
  373. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +0 -0
  374. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
  375. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +0 -0
  376. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +0 -0
  377. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +0 -0
  378. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +0 -0
  379. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_types.py +0 -0
  380. {scandeval-16.8.0 → scandeval-16.10.0}/tests/test_utils.py +0 -0
@@ -24,6 +24,7 @@ body:
24
24
  label: Dataset languages
25
25
  description: What languages is the dataset in?
26
26
  options:
27
+ - label: Albanian
27
28
  - label: Bulgarian
28
29
  - label: Bosnian
29
30
  - label: Catalan
@@ -20,11 +20,13 @@ body:
20
20
  options:
21
21
  - label: Baltic languages (Latvian, Lithuanian)
22
22
  - label: Finnic languages (Estonian, Finnish)
23
- - label: Greek
24
23
  - label: Romance languages (Catalan, French, Italian, Portuguese, Romanian, Spanish)
25
24
  - label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
26
- - label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech, Hungarian, Polish, Serbian, Slovak, Slovenian, Ukrainian)
25
+ - label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
27
26
  - label: West Germanic languages (Dutch, English, German)
27
+ - label: Albanian
28
+ - label: Greek
29
+ - label: Hungarian
28
30
  validations:
29
31
  required: true
30
32
  - type: dropdown
@@ -8,9 +8,9 @@ repos:
8
8
  hooks:
9
9
  - id: end-of-file-fixer
10
10
  - id: trailing-whitespace
11
- - id: debug-statements
11
+ # - id: debug-statements
12
12
  - repo: https://github.com/astral-sh/ruff-pre-commit
13
- rev: v0.14.6
13
+ rev: v0.14.10
14
14
  hooks:
15
15
  - id: ruff
16
16
  args:
@@ -34,13 +34,13 @@ repos:
34
34
  hooks:
35
35
  - id: nbstripout
36
36
  - repo: https://github.com/facebook/pyrefly-pre-commit
37
- rev: 0.0.1
37
+ rev: 0.46.2
38
38
  hooks:
39
- - id: pyrefly-typecheck-system
39
+ - id: pyrefly-check
40
40
  name: Pyrefly (type checking)
41
41
  pass_filenames: true
42
42
  - repo: https://github.com/DavidAnson/markdownlint-cli2
43
- rev: v0.19.1
43
+ rev: v0.20.0
44
44
  hooks:
45
45
  - id: markdownlint-cli2
46
46
  args:
@@ -7,6 +7,60 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [v16.10.0] - 2025-12-30
11
+
12
+ ### Added
13
+
14
+ - Added support for Albanian 🇦🇱! This includes the sentiment classification dataset
15
+ MMS-sq, the linguistic acceptability dataset ScaLA-sq, the named entity recognition
16
+ dataset WikiANN-sq, the reading comprehension dataset MultiWikiQA-sq, the
17
+ summarisation dataset LR-Sum-sq, the knowledge dataset Global-MMLU-Lite-sq,
18
+ and the common-sense reasoning dataset Winogrande-sq. This was contributed by
19
+ @oliverkinch ✨
20
+ - Added the Dutch common sense reasoning dataset COPA-NL, which is part of the Dutch
21
+ [DUMB benchmark](https://github.com/wietsedv/dumb). This was contributed by @tvosch ✨
22
+ - Added a new task for simplification and the Dutch simplification dataset [Duidelijke
23
+ Taal](http://hdl.handle.net/10032/tm-a2-y8). This was contributed by
24
+ @simonevanbruggen ✨
25
+ - Added multi-node support, using Ray as the backend in this case. This was contributed by
26
+ @tvosch ✨
27
+ - Added metadata for the Gemini-3 models.
28
+
29
+ ### Fixed
30
+
31
+ - Fixed an issue with evaluations of LiteLLM models where asyncio event loops weren't
32
+ closed properly, leading to a buildup of file descriptors and eventually a "too many
33
+ open files" error.
34
+
35
+ ## [v16.9.0] - 2025-12-16
36
+
37
+ ### Added
38
+
39
+ - Added the Swedish factual knowledge dataset SwedishFacts, which is based on the
40
+ [liu-nlp/swedish-facts-v1](https://huggingface.co/datasets/liu-nlp/swedish-facts-v1)
41
+ dataset. This was contributed by @oliverkinch ✨
42
+
43
+ ### Changed
44
+
45
+ - When benchmarking generative models, we now use their generation parameters as
46
+ specified in the `generation_config.json` file in the model repository on the Hugging
47
+ Face Hub, if it exists. We log this to the user if verbose mode is enabled.
48
+
49
+ ### Fixed
50
+
51
+ - When a model has registered the number of parameters wrongly within their safetensors
52
+ files, we collect all the potential parameter counts from the safetensors file and
53
+ pick the largest one.
54
+ - We have now pinned vLLM to v0.11.0, as all later versions (up to and including v0.12.0)
55
+ have breaking changes regarding loading of Mistral models. We aim to unpin this when a
56
+ new vLLM version fixes this.
57
+ - Removed mentions of `hf_transfer` and the associated environment variable
58
+ `HF_HUB_ENABLE_HF_TRANSFER`, since this has been removed from the `transformers`
59
+ library now.
60
+ - Marked the `PleIAs/Pleias-3b-Preview` model as requiring the `TRITON_ATTN` backend over the
61
+ default `FLASHINFER` backend, as the model architecture is currently not supported by
62
+ the default backend.
63
+
10
64
  ## [v16.8.0] - 2025-11-25
11
65
 
12
66
  ### Added
@@ -2735,8 +2789,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2735
2789
 
2736
2790
  ### Deprecated
2737
2791
 
2738
- - Deprecated support for evaluating finetuned models, as the package was primarily used to
2739
- benchmark pretrained models anyway, and the change in datasets means that many
2792
+ - Deprecated support for evaluating finetuned models, as the package was primarily used
2793
+ to benchmark pretrained models anyway, and the change in datasets means that many
2740
2794
  finetuned models would have been trained on (part of) the test sets, resulting in
2741
2795
  artificially large scores. For evaluation of finetuned models, please check out the
2742
2796
  `aiai_eval` Python package instead (under development).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ScandEval
3
- Version: 16.8.0
3
+ Version: 16.10.0
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -39,6 +39,7 @@ Requires-Dist: evaluate>=0.4.1
39
39
  Requires-Dist: huggingface-hub>=0.30.1
40
40
  Requires-Dist: levenshtein>=0.24.0
41
41
  Requires-Dist: litellm>=1.75.6
42
+ Requires-Dist: mistral-common[soundfile]
42
43
  Requires-Dist: more-itertools>=10.5.0
43
44
  Requires-Dist: numpy>=2.0.0
44
45
  Requires-Dist: ollama>=0.5.1
@@ -49,6 +50,7 @@ Requires-Dist: pydantic>=2.6.0
49
50
  Requires-Dist: pyinfer>=0.0.3
50
51
  Requires-Dist: python-dotenv>=1.0.1
51
52
  Requires-Dist: rouge-score>=0.1.2
53
+ Requires-Dist: sacrebleu>=2.5.1
52
54
  Requires-Dist: sacremoses>=0.1.1
53
55
  Requires-Dist: scikit-learn==1.6.1
54
56
  Requires-Dist: sentencepiece>=0.1.96
@@ -61,13 +63,15 @@ Requires-Dist: transformers[mistral-common]>=4.56.0
61
63
  Provides-Extra: all
62
64
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
63
65
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
66
+ Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
64
67
  Requires-Dist: timm>=1.0.19; extra == 'all'
65
- Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'all'
68
+ Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'all'
66
69
  Provides-Extra: generative
67
70
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
68
71
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
72
+ Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
69
73
  Requires-Dist: timm>=1.0.19; extra == 'generative'
70
- Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'generative'
74
+ Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'generative'
71
75
  Description-Content-Type: text/markdown
72
76
 
73
77
  <!-- This disables the requirement that the first line is a top-level heading -->
@@ -574,6 +578,20 @@ A huge thank you to all the contributors who have helped make this project a suc
574
578
  alt="Contributor avatar for mrkowalski"
575
579
  />
576
580
  </a>
581
+ <a href="https://github.com/simonevanbruggen">
582
+ <img
583
+ src="https://avatars.githubusercontent.com/u/24842609"
584
+ width=50
585
+ alt="Contributor avatar for simonevanbruggen"
586
+ />
587
+ </a>
588
+ <a href="https://github.com/tvosch">
589
+ <img
590
+ src="https://avatars.githubusercontent.com/u/110661769"
591
+ width=50
592
+ alt="Contributor avatar for tvosch"
593
+ />
594
+ </a>
577
595
 
578
596
  ### Contribute to EuroEval
579
597
 
@@ -502,6 +502,20 @@ A huge thank you to all the contributors who have helped make this project a suc
502
502
  alt="Contributor avatar for mrkowalski"
503
503
  />
504
504
  </a>
505
+ <a href="https://github.com/simonevanbruggen">
506
+ <img
507
+ src="https://avatars.githubusercontent.com/u/24842609"
508
+ width=50
509
+ alt="Contributor avatar for simonevanbruggen"
510
+ />
511
+ </a>
512
+ <a href="https://github.com/tvosch">
513
+ <img
514
+ src="https://avatars.githubusercontent.com/u/110661769"
515
+ width=50
516
+ alt="Contributor avatar for tvosch"
517
+ />
518
+ </a>
505
519
 
506
520
  ### Contribute to EuroEval
507
521