EuroEval 15.6.0__tar.gz → 15.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (231)
  1. {euroeval-15.6.0 → euroeval-15.6.1}/CHANGELOG.md +12 -0
  2. {euroeval-15.6.0 → euroeval-15.6.1}/PKG-INFO +2 -1
  3. {euroeval-15.6.0 → euroeval-15.6.1}/README.md +1 -0
  4. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/dutch.md +8 -6
  5. {euroeval-15.6.0 → euroeval-15.6.1}/makefile +2 -15
  6. {euroeval-15.6.0 → euroeval-15.6.1}/pyproject.toml +1 -1
  7. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/data_models.py +8 -4
  8. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/faroese.py +1 -0
  9. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/norwegian.py +1 -1
  10. {euroeval-15.6.0 → euroeval-15.6.1}/uv.lock +1 -1
  11. {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +0 -0
  12. {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
  13. {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  14. {euroeval-15.6.0 → euroeval-15.6.1}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +0 -0
  15. {euroeval-15.6.0 → euroeval-15.6.1}/.github/workflows/ci.yaml +0 -0
  16. {euroeval-15.6.0 → euroeval-15.6.1}/.gitignore +0 -0
  17. {euroeval-15.6.0 → euroeval-15.6.1}/.pre-commit-config.yaml +0 -0
  18. {euroeval-15.6.0 → euroeval-15.6.1}/CITATION.cff +0 -0
  19. {euroeval-15.6.0 → euroeval-15.6.1}/CODE_OF_CONDUCT.md +0 -0
  20. {euroeval-15.6.0 → euroeval-15.6.1}/CONTRIBUTING.md +0 -0
  21. {euroeval-15.6.0 → euroeval-15.6.1}/Dockerfile.cuda +0 -0
  22. {euroeval-15.6.0 → euroeval-15.6.1}/LICENSE +0 -0
  23. {euroeval-15.6.0 → euroeval-15.6.1}/docs/CNAME +0 -0
  24. {euroeval-15.6.0 → euroeval-15.6.1}/docs/README.md +0 -0
  25. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/README.md +0 -0
  26. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/danish.md +0 -0
  27. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/english.md +0 -0
  28. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/faroese.md +0 -0
  29. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/french.md +0 -0
  30. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/german.md +0 -0
  31. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/icelandic.md +0 -0
  32. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/italian.md +0 -0
  33. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/norwegian.md +0 -0
  34. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/spanish.md +0 -0
  35. {euroeval-15.6.0 → euroeval-15.6.1}/docs/datasets/swedish.md +0 -0
  36. {euroeval-15.6.0 → euroeval-15.6.1}/docs/extras/radial_plotter.md +0 -0
  37. {euroeval-15.6.0 → euroeval-15.6.1}/docs/faq.md +0 -0
  38. {euroeval-15.6.0 → euroeval-15.6.1}/docs/gfx/favicon.png +0 -0
  39. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/danish.md +0 -0
  40. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/dutch.md +0 -0
  41. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/english.md +0 -0
  42. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/faroese.md +0 -0
  43. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/french.md +0 -0
  44. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/german.md +0 -0
  45. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/icelandic.md +0 -0
  46. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/italian.md +0 -0
  47. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/norwegian.md +0 -0
  48. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Monolingual/swedish.md +0 -0
  49. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/european.md +0 -0
  50. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/germanic.md +0 -0
  51. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
  52. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/Multilingual/romance.md +0 -0
  53. {euroeval-15.6.0 → euroeval-15.6.1}/docs/leaderboards/README.md +0 -0
  54. {euroeval-15.6.0 → euroeval-15.6.1}/docs/methodology.md +0 -0
  55. {euroeval-15.6.0 → euroeval-15.6.1}/docs/python-package.md +0 -0
  56. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/README.md +0 -0
  57. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/common-sense-reasoning.md +0 -0
  58. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/knowledge.md +0 -0
  59. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/linguistic-acceptability.md +0 -0
  60. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/named-entity-recognition.md +0 -0
  61. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/reading-comprehension.md +0 -0
  62. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/sentiment-classification.md +0 -0
  63. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/speed.md +0 -0
  64. {euroeval-15.6.0 → euroeval-15.6.1}/docs/tasks/summarization.md +0 -0
  65. {euroeval-15.6.0 → euroeval-15.6.1}/gfx/euroeval.png +0 -0
  66. {euroeval-15.6.0 → euroeval-15.6.1}/gfx/euroeval.xcf +0 -0
  67. {euroeval-15.6.0 → euroeval-15.6.1}/gfx/scandeval.png +0 -0
  68. {euroeval-15.6.0 → euroeval-15.6.1}/mkdocs.yaml +0 -0
  69. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/__init__.py +0 -0
  70. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_config_factory.py +0 -0
  71. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/__init__.py +0 -0
  72. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/base.py +0 -0
  73. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/fresh.py +0 -0
  74. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/hf.py +0 -0
  75. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/litellm.py +0 -0
  76. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmark_modules/vllm.py +0 -0
  77. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/benchmarker.py +0 -0
  78. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/callbacks.py +0 -0
  79. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/cli.py +0 -0
  80. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/constants.py +0 -0
  81. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/data_loading.py +0 -0
  82. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/__init__.py +0 -0
  83. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/danish.py +0 -0
  84. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/dutch.py +0 -0
  85. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/english.py +0 -0
  86. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/french.py +0 -0
  87. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/german.py +0 -0
  88. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/icelandic.py +0 -0
  89. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/italian.py +0 -0
  90. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/spanish.py +0 -0
  91. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/dataset_configs/swedish.py +0 -0
  92. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/enums.py +0 -0
  93. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/exceptions.py +0 -0
  94. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/finetuning.py +0 -0
  95. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/generation.py +0 -0
  96. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/human_evaluation.py +0 -0
  97. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/languages.py +0 -0
  98. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_cache.py +0 -0
  99. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_config.py +0 -0
  100. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/model_loading.py +0 -0
  101. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/__init__.py +0 -0
  102. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/linguistic_acceptability.py +0 -0
  103. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/multiple_choice.py +0 -0
  104. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/named_entity_recognition.py +0 -0
  105. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/reading_comprehension.py +0 -0
  106. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/sentiment_classification.py +0 -0
  107. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/prompt_templates/summarization.py +0 -0
  108. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/scores.py +0 -0
  109. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/speed_benchmark.py +0 -0
  110. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/__init__.py +0 -0
  111. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -0
  112. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/question_answering.py +0 -0
  113. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/sequence_classification.py +0 -0
  114. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/text_to_text.py +0 -0
  115. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/task_group_utils/token_classification.py +0 -0
  116. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/tasks.py +0 -0
  117. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/tokenization_utils.py +0 -0
  118. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/types.py +0 -0
  119. {euroeval-15.6.0 → euroeval-15.6.1}/src/euroeval/utils.py +0 -0
  120. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/constants.py +0 -0
  121. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_allocine.py +0 -0
  122. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_angry_tweets.py +0 -0
  123. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_arc.py +0 -0
  124. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_arc_is.py +0 -0
  125. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_belebele.py +0 -0
  126. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_cnn_dailymail.py +0 -0
  127. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_en.py +0 -0
  128. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_es.py +0 -0
  129. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_conll_nl.py +0 -0
  130. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dane.py +0 -0
  131. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danish_citizen_tests.py +0 -0
  132. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dansk.py +0 -0
  133. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danske_talemaader.py +0 -0
  134. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_danske_talemaader_old.py +0 -0
  135. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dbrd.py +0 -0
  136. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dutch_cola.py +0 -0
  137. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_dutch_social.py +0 -0
  138. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_eltec.py +0 -0
  139. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fone.py +0 -0
  140. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_foqa.py +0 -0
  141. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fosent.py +0 -0
  142. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_fquad.py +0 -0
  143. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_germanquad.py +0 -0
  144. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_germeval.py +0 -0
  145. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_hellaswag.py +0 -0
  146. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
  147. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_ice_linguistic.py +0 -0
  148. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_error_corpus.py +0 -0
  149. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_knowledge.py +0 -0
  150. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icelandic_qa.py +0 -0
  151. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_icesum.py +0 -0
  152. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_ilpost_sum.py +0 -0
  153. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_jentoft.py +0 -0
  154. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mim_gold_ner.py +0 -0
  155. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlqa_es.py +0 -0
  156. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlsum_de.py +0 -0
  157. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mlsum_es.py +0 -0
  158. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_mmlu.py +0 -0
  159. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_multinerd-it.py +0 -0
  160. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_no_cola.py +0 -0
  161. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_no_sammendrag.py +0 -0
  162. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nor_common_sense_qa.py +0 -0
  163. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nordjylland_news.py +0 -0
  164. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norec.py +0 -0
  165. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norglm_multiqa.py +0 -0
  166. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norglm_multisum.py +0 -0
  167. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norne.py +0 -0
  168. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_norquad.py +0 -0
  169. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nqii.py +0 -0
  170. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_nrk_quiz_qa.py +0 -0
  171. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_orange_sum.py +0 -0
  172. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_personal_sum.py +0 -0
  173. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_rrn.py +0 -0
  174. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sb10k.py +0 -0
  175. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_scala.py +0 -0
  176. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_scandiqa.py +0 -0
  177. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_schibsted.py +0 -0
  178. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sentiment_headlines_es.py +0 -0
  179. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sentipolc16.py +0 -0
  180. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad.py +0 -0
  181. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_it.py +0 -0
  182. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_nl.py +0 -0
  183. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_squad_nl_old.py +0 -0
  184. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_sst5.py +0 -0
  185. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_suc3.py +0 -0
  186. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_swedn.py +0 -0
  187. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_swerec.py +0 -0
  188. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wiki_lingua_nl.py +0 -0
  189. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wikiann_fo.py +0 -0
  190. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_wikineural-it.py +0 -0
  191. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_winogrande_is.py +0 -0
  192. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/create_xquad_es.py +0 -0
  193. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/fix_dot_env_file.py +0 -0
  194. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/load_ud_pos.py +0 -0
  195. {euroeval-15.6.0 → euroeval-15.6.1}/src/scripts/versioning.py +0 -0
  196. {euroeval-15.6.0 → euroeval-15.6.1}/tests/__init__.py +0 -0
  197. {euroeval-15.6.0 → euroeval-15.6.1}/tests/conftest.py +0 -0
  198. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_config_factory.py +0 -0
  199. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/__init__.py +0 -0
  200. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_base.py +0 -0
  201. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_fresh.py +0 -0
  202. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_hf.py +0 -0
  203. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_litellm.py +0 -0
  204. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmark_modules/test_vllm.py +0 -0
  205. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_benchmarker.py +0 -0
  206. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_callbacks.py +0 -0
  207. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_cli.py +0 -0
  208. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_constants.py +0 -0
  209. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_data_loading.py +0 -0
  210. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_data_models.py +0 -0
  211. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_dataset_configs.py +0 -0
  212. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_enums.py +0 -0
  213. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_exceptions.py +0 -0
  214. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_finetuning.py +0 -0
  215. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_generation.py +0 -0
  216. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_human_evaluation.py +0 -0
  217. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_languages.py +0 -0
  218. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_cache.py +0 -0
  219. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_config.py +0 -0
  220. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_model_loading.py +0 -0
  221. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_scores.py +0 -0
  222. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_speed_benchmark.py +0 -0
  223. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/__init__.py +0 -0
  224. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_question_answering.py +0 -0
  225. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_sequence_classification.py +0 -0
  226. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_text_to_text.py +0 -0
  227. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_task_utils/test_token_classification.py +0 -0
  228. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_tasks.py +0 -0
  229. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_tokenization_utils.py +0 -0
  230. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_types.py +0 -0
  231. {euroeval-15.6.0 → euroeval-15.6.1}/tests/test_utils.py +0 -0
@@ -10,6 +10,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
10
10
 
11
11
 
12
12
 
13
+ ## [v15.6.1] - 2025-04-14
14
+ ### Changed
15
+ - Added more info about SQuAD-nl in the documentation. This was contributed by
16
+ [@Rijgersberg](https://github.com/Rijgersberg) ✨
17
+
18
+ ### Fixed
19
+ - The "E" option for the Norwegian NorCommonSenseQA dataset was not included in the
20
+ refactor in v15.6.0, leading to evaluation errors. This has been fixed now.
21
+ - The number of few-shot examples for FoSent was not reduced to 5 again during the
22
+ refactor in v15.6.0, leading to evaluation errors. This has been fixed now.
23
+
24
+
13
25
  ## [v15.6.0] - 2025-04-13
14
26
  ### Added
15
27
  - We now support specifying custom inference providers when benchmarking via the Hugging
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 15.6.0
3
+ Version: 15.6.1
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -237,6 +237,7 @@ A huge thank you to all the contributors who have helped make this project a suc
237
237
  <a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
238
238
  <a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
239
239
  <a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
240
+ <a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
240
241
 
241
242
  ### Special Thanks
242
243
  - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
@@ -161,6 +161,7 @@ A huge thank you to all the contributors who have helped make this project a suc
161
161
  <a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
162
162
  <a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
163
163
  <a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
164
+ <a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
164
165
 
165
166
  ### Special Thanks
166
167
  - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
@@ -310,12 +310,14 @@ Here are a few examples from the training split:
310
310
  This dataset is published
311
311
  [here](https://huggingface.co/datasets/GroNLP/squad-nl-v2.0) and is a machine translated
312
312
  dataset of the English [SQuAD](https://aclanthology.org/D16-1264/) and
313
- [XQuAD](https://aclanthology.org/2020.acl-main.421/) datasets. Google Translate was used
314
- to translate the original datasets to Dutch.
315
-
316
- These are based on English Wikipedia articles and the questions and answers are written
317
- by crowdworkers. It is not clear how the translations were done, this information is
318
- pending from the authors.
313
+ [XQuAD](https://aclanthology.org/2020.acl-main.421/) datasets, created for the
314
+ Dutch-language [DUMB](https://dumbench.nl/) benchmark. Google Translate was used to
315
+ translate the original datasets to Dutch. The test data
316
+ [was manually corrected](https://aclanthology.org/2023.emnlp-main.447/) by eight BSc
317
+ students as part of their thesis work.
318
+
319
+ The original SQuAD and XQuAD datasets are based on English Wikipedia articles and the
320
+ questions and answers are written by crowdworkers.
319
321
 
320
322
  Here are a few examples from the training split:
321
323
 
@@ -81,21 +81,8 @@ test: ## Run tests
81
81
  tree: ## Print directory tree
82
82
  @tree -a --gitignore -I .git .
83
83
 
84
- lint: ## Lint the project
85
- uv run ruff check . --fix --unsafe-fixes
86
-
87
- format: ## Format the project
88
- uv run ruff format .
89
-
90
- type-check: ## Type-check the project
91
- @uv run mypy . \
92
- --install-types \
93
- --non-interactive \
94
- --ignore-missing-imports \
95
- --show-error-codes \
96
- --check-untyped-defs
97
-
98
- check: lint format type-check ## Lint, format, and type-check the code
84
+ check: ## Lint, format, and type-check the code
85
+ @uv run pre-commit run --all-files
99
86
 
100
87
  bump-major:
101
88
  @uv run python -m src.scripts.versioning --major
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "EuroEval"
3
- version = "15.6.0"
3
+ version = "15.6.1"
4
4
  description = "The robust European language model benchmark."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -388,8 +388,10 @@ class DatasetConfig:
388
388
  language.
389
389
  _prompt_label_mapping (optional):
390
390
  A mapping from the labels to another phrase which is used as a substitute
391
- for the label in few-shot evaluation. Defaults to the template for the task
392
- and language.
391
+ for the label in few-shot evaluation. If "auto" then the mapping will be set
392
+ to a 1:1 mapping between the labels and themselves. If None then the mapping
393
+ will be set to the default mapping for the task and language. Defaults to
394
+ None.
393
395
  unofficial (optional):
394
396
  Whether the dataset is unofficial. Defaults to False.
395
397
  """
@@ -405,7 +407,7 @@ class DatasetConfig:
405
407
  _num_few_shot_examples: int | None = None
406
408
  _max_generated_tokens: int | None = None
407
409
  _labels: list[str] | None = None
408
- _prompt_label_mapping: dict[str, str] | None = None
410
+ _prompt_label_mapping: dict[str, str] | t.Literal["auto"] | None = None
409
411
  unofficial: bool = False
410
412
 
411
413
  @property
@@ -475,7 +477,9 @@ class DatasetConfig:
475
477
  @property
476
478
  def prompt_label_mapping(self) -> dict[str, str]:
477
479
  """Mapping from English labels to localised labels."""
478
- if self._prompt_label_mapping is not None:
480
+ if self._prompt_label_mapping == "auto":
481
+ return {label: label for label in self.labels}
482
+ elif self._prompt_label_mapping is not None:
479
483
  return self._prompt_label_mapping
480
484
 
481
485
  main_language = self.languages[0]
@@ -12,6 +12,7 @@ FOSENT_CONFIG = DatasetConfig(
12
12
  huggingface_id="EuroEval/fosent",
13
13
  task=SENT,
14
14
  languages=[FO],
15
+ _num_few_shot_examples=5,
15
16
  )
16
17
 
17
18
  SCALA_FO_CONFIG = DatasetConfig(
@@ -83,6 +83,7 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
83
83
  huggingface_id="EuroEval/nor-common-sense-qa",
84
84
  task=COMMON_SENSE,
85
85
  languages=[NB, NN, NO],
86
+ _labels=["a", "b", "c", "d", "e"],
86
87
  )
87
88
 
88
89
 
@@ -105,7 +106,6 @@ NORGLM_MULTI_QA = DatasetConfig(
105
106
  huggingface_id="EuroEval/norglm-multi-qa",
106
107
  task=RC,
107
108
  languages=[NB, NN, NO],
108
- _num_few_shot_examples=2,
109
109
  unofficial=True,
110
110
  )
111
111
 
@@ -906,7 +906,7 @@ wheels = [
906
906
 
907
907
  [[package]]
908
908
  name = "euroeval"
909
- version = "15.6.0"
909
+ version = "15.6.1"
910
910
  source = { editable = "." }
911
911
  dependencies = [
912
912
  { name = "accelerate" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes