ScandEval 16.12.0__tar.gz → 16.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +1 -0
  2. {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +1 -1
  3. scandeval-16.13.0/.github/auto_assign.yaml +9 -0
  4. {scandeval-16.12.0 → scandeval-16.13.0}/.gitignore +3 -0
  5. {scandeval-16.12.0 → scandeval-16.13.0}/.pre-commit-config.yaml +2 -2
  6. {scandeval-16.12.0 → scandeval-16.13.0}/CHANGELOG.md +36 -0
  7. scandeval-16.13.0/PKG-INFO +334 -0
  8. scandeval-16.13.0/README.md +254 -0
  9. scandeval-16.13.0/docs/datasets/belarusian.md +385 -0
  10. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/danish.md +2 -2
  11. {scandeval-16.12.0 → scandeval-16.13.0}/docs/python-package.md +61 -9
  12. {scandeval-16.12.0 → scandeval-16.13.0}/pyproject.toml +1 -1
  13. scandeval-16.13.0/src/scandeval/async_utils.py +46 -0
  14. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_config_factory.py +26 -2
  15. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/fresh.py +2 -1
  16. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/hf.py +50 -12
  17. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/litellm.py +25 -15
  18. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/vllm.py +3 -3
  19. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmarker.py +15 -33
  20. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/cli.py +2 -4
  21. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/constants.py +5 -0
  22. scandeval-16.13.0/src/scandeval/custom_dataset_configs.py +152 -0
  23. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/data_loading.py +87 -31
  24. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/data_models.py +396 -225
  25. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/__init__.py +51 -25
  26. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/albanian.py +1 -1
  27. scandeval-16.13.0/src/scandeval/dataset_configs/belarusian.py +47 -0
  28. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/bulgarian.py +1 -1
  29. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/catalan.py +1 -1
  30. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/croatian.py +1 -1
  31. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/danish.py +3 -2
  32. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/dutch.py +7 -6
  33. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/english.py +4 -3
  34. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/estonian.py +8 -7
  35. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/faroese.py +1 -1
  36. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/finnish.py +5 -4
  37. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/french.py +6 -5
  38. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/german.py +4 -3
  39. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/greek.py +1 -1
  40. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/hungarian.py +1 -1
  41. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/icelandic.py +4 -3
  42. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/italian.py +4 -3
  43. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/latvian.py +2 -2
  44. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/lithuanian.py +1 -1
  45. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/norwegian.py +6 -5
  46. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/polish.py +4 -3
  47. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/portuguese.py +5 -4
  48. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/romanian.py +2 -2
  49. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/serbian.py +1 -1
  50. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/slovene.py +1 -1
  51. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/spanish.py +4 -3
  52. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/swedish.py +4 -3
  53. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/ukrainian.py +1 -1
  54. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/generation_utils.py +6 -6
  55. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/llm_as_a_judge.py +1 -1
  56. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/pipeline.py +1 -1
  57. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_cache.py +34 -4
  58. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/linguistic_acceptability.py +9 -0
  59. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/multiple_choice.py +9 -0
  60. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/named_entity_recognition.py +21 -0
  61. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/reading_comprehension.py +10 -0
  62. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/sentiment_classification.py +11 -0
  63. scandeval-16.13.0/src/scandeval/string_utils.py +157 -0
  64. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/sequence_classification.py +2 -5
  65. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/token_classification.py +2 -4
  66. scandeval-16.13.0/src/scandeval/utils.py +231 -0
  67. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/constants.py +1 -0
  68. scandeval-16.13.0/src/scripts/create_be_wsc.py +463 -0
  69. scandeval-16.13.0/src/scripts/create_besls.py +146 -0
  70. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_duidelijke_taal.py +2 -2
  71. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_exam_et.py +4 -3
  72. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_goldenswag.py +4 -3
  73. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag.py +5 -3
  74. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag_cs.py +4 -3
  75. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hellaswag_fi.py +6 -4
  76. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_multi_wiki_qa.py +1 -0
  77. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wikiann.py +1 -1
  78. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/load_ud_pos.py +18 -0
  79. {scandeval-16.12.0 → scandeval-16.13.0}/tests/conftest.py +12 -3
  80. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_config_factory.py +15 -2
  81. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmarker.py +17 -43
  82. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_data_loading.py +16 -7
  83. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_data_models.py +1 -27
  84. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_dataset_configs.py +8 -22
  85. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_model_loading.py +23 -10
  86. scandeval-16.13.0/tests/test_string_utils.py +55 -0
  87. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_utils.py +1 -52
  88. {scandeval-16.12.0 → scandeval-16.13.0}/uv.lock +10 -10
  89. scandeval-16.12.0/.github/auto_assign.yaml +0 -29
  90. scandeval-16.12.0/PKG-INFO +0 -667
  91. scandeval-16.12.0/README.md +0 -587
  92. scandeval-16.12.0/src/scandeval/utils.py +0 -548
  93. {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
  94. {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  95. {scandeval-16.12.0 → scandeval-16.13.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
  96. {scandeval-16.12.0 → scandeval-16.13.0}/.github/workflows/auto_assign_reviewers.yaml +0 -0
  97. {scandeval-16.12.0 → scandeval-16.13.0}/.github/workflows/ci.yaml +0 -0
  98. {scandeval-16.12.0 → scandeval-16.13.0}/.markdownlint.jsonc +0 -0
  99. {scandeval-16.12.0 → scandeval-16.13.0}/CITATION.cff +0 -0
  100. {scandeval-16.12.0 → scandeval-16.13.0}/CODE_OF_CONDUCT.md +0 -0
  101. {scandeval-16.12.0 → scandeval-16.13.0}/CONTRIBUTING.md +0 -0
  102. {scandeval-16.12.0 → scandeval-16.13.0}/Dockerfile.cuda +0 -0
  103. {scandeval-16.12.0 → scandeval-16.13.0}/LICENSE +0 -0
  104. {scandeval-16.12.0 → scandeval-16.13.0}/NEW_DATASET_GUIDE.md +0 -0
  105. {scandeval-16.12.0 → scandeval-16.13.0}/docs/CNAME +0 -0
  106. {scandeval-16.12.0 → scandeval-16.13.0}/docs/README.md +0 -0
  107. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/README.md +0 -0
  108. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/albanian.md +0 -0
  109. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/bosnian.md +0 -0
  110. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/bulgarian.md +0 -0
  111. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/catalan.md +0 -0
  112. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/croatian.md +0 -0
  113. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/czech.md +0 -0
  114. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/dutch.md +0 -0
  115. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/english.md +0 -0
  116. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/estonian.md +0 -0
  117. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/faroese.md +0 -0
  118. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/finnish.md +0 -0
  119. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/french.md +0 -0
  120. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/german.md +0 -0
  121. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/greek.md +0 -0
  122. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/hungarian.md +0 -0
  123. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/icelandic.md +0 -0
  124. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/italian.md +0 -0
  125. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/latvian.md +0 -0
  126. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/lithuanian.md +0 -0
  127. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/norwegian.md +0 -0
  128. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/polish.md +0 -0
  129. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/portuguese.md +0 -0
  130. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/romanian.md +0 -0
  131. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/serbian.md +0 -0
  132. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/slovak.md +0 -0
  133. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/slovene.md +0 -0
  134. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/spanish.md +0 -0
  135. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/swedish.md +0 -0
  136. {scandeval-16.12.0 → scandeval-16.13.0}/docs/datasets/ukrainian.md +0 -0
  137. {scandeval-16.12.0 → scandeval-16.13.0}/docs/extras/radial_plotter.md +0 -0
  138. {scandeval-16.12.0 → scandeval-16.13.0}/docs/faq.md +0 -0
  139. {scandeval-16.12.0 → scandeval-16.13.0}/docs/gfx/favicon.png +0 -0
  140. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/albanian.md +0 -0
  141. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/bosnian.md +0 -0
  142. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/bulgarian.md +0 -0
  143. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/catalan.md +0 -0
  144. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/croatian.md +0 -0
  145. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/czech.md +0 -0
  146. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/danish.md +0 -0
  147. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/dutch.md +0 -0
  148. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/english.md +0 -0
  149. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/estonian.md +0 -0
  150. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/faroese.md +0 -0
  151. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/finnish.md +0 -0
  152. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/french.md +0 -0
  153. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/german.md +0 -0
  154. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/greek.md +0 -0
  155. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/hungarian.md +0 -0
  156. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/icelandic.md +0 -0
  157. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/italian.md +0 -0
  158. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/latvian.md +0 -0
  159. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/lithuanian.md +0 -0
  160. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/norwegian.md +0 -0
  161. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/polish.md +0 -0
  162. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/portuguese.md +0 -0
  163. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/romanian.md +0 -0
  164. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/serbian.md +0 -0
  165. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/slovak.md +0 -0
  166. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/slovene.md +0 -0
  167. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/spanish.md +0 -0
  168. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/swedish.md +0 -0
  169. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Monolingual/ukrainian.md +0 -0
  170. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/baltic.md +0 -0
  171. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/european.md +0 -0
  172. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/finnic.md +0 -0
  173. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/germanic.md +0 -0
  174. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +0 -0
  175. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/romance.md +0 -0
  176. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/Multilingual/slavic.md +0 -0
  177. {scandeval-16.12.0 → scandeval-16.13.0}/docs/leaderboards/README.md +0 -0
  178. {scandeval-16.12.0 → scandeval-16.13.0}/docs/methodology.md +0 -0
  179. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/README.md +0 -0
  180. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/bias-detection.md +0 -0
  181. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/common-sense-reasoning.md +0 -0
  182. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/european-values.md +0 -0
  183. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/knowledge.md +0 -0
  184. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/linguistic-acceptability.md +0 -0
  185. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/named-entity-recognition.md +0 -0
  186. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/reading-comprehension.md +0 -0
  187. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/sentiment-classification.md +0 -0
  188. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/simplification.md +0 -0
  189. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/speed.md +0 -0
  190. {scandeval-16.12.0 → scandeval-16.13.0}/docs/tasks/summarization.md +0 -0
  191. {scandeval-16.12.0 → scandeval-16.13.0}/gfx/euroeval.png +0 -0
  192. {scandeval-16.12.0 → scandeval-16.13.0}/gfx/euroeval.xcf +0 -0
  193. {scandeval-16.12.0 → scandeval-16.13.0}/gfx/scandeval.png +0 -0
  194. {scandeval-16.12.0 → scandeval-16.13.0}/makefile +0 -0
  195. {scandeval-16.12.0 → scandeval-16.13.0}/mkdocs.yaml +0 -0
  196. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/__init__.py +0 -0
  197. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/__init__.py +0 -0
  198. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/benchmark_modules/base.py +0 -0
  199. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/caching_utils.py +0 -0
  200. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/callbacks.py +0 -0
  201. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/bosnian.py +0 -0
  202. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/czech.py +0 -0
  203. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/dataset_configs/slovak.py +0 -0
  204. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/enums.py +0 -0
  205. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/exceptions.py +0 -0
  206. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/finetuning.py +0 -0
  207. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/generation.py +0 -0
  208. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/languages.py +0 -0
  209. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/logging_utils.py +0 -0
  210. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/__init__.py +0 -0
  211. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/base.py +0 -0
  212. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/bias.py +0 -0
  213. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/huggingface.py +0 -0
  214. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/metrics/speed.py +0 -0
  215. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_config.py +0 -0
  216. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/model_loading.py +0 -0
  217. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/__init__.py +0 -0
  218. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/classification.py +0 -0
  219. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/simplification.py +0 -0
  220. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/summarization.py +0 -0
  221. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/prompt_templates/token_classification.py +0 -0
  222. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/scores.py +0 -0
  223. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/speed_benchmark.py +0 -0
  224. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/__init__.py +0 -0
  225. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/multiple_choice_classification.py +0 -0
  226. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/question_answering.py +0 -0
  227. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/task_group_utils/text_to_text.py +0 -0
  228. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/tasks.py +0 -0
  229. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/tokenisation_utils.py +0 -0
  230. {scandeval-16.12.0 → scandeval-16.13.0}/src/scandeval/types.py +0 -0
  231. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/__init__.py +0 -0
  232. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_allocine.py +0 -0
  233. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_angry_tweets.py +0 -0
  234. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_arc.py +0 -0
  235. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_arc_is.py +0 -0
  236. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_atsiliepimai.py +0 -0
  237. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_belebele.py +0 -0
  238. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_bg_ner_bsnlp.py +0 -0
  239. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_boolq_pt.py +0 -0
  240. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cinexio.py +0 -0
  241. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cnn_dailymail.py +0 -0
  242. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_en.py +0 -0
  243. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_es.py +0 -0
  244. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_conll_nl.py +0 -0
  245. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_copa_lv.py +0 -0
  246. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_copa_nl.py +0 -0
  247. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cross_domain_uk_reviews.py +0 -0
  248. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_cs_gec.py +0 -0
  249. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_csfd_sentiment.py +0 -0
  250. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_csfd_sentiment_sk.py +0 -0
  251. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_czech_news.py +0 -0
  252. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dacsa.py +0 -0
  253. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dane.py +0 -0
  254. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danish_citizen_tests.py +0 -0
  255. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dansk.py +0 -0
  256. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danske_talemaader.py +0 -0
  257. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_danske_talemaader_old.py +0 -0
  258. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dbrd.py +0 -0
  259. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_dutch_cola.py +0 -0
  260. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_elner.py +0 -0
  261. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_eltec.py +0 -0
  262. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_err_news.py +0 -0
  263. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_estner.py +0 -0
  264. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_estonian_valence.py +0 -0
  265. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_european_values.py +0 -0
  266. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_exams_bg.py +0 -0
  267. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fone.py +0 -0
  268. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_foqa.py +0 -0
  269. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fosent.py +0 -0
  270. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fquad.py +0 -0
  271. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_fullstack_ner.py +0 -0
  272. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_germanquad.py +0 -0
  273. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_germeval.py +0 -0
  274. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_global_mmlu.py +0 -0
  275. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_grammar_et.py +0 -0
  276. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_greek_sa.py +0 -0
  277. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_greek_wikipedia.py +0 -0
  278. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_guia_cat.py +0 -0
  279. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_harem.py +0 -0
  280. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
  281. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_hun_sum.py +0 -0
  282. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_husst.py +0 -0
  283. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ice_linguistic.py +0 -0
  284. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_error_corpus.py +0 -0
  285. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_knowledge.py +0 -0
  286. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icelandic_qa.py +0 -0
  287. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_icesum.py +0 -0
  288. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_idioms_no.py +0 -0
  289. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ilpost_sum.py +0 -0
  290. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_jentoft.py +0 -0
  291. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_kpwr_ner.py +0 -0
  292. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_latvian_lsm_summary.py +0 -0
  293. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_latvian_twitter_sentiment.py +0 -0
  294. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_life_in_the_uk.py +0 -0
  295. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lithuanian_lrytas_summarization.py +0 -0
  296. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_llmzszl.py +0 -0
  297. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lr_sum.py +0 -0
  298. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lt_emotions.py +0 -0
  299. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_lt_history.py +0 -0
  300. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mbbq_nl.py +0 -0
  301. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mim_gold_ner.py +0 -0
  302. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlqa_es.py +0 -0
  303. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlsum_de.py +0 -0
  304. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mlsum_es.py +0 -0
  305. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu.py +0 -0
  306. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_et.py +0 -0
  307. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_hr.py +0 -0
  308. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mmlu_lv.py +0 -0
  309. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_mms.py +0 -0
  310. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_multinerd-it.py +0 -0
  311. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ner_uk.py +0 -0
  312. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_no_cola.py +0 -0
  313. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_no_sammendrag.py +0 -0
  314. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nor_common_sense_qa.py +0 -0
  315. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nordjylland_news.py +0 -0
  316. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norec.py +0 -0
  317. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norglm_multiqa.py +0 -0
  318. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norglm_multisum.py +0 -0
  319. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norne.py +0 -0
  320. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_norquad.py +0 -0
  321. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nqii.py +0 -0
  322. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_nrk_quiz_qa.py +0 -0
  323. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_orange_sum.py +0 -0
  324. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_personal_sum.py +0 -0
  325. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_polemo2.py +0 -0
  326. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_poner.py +0 -0
  327. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_poquad.py +0 -0
  328. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_psc.py +0 -0
  329. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_publico.py +0 -0
  330. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ronec.py +0 -0
  331. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_rosent.py +0 -0
  332. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_rrn.py +0 -0
  333. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sb10k.py +0 -0
  334. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scala.py +0 -0
  335. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scandiqa.py +0 -0
  336. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_scandisent_fi.py +0 -0
  337. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_schibsted.py +0 -0
  338. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentiment_headlines_es.py +0 -0
  339. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentinews.py +0 -0
  340. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sentipolc16.py +0 -0
  341. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_skolprov.py +0 -0
  342. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sqad.py +0 -0
  343. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad.py +0 -0
  344. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_it.py +0 -0
  345. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_nl.py +0 -0
  346. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_squad_nl_old.py +0 -0
  347. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_ssj500k_ner.py +0 -0
  348. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sst2_pt.py +0 -0
  349. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sst5.py +0 -0
  350. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_suc3.py +0 -0
  351. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_sumo_ro.py +0 -0
  352. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swedish_facts.py +0 -0
  353. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swedn.py +0 -0
  354. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_swerec.py +0 -0
  355. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_szeged_ner.py +0 -0
  356. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_trivia_et.py +0 -0
  357. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_turku_ner_fi.py +0 -0
  358. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_tydiqa_fi.py +0 -0
  359. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_umimeto_qa.py +0 -0
  360. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_uner_sk.py +0 -0
  361. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_uner_sr.py +0 -0
  362. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wiki_lingua_nl.py +0 -0
  363. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_wikineural-it.py +0 -0
  364. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande.py +0 -0
  365. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande_et.py +0 -0
  366. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_winogrande_is.py +0 -0
  367. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_xlsum_fi.py +0 -0
  368. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/create_xquad.py +0 -0
  369. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/fix_dot_env_file.py +0 -0
  370. {scandeval-16.12.0 → scandeval-16.13.0}/src/scripts/versioning.py +0 -0
  371. {scandeval-16.12.0 → scandeval-16.13.0}/tests/__init__.py +0 -0
  372. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_modules/__init__.py +0 -0
  373. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_benchmark_modules/test_hf.py +0 -0
  374. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_bias_metrics.py +0 -0
  375. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_callbacks.py +0 -0
  376. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_cli.py +0 -0
  377. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_constants.py +0 -0
  378. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_enums.py +0 -0
  379. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_exceptions.py +0 -0
  380. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_finetuning.py +0 -0
  381. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_languages.py +0 -0
  382. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_model_config.py +0 -0
  383. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scores.py +0 -0
  384. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/__init__.py +0 -0
  385. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/__init__.py +0 -0
  386. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_create_scala.py +0 -0
  387. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +0 -0
  388. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
  389. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +0 -0
  390. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +0 -0
  391. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +0 -0
  392. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +0 -0
  393. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_speed_benchmark.py +0 -0
  394. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_tokenisation_utils.py +0 -0
  395. {scandeval-16.12.0 → scandeval-16.13.0}/tests/test_types.py +0 -0
@@ -25,6 +25,7 @@ body:
25
25
  description: What languages is the dataset in?
26
26
  options:
27
27
  - label: Albanian
28
+ - label: Belarusian
28
29
  - label: Bulgarian
29
30
  - label: Bosnian
30
31
  - label: Catalan
@@ -22,7 +22,7 @@ body:
22
22
  - label: Finnic languages (Estonian, Finnish)
23
23
  - label: Romance languages (Catalan, French, Italian, Portuguese, Romanian, Spanish)
24
24
  - label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
25
- - label: Slavic languages (Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
25
+ - label: Slavic languages (Belarusian, Bulgarian, Bosnian, Croatian, Czech, Polish, Serbian, Slovak, Slovenian, Ukrainian)
26
26
  - label: West Germanic languages (Dutch, English, German)
27
27
  - label: Albanian
28
28
  - label: Greek
@@ -0,0 +1,9 @@
1
+ addAssignees: author
2
+ numberOfAssignees: 1
3
+
4
+ addReviewers: true
5
+ numberOfReviewers: 1
6
+ reviewers:
7
+ - saattrupdan
8
+
9
+ runOnDraft: true
@@ -125,3 +125,6 @@ gfx/different-poses/*
125
125
 
126
126
  # Contracts
127
127
  generated_contracts/
128
+
129
+ # Test config
130
+ euroeval_config.py
@@ -10,7 +10,7 @@ repos:
10
10
  - id: trailing-whitespace
11
11
  - id: debug-statements
12
12
  - repo: https://github.com/astral-sh/ruff-pre-commit
13
- rev: v0.14.14
13
+ rev: v0.15.0
14
14
  hooks:
15
15
  - id: ruff
16
16
  args:
@@ -34,7 +34,7 @@ repos:
34
34
  hooks:
35
35
  - id: nbstripout
36
36
  - repo: https://github.com/facebook/pyrefly-pre-commit
37
- rev: 0.50.1
37
+ rev: 0.51.1
38
38
  hooks:
39
39
  - id: pyrefly-check
40
40
  name: Pyrefly (type checking)
@@ -7,6 +7,42 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [v16.13.0] - 2026-02-06
11
+
12
+ ### Added
13
+
14
+ - Added support for Belarusian 🇧🇾! This includes the sentiment classification dataset,
15
+ the linguistic acceptability dataset ScaLA-be, the named entity recognition dataset
16
+ WikiANN-be, the reading comprehension dataset MultiWikiQA-be, and the common-sense
17
+ reasoning dataset BE-WSC. This was added by @oliverkinch ✨
18
+ - Added support for evaluating Hugging Face dataset repos directly, if they have a
19
+ `euroeval_config.py` file. We plan to add support for a JSON/YAML config file in the
20
+ future, for simpler datasets.
21
+
22
+ ### Changed
23
+
24
+ - Replaced all `DatasetConfig` arguments starting with underscores with their
25
+ non-underscored version (e.g., `_labels` -> `labels`), as this caused some confusion
26
+ when defining custom datasets. We still maintain the underscored versions for
27
+ backwards compatibility, but raise a warning when using them.
28
+ - Now logs when the model inference service is temporarily unavailable, even when the
29
+ verbose flag is not set.
30
+ - When evaluating local models, we now automatically add the "/v1" suffix to the API
31
+ base URL if not present and required by the API.
32
+
33
+ ### Fixed
34
+
35
+ - Now allows all attention backends compatible with vLLM to be used, through the
36
+ `--attention-backend` CLI option. This was already possible through the Python API,
37
+ but was artificially restricted in the CLI.
38
+ - When initialising a custom `Task` object, we now default the `default_labels` argument
39
+ to an empty list.
40
+
41
+ ### Deprecated
42
+
43
+ - All underscored versions of `DatasetConfig` arguments are deprecated. Please use their
44
+ non-underscored version instead.
45
+
10
46
  ## [v16.12.0] - 2026-02-02
11
47
 
12
48
  ### Added
@@ -0,0 +1,334 @@
1
+ Metadata-Version: 2.4
2
+ Name: ScandEval
3
+ Version: 16.13.0
4
+ Summary: The robust European language model benchmark.
5
+ Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
+ Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
7
+ Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
8
+ Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2022-2026 Dan Saattrup Smart
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Requires-Python: <4.0,>=3.12
32
+ Requires-Dist: accelerate>=1.9.0
33
+ Requires-Dist: bert-score>=0.3.13
34
+ Requires-Dist: click>=8.1.3
35
+ Requires-Dist: cloudpickle>=3.1.1
36
+ Requires-Dist: datasets>=3.5.0
37
+ Requires-Dist: demjson3>=3.0.6
38
+ Requires-Dist: evaluate>=0.4.1
39
+ Requires-Dist: huggingface-hub>=0.30.1
40
+ Requires-Dist: levenshtein>=0.24.0
41
+ Requires-Dist: litellm>=1.75.6
42
+ Requires-Dist: mistral-common[soundfile]
43
+ Requires-Dist: more-itertools>=10.5.0
44
+ Requires-Dist: numpy>=2.0.0
45
+ Requires-Dist: ollama>=0.5.1
46
+ Requires-Dist: pandas>=2.2.0
47
+ Requires-Dist: peft>=0.15.0
48
+ Requires-Dist: protobuf>=2.0.0
49
+ Requires-Dist: pydantic>=2.6.0
50
+ Requires-Dist: pyinfer>=0.0.3
51
+ Requires-Dist: python-dotenv>=1.0.1
52
+ Requires-Dist: rouge-score>=0.1.2
53
+ Requires-Dist: sacrebleu>=2.5.1
54
+ Requires-Dist: sacremoses>=0.1.1
55
+ Requires-Dist: scikit-learn==1.6.1
56
+ Requires-Dist: sentencepiece>=0.1.96
57
+ Requires-Dist: seqeval>=1.2.2
58
+ Requires-Dist: setuptools>=75.8.2
59
+ Requires-Dist: tenacity>=9.0.0
60
+ Requires-Dist: termcolor>=2.0.0
61
+ Requires-Dist: torch>=2.6.0
62
+ Requires-Dist: transformers[mistral-common]<5.0.0,>=4.56.0
63
+ Provides-Extra: all
64
+ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
65
+ Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
66
+ Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
67
+ Requires-Dist: timm>=1.0.19; extra == 'all'
68
+ Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'all'
69
+ Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'all'
70
+ Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'all'
71
+ Provides-Extra: generative
72
+ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
73
+ Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
74
+ Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
75
+ Requires-Dist: timm>=1.0.19; extra == 'generative'
76
+ Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'generative'
77
+ Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'generative'
78
+ Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'generative'
79
+ Description-Content-Type: text/markdown
80
+
81
+ <!-- This disables the requirement that the first line is a top-level heading -->
82
+ <!-- markdownlint-configure-file { "MD041": false } -->
83
+
84
+ <div align='center'>
85
+ <img
86
+ src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
87
+ height="500"
88
+ width="372"
89
+ >
90
+ </div>
91
+
92
+ ### The robust European language model benchmark
93
+
94
+ (formerly known as ScandEval)
95
+
96
+ ______________________________________________________________________
97
+ [![Documentation](https://img.shields.io/badge/docs-passing-green)](https://euroeval.com)
98
+ [![PyPI Status](https://badge.fury.io/py/euroeval.svg)](https://pypi.org/project/euroeval/)
99
+ [![First paper](https://img.shields.io/badge/arXiv-2304.00906-b31b1b.svg)](https://arxiv.org/abs/2304.00906)
100
+ [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
101
+ [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
102
+ [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
103
+ [![Code Coverage](https://img.shields.io/badge/Coverage-73%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
104
+ [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
105
+
106
+ ## Maintainer
107
+
108
+ - Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
109
+
110
+ ## Installation and usage
111
+
112
+ See the [documentation](https://euroeval.com/python-package/) for more information.
113
+
114
+ ## Reproducing the evaluation datasets
115
+
116
+ All datasets used in this project are generated using the scripts located in the
117
+ [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
118
+ with the following command:
119
+
120
+ ```bash
121
+ uv run src/scripts/<name-of-script>.py
122
+ ```
123
+
124
+ Replace `<name-of-script>` with the specific script you wish to execute, e.g.,
125
+
126
+ ```bash
127
+ uv run src/scripts/create_allocine.py
128
+ ```
129
+
130
+ ## Contributors :pray:
131
+
132
+ A huge thank you to all the contributors who have helped make this project a success!
133
+
134
+ <a href="https://github.com/peter-sk">
135
+ <img
136
+ src="https://avatars.githubusercontent.com/u/6168908"
137
+ width=50
138
+ alt="Contributor avatar for peter-sk"
139
+ />
140
+ </a>
141
+ <a href="https://github.com/AJDERS">
142
+ <img
143
+ src="https://avatars.githubusercontent.com/u/38854604"
144
+ width=50
145
+ alt="Contributor avatar for AJDERS"
146
+ />
147
+ </a>
148
+ <a href="https://github.com/oliverkinch">
149
+ <img
150
+ src="https://avatars.githubusercontent.com/u/71556498"
151
+ width=50
152
+ alt="Contributor avatar for oliverkinch"
153
+ />
154
+ </a>
155
+ <a href="https://github.com/versae">
156
+ <img
157
+ src="https://avatars.githubusercontent.com/u/173537"
158
+ width=50
159
+ alt="Contributor avatar for versae"
160
+ />
161
+ </a>
162
+ <a href="https://github.com/KennethEnevoldsen">
163
+ <img
164
+ src="https://avatars.githubusercontent.com/u/23721977"
165
+ width=50
166
+ alt="Contributor avatar for KennethEnevoldsen"
167
+ />
168
+ </a>
169
+ <a href="https://github.com/viggo-gascou">
170
+ <img
171
+ src="https://avatars.githubusercontent.com/u/94069687"
172
+ width=50
173
+ alt="Contributor avatar for viggo-gascou"
174
+ />
175
+ </a>
176
+ <a href="https://github.com/mathiasesn">
177
+ <img
178
+ src="https://avatars.githubusercontent.com/u/27091759"
179
+ width=50
180
+ alt="Contributor avatar for mathiasesn"
181
+ />
182
+ </a>
183
+ <a href="https://github.com/Alkarex">
184
+ <img
185
+ src="https://avatars.githubusercontent.com/u/1008324"
186
+ width=50
187
+ alt="Contributor avatar for Alkarex"
188
+ />
189
+ </a>
190
+ <a href="https://github.com/marksverdhei">
191
+ <img
192
+ src="https://avatars.githubusercontent.com/u/46672778"
193
+ width=50
194
+ alt="Contributor avatar for marksverdhei"
195
+ />
196
+ </a>
197
+ <a href="https://github.com/Mikeriess">
198
+ <img
199
+ src="https://avatars.githubusercontent.com/u/19728563"
200
+ width=50
201
+ alt="Contributor avatar for Mikeriess"
202
+ />
203
+ </a>
204
+ <a href="https://github.com/ThomasKluiters">
205
+ <img
206
+ src="https://avatars.githubusercontent.com/u/8137941"
207
+ width=50
208
+ alt="Contributor avatar for ThomasKluiters"
209
+ />
210
+ </a>
211
+ <a href="https://github.com/BramVanroy">
212
+ <img
213
+ src="https://avatars.githubusercontent.com/u/2779410"
214
+ width=50
215
+ alt="Contributor avatar for BramVanroy"
216
+ />
217
+ </a>
218
+ <a href="https://github.com/peregilk">
219
+ <img
220
+ src="https://avatars.githubusercontent.com/u/9079808"
221
+ width=50
222
+ alt="Contributor avatar for peregilk"
223
+ />
224
+ </a>
225
+ <a href="https://github.com/Rijgersberg">
226
+ <img
227
+ src="https://avatars.githubusercontent.com/u/8604946"
228
+ width=50
229
+ alt="Contributor avatar for Rijgersberg"
230
+ />
231
+ </a>
232
+ <a href="https://github.com/duarteocarmo">
233
+ <img
234
+ src="https://avatars.githubusercontent.com/u/26342344"
235
+ width=50
236
+ alt="Contributor avatar for duarteocarmo"
237
+ />
238
+ </a>
239
+ <a href="https://github.com/slowwavesleep">
240
+ <img
241
+ src="https://avatars.githubusercontent.com/u/44175589"
242
+ width=50
243
+ alt="Contributor avatar for slowwavesleep"
244
+ />
245
+ </a>
246
+ <a href="https://github.com/mrkowalski">
247
+ <img
248
+ src="https://avatars.githubusercontent.com/u/6357044"
249
+ width=50
250
+ alt="Contributor avatar for mrkowalski"
251
+ />
252
+ </a>
253
+ <a href="https://github.com/simonevanbruggen">
254
+ <img
255
+ src="https://avatars.githubusercontent.com/u/24842609"
256
+ width=50
257
+ alt="Contributor avatar for simonevanbruggen"
258
+ />
259
+ </a>
260
+ <a href="https://github.com/tvosch">
261
+ <img
262
+ src="https://avatars.githubusercontent.com/u/110661769"
263
+ width=50
264
+ alt="Contributor avatar for tvosch"
265
+ />
266
+ </a>
267
+ <a href="https://github.com/Touzen">
268
+ <img
269
+ src="https://avatars.githubusercontent.com/u/1416265"
270
+ width=50
271
+ alt="Contributor avatar for Touzen"
272
+ />
273
+ </a>
274
+ <a href="https://github.com/caldaibis">
275
+ <img
276
+ src="https://avatars.githubusercontent.com/u/16032437"
277
+ width=50
278
+ alt="Contributor avatar for caldaibis"
279
+ />
280
+ </a>
281
+ <a href="https://github.com/SwekeR-463">
282
+ <img
283
+ src="https://avatars.githubusercontent.com/u/114919896?v=4"
284
+ width=50
285
+ alt="Contributor avatar for SwekeR-463"
286
+ />
287
+ </a>
288
+
289
+ ### Contribute to EuroEval
290
+
291
+ We welcome contributions to EuroEval! Whether you're fixing bugs, adding features, or
292
+ contributing new datasets, your help makes this project better for everyone.
293
+
294
+ - **General contributions**: Check out our [contribution guidelines](CONTRIBUTING.md)
295
+ for information on how to get started.
296
+ - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
297
+ a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
298
+
299
+ ### Special thanks
300
+
301
+ - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
302
+ [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
303
+ - Thanks to [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
304
+ models on the leaderboards.
305
+ - Thanks to [OpenAI](https://openai.com/) for sponsoring OpenAI credits as part of their
306
+ [Researcher Access Program](https://openai.com/form/researcher-access-program/).
307
+ - Thanks to [UWV](https://www.uwv.nl/) and [KU
308
+ Leuven](https://www.arts.kuleuven.be/ling/ccl) for sponsoring the Azure OpenAI
309
+ credits used to evaluate GPT-4-turbo in Dutch.
310
+ - Thanks to [Miðeind](https://mideind.is/en) for sponsoring the OpenAI
311
+ credits used to evaluate GPT-4-turbo in Icelandic and Faroese.
312
+ - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
313
+ evaluate GPT-4-turbo in German.
314
+
315
+ ## Citing EuroEval
316
+
317
+ If you want to cite the framework then feel free to use this:
318
+
319
+ ```bibtex
320
+ @article{smart2024encoder,
321
+ title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
322
+ author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
323
+ journal={arXiv preprint arXiv:2406.13469},
324
+ year={2024}
325
+ }
326
+ @inproceedings{smart2023scandeval,
327
+ author = {Smart, Dan Saattrup},
328
+ booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
329
+ month = may,
330
+ pages = {185--201},
331
+ title = {{ScandEval: A Benchmark for Scandinavian Natural Language Processing}},
332
+ year = {2023}
333
+ }
334
+ ```
@@ -0,0 +1,254 @@
1
+ <!-- This disables the requirement that the first line is a top-level heading -->
2
+ <!-- markdownlint-configure-file { "MD041": false } -->
3
+
4
+ <div align='center'>
5
+ <img
6
+ src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
7
+ height="500"
8
+ width="372"
9
+ >
10
+ </div>
11
+
12
+ ### The robust European language model benchmark
13
+
14
+ (formerly known as ScandEval)
15
+
16
+ ______________________________________________________________________
17
+ [![Documentation](https://img.shields.io/badge/docs-passing-green)](https://euroeval.com)
18
+ [![PyPI Status](https://badge.fury.io/py/euroeval.svg)](https://pypi.org/project/euroeval/)
19
+ [![First paper](https://img.shields.io/badge/arXiv-2304.00906-b31b1b.svg)](https://arxiv.org/abs/2304.00906)
20
+ [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
21
+ [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
22
+ [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
23
+ [![Code Coverage](https://img.shields.io/badge/Coverage-73%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
24
+ [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
25
+
26
+ ## Maintainer
27
+
28
+ - Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
29
+
30
+ ## Installation and usage
31
+
32
+ See the [documentation](https://euroeval.com/python-package/) for more information.
33
+
34
+ ## Reproducing the evaluation datasets
35
+
36
+ All datasets used in this project are generated using the scripts located in the
37
+ [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
38
+ with the following command:
39
+
40
+ ```bash
41
+ uv run src/scripts/<name-of-script>.py
42
+ ```
43
+
44
+ Replace `<name-of-script>` with the specific script you wish to execute, e.g.,
45
+
46
+ ```bash
47
+ uv run src/scripts/create_allocine.py
48
+ ```
49
+
50
+ ## Contributors :pray:
51
+
52
+ A huge thank you to all the contributors who have helped make this project a success!
53
+
54
+ <a href="https://github.com/peter-sk">
55
+ <img
56
+ src="https://avatars.githubusercontent.com/u/6168908"
57
+ width=50
58
+ alt="Contributor avatar for peter-sk"
59
+ />
60
+ </a>
61
+ <a href="https://github.com/AJDERS">
62
+ <img
63
+ src="https://avatars.githubusercontent.com/u/38854604"
64
+ width=50
65
+ alt="Contributor avatar for AJDERS"
66
+ />
67
+ </a>
68
+ <a href="https://github.com/oliverkinch">
69
+ <img
70
+ src="https://avatars.githubusercontent.com/u/71556498"
71
+ width=50
72
+ alt="Contributor avatar for oliverkinch"
73
+ />
74
+ </a>
75
+ <a href="https://github.com/versae">
76
+ <img
77
+ src="https://avatars.githubusercontent.com/u/173537"
78
+ width=50
79
+ alt="Contributor avatar for versae"
80
+ />
81
+ </a>
82
+ <a href="https://github.com/KennethEnevoldsen">
83
+ <img
84
+ src="https://avatars.githubusercontent.com/u/23721977"
85
+ width=50
86
+ alt="Contributor avatar for KennethEnevoldsen"
87
+ />
88
+ </a>
89
+ <a href="https://github.com/viggo-gascou">
90
+ <img
91
+ src="https://avatars.githubusercontent.com/u/94069687"
92
+ width=50
93
+ alt="Contributor avatar for viggo-gascou"
94
+ />
95
+ </a>
96
+ <a href="https://github.com/mathiasesn">
97
+ <img
98
+ src="https://avatars.githubusercontent.com/u/27091759"
99
+ width=50
100
+ alt="Contributor avatar for mathiasesn"
101
+ />
102
+ </a>
103
+ <a href="https://github.com/Alkarex">
104
+ <img
105
+ src="https://avatars.githubusercontent.com/u/1008324"
106
+ width=50
107
+ alt="Contributor avatar for Alkarex"
108
+ />
109
+ </a>
110
+ <a href="https://github.com/marksverdhei">
111
+ <img
112
+ src="https://avatars.githubusercontent.com/u/46672778"
113
+ width=50
114
+ alt="Contributor avatar for marksverdhei"
115
+ />
116
+ </a>
117
+ <a href="https://github.com/Mikeriess">
118
+ <img
119
+ src="https://avatars.githubusercontent.com/u/19728563"
120
+ width=50
121
+ alt="Contributor avatar for Mikeriess"
122
+ />
123
+ </a>
124
+ <a href="https://github.com/ThomasKluiters">
125
+ <img
126
+ src="https://avatars.githubusercontent.com/u/8137941"
127
+ width=50
128
+ alt="Contributor avatar for ThomasKluiters"
129
+ />
130
+ </a>
131
+ <a href="https://github.com/BramVanroy">
132
+ <img
133
+ src="https://avatars.githubusercontent.com/u/2779410"
134
+ width=50
135
+ alt="Contributor avatar for BramVanroy"
136
+ />
137
+ </a>
138
+ <a href="https://github.com/peregilk">
139
+ <img
140
+ src="https://avatars.githubusercontent.com/u/9079808"
141
+ width=50
142
+ alt="Contributor avatar for peregilk"
143
+ />
144
+ </a>
145
+ <a href="https://github.com/Rijgersberg">
146
+ <img
147
+ src="https://avatars.githubusercontent.com/u/8604946"
148
+ width=50
149
+ alt="Contributor avatar for Rijgersberg"
150
+ />
151
+ </a>
152
+ <a href="https://github.com/duarteocarmo">
153
+ <img
154
+ src="https://avatars.githubusercontent.com/u/26342344"
155
+ width=50
156
+ alt="Contributor avatar for duarteocarmo"
157
+ />
158
+ </a>
159
+ <a href="https://github.com/slowwavesleep">
160
+ <img
161
+ src="https://avatars.githubusercontent.com/u/44175589"
162
+ width=50
163
+ alt="Contributor avatar for slowwavesleep"
164
+ />
165
+ </a>
166
+ <a href="https://github.com/mrkowalski">
167
+ <img
168
+ src="https://avatars.githubusercontent.com/u/6357044"
169
+ width=50
170
+ alt="Contributor avatar for mrkowalski"
171
+ />
172
+ </a>
173
+ <a href="https://github.com/simonevanbruggen">
174
+ <img
175
+ src="https://avatars.githubusercontent.com/u/24842609"
176
+ width=50
177
+ alt="Contributor avatar for simonevanbruggen"
178
+ />
179
+ </a>
180
+ <a href="https://github.com/tvosch">
181
+ <img
182
+ src="https://avatars.githubusercontent.com/u/110661769"
183
+ width=50
184
+ alt="Contributor avatar for tvosch"
185
+ />
186
+ </a>
187
+ <a href="https://github.com/Touzen">
188
+ <img
189
+ src="https://avatars.githubusercontent.com/u/1416265"
190
+ width=50
191
+ alt="Contributor avatar for Touzen"
192
+ />
193
+ </a>
194
+ <a href="https://github.com/caldaibis">
195
+ <img
196
+ src="https://avatars.githubusercontent.com/u/16032437"
197
+ width=50
198
+ alt="Contributor avatar for caldaibis"
199
+ />
200
+ </a>
201
+ <a href="https://github.com/SwekeR-463">
202
+ <img
203
+ src="https://avatars.githubusercontent.com/u/114919896?v=4"
204
+ width=50
205
+ alt="Contributor avatar for SwekeR-463"
206
+ />
207
+ </a>
208
+
209
+ ### Contribute to EuroEval
210
+
211
+ We welcome contributions to EuroEval! Whether you're fixing bugs, adding features, or
212
+ contributing new datasets, your help makes this project better for everyone.
213
+
214
+ - **General contributions**: Check out our [contribution guidelines](CONTRIBUTING.md)
215
+ for information on how to get started.
216
+ - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
217
+ a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
218
+
219
+ ### Special thanks
220
+
221
+ - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
222
+ [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
223
+ - Thanks to [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
224
+ models on the leaderboards.
225
+ - Thanks to [OpenAI](https://openai.com/) for sponsoring OpenAI credits as part of their
226
+ [Researcher Access Program](https://openai.com/form/researcher-access-program/).
227
+ - Thanks to [UWV](https://www.uwv.nl/) and [KU
228
+ Leuven](https://www.arts.kuleuven.be/ling/ccl) for sponsoring the Azure OpenAI
229
+ credits used to evaluate GPT-4-turbo in Dutch.
230
+ - Thanks to [Miðeind](https://mideind.is/en) for sponsoring the OpenAI
231
+ credits used to evaluate GPT-4-turbo in Icelandic and Faroese.
232
+ - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
233
+ evaluate GPT-4-turbo in German.
234
+
235
+ ## Citing EuroEval
236
+
237
+ If you want to cite the framework then feel free to use this:
238
+
239
+ ```bibtex
240
+ @article{smart2024encoder,
241
+ title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
242
+ author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
243
+ journal={arXiv preprint arXiv:2406.13469},
244
+ year={2024}
245
+ }
246
+ @inproceedings{smart2023scandeval,
247
+ author = {Smart, Dan Saattrup},
248
+ booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
249
+ month = may,
250
+ pages = {185--201},
251
+ title = {{ScandEval: A Benchmark for Scandinavian Natural Language Processing}},
252
+ year = {2023}
253
+ }
254
+ ```