EuroEval 16.2.2__tar.gz → 16.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (313) hide show
  1. {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +9 -2
  2. {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/bug.yaml +6 -3
  3. {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +3 -1
  4. {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +5 -4
  5. euroeval-16.4.0/.markdownlint.jsonc +10 -0
  6. {euroeval-16.2.2 → euroeval-16.4.0}/.pre-commit-config.yaml +8 -2
  7. {euroeval-16.2.2 → euroeval-16.4.0}/CHANGELOG.md +618 -216
  8. {euroeval-16.2.2 → euroeval-16.4.0}/CODE_OF_CONDUCT.md +3 -3
  9. {euroeval-16.2.2 → euroeval-16.4.0}/CONTRIBUTING.md +6 -5
  10. {euroeval-16.2.2 → euroeval-16.4.0}/NEW_DATASET_GUIDE.md +48 -25
  11. {euroeval-16.2.2 → euroeval-16.4.0}/PKG-INFO +182 -61
  12. {euroeval-16.2.2 → euroeval-16.4.0}/README.md +179 -58
  13. {euroeval-16.2.2 → euroeval-16.4.0}/docs/README.md +9 -4
  14. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/README.md +1 -1
  15. euroeval-16.4.0/docs/datasets/czech.md +671 -0
  16. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/danish.md +239 -152
  17. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/dutch.md +147 -73
  18. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/english.md +159 -78
  19. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/estonian.md +188 -58
  20. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/faroese.md +94 -54
  21. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/finnish.md +123 -61
  22. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/french.md +130 -65
  23. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/german.md +167 -80
  24. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/icelandic.md +187 -92
  25. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/italian.md +151 -76
  26. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/latvian.md +124 -66
  27. euroeval-16.4.0/docs/datasets/lithuanian.md +517 -0
  28. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/norwegian.md +288 -142
  29. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/polish.md +136 -77
  30. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/portuguese.md +167 -76
  31. euroeval-16.4.0/docs/datasets/slovak.md +446 -0
  32. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/spanish.md +154 -67
  33. {euroeval-16.2.2 → euroeval-16.4.0}/docs/datasets/swedish.md +255 -153
  34. {euroeval-16.2.2 → euroeval-16.4.0}/docs/extras/radial_plotter.md +2 -2
  35. euroeval-16.4.0/docs/leaderboards/Monolingual/czech.md +26 -0
  36. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/danish.md +3 -0
  37. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/dutch.md +3 -0
  38. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/english.md +3 -0
  39. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/estonian.md +5 -2
  40. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/faroese.md +3 -0
  41. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/finnish.md +3 -0
  42. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/french.md +3 -0
  43. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/german.md +3 -0
  44. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/icelandic.md +3 -0
  45. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/italian.md +3 -0
  46. euroeval-16.4.0/docs/leaderboards/Monolingual/latvian.md +26 -0
  47. euroeval-16.4.0/docs/leaderboards/Monolingual/lithuanian.md +26 -0
  48. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/norwegian.md +3 -0
  49. euroeval-16.4.0/docs/leaderboards/Monolingual/polish.md +26 -0
  50. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/portuguese.md +3 -0
  51. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/spanish.md +3 -0
  52. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Monolingual/swedish.md +3 -0
  53. euroeval-16.4.0/docs/leaderboards/Multilingual/baltic.md +26 -0
  54. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/european.md +3 -0
  55. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/finnic.md +5 -2
  56. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/germanic.md +3 -0
  57. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/mainland-scandinavian.md +3 -0
  58. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/Multilingual/romance.md +3 -0
  59. euroeval-16.4.0/docs/leaderboards/Multilingual/slavic.md +26 -0
  60. {euroeval-16.2.2 → euroeval-16.4.0}/docs/leaderboards/README.md +4 -6
  61. {euroeval-16.2.2 → euroeval-16.4.0}/docs/methodology.md +2 -5
  62. {euroeval-16.2.2 → euroeval-16.4.0}/docs/python-package.md +10 -12
  63. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/README.md +0 -2
  64. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/common-sense-reasoning.md +1 -3
  65. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/knowledge.md +1 -3
  66. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/linguistic-acceptability.md +1 -3
  67. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/named-entity-recognition.md +1 -3
  68. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/reading-comprehension.md +1 -3
  69. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/sentiment-classification.md +1 -3
  70. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/speed.md +1 -3
  71. {euroeval-16.2.2 → euroeval-16.4.0}/docs/tasks/summarization.md +1 -3
  72. {euroeval-16.2.2 → euroeval-16.4.0}/makefile +1 -1
  73. {euroeval-16.2.2 → euroeval-16.4.0}/pyproject.toml +10 -5
  74. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/__init__.py +7 -4
  75. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_config_factory.py +0 -4
  76. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/base.py +3 -16
  77. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/fresh.py +5 -2
  78. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/hf.py +107 -66
  79. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/litellm.py +103 -55
  80. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/vllm.py +155 -82
  81. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmarker.py +184 -129
  82. euroeval-16.4.0/src/euroeval/caching_utils.py +79 -0
  83. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/callbacks.py +5 -7
  84. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/cli.py +1 -1
  85. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/constants.py +9 -0
  86. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/data_loading.py +14 -11
  87. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/data_models.py +12 -4
  88. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/__init__.py +3 -0
  89. euroeval-16.4.0/src/euroeval/dataset_configs/czech.py +79 -0
  90. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/danish.py +10 -13
  91. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/dutch.py +0 -3
  92. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/english.py +0 -3
  93. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/estonian.py +11 -1
  94. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/finnish.py +0 -3
  95. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/french.py +0 -3
  96. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/german.py +0 -3
  97. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/italian.py +0 -3
  98. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/latvian.py +2 -4
  99. euroeval-16.4.0/src/euroeval/dataset_configs/lithuanian.py +68 -0
  100. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/norwegian.py +0 -3
  101. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/polish.py +0 -3
  102. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/portuguese.py +0 -3
  103. euroeval-16.4.0/src/euroeval/dataset_configs/slovak.py +60 -0
  104. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/spanish.py +0 -3
  105. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/swedish.py +10 -15
  106. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/finetuning.py +21 -15
  107. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/generation.py +10 -10
  108. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/generation_utils.py +2 -3
  109. euroeval-16.4.0/src/euroeval/logging_utils.py +250 -0
  110. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/base.py +0 -3
  111. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/huggingface.py +10 -6
  112. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/llm_as_a_judge.py +5 -3
  113. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/pipeline.py +22 -9
  114. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/speed.py +0 -3
  115. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_cache.py +11 -14
  116. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_config.py +4 -5
  117. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/model_loading.py +3 -0
  118. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/linguistic_acceptability.py +30 -3
  119. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/multiple_choice.py +34 -1
  120. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/named_entity_recognition.py +71 -11
  121. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/reading_comprehension.py +41 -3
  122. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/sentiment_classification.py +34 -1
  123. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/summarization.py +26 -6
  124. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/scores.py +7 -7
  125. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/speed_benchmark.py +3 -5
  126. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/multiple_choice_classification.py +0 -3
  127. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/question_answering.py +0 -3
  128. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/sequence_classification.py +43 -31
  129. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/text_to_text.py +17 -8
  130. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/token_classification.py +10 -9
  131. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/tokenisation_utils.py +22 -20
  132. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/utils.py +30 -147
  133. euroeval-16.4.0/src/scripts/__init__.py +1 -0
  134. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/constants.py +3 -0
  135. euroeval-16.4.0/src/scripts/create_cs_gec.py +83 -0
  136. euroeval-16.4.0/src/scripts/create_csfd_sentiment.py +97 -0
  137. euroeval-16.4.0/src/scripts/create_csfd_sentiment_sk.py +92 -0
  138. euroeval-16.4.0/src/scripts/create_czech_news.py +75 -0
  139. euroeval-16.4.0/src/scripts/create_hellaswag_cs.py +120 -0
  140. euroeval-16.4.0/src/scripts/create_lithuanian_lrytas_summarization.py +87 -0
  141. euroeval-16.4.0/src/scripts/create_lt_emotions.py +159 -0
  142. euroeval-16.4.0/src/scripts/create_lt_history.py +154 -0
  143. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mmlu.py +1 -1
  144. euroeval-16.4.0/src/scripts/create_mmlu_et.py +162 -0
  145. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_multi_wiki_qa.py +2 -0
  146. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norglm_multiqa.py +20 -0
  147. euroeval-16.2.2/src/scripts/create_wikiann_lv.py → euroeval-16.4.0/src/scripts/create_poner.py +62 -44
  148. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scala.py +6 -0
  149. euroeval-16.2.2/src/scripts/create_swedish_skolprov.py → euroeval-16.4.0/src/scripts/create_skolprov.py +25 -18
  150. euroeval-16.4.0/src/scripts/create_sqad.py +137 -0
  151. euroeval-16.4.0/src/scripts/create_umimeto_qa.py +114 -0
  152. euroeval-16.4.0/src/scripts/create_uner_sk.py +183 -0
  153. euroeval-16.4.0/src/scripts/create_wikiann.py +115 -0
  154. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande.py +21 -1
  155. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/load_ud_pos.py +216 -72
  156. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/versioning.py +1 -1
  157. {euroeval-16.2.2 → euroeval-16.4.0}/tests/conftest.py +28 -12
  158. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_modules/test_hf.py +11 -5
  159. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmarker.py +49 -55
  160. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_constants.py +4 -2
  161. euroeval-16.4.0/tests/test_data_loading.py +166 -0
  162. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_data_models.py +2 -1
  163. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_dataset_configs.py +36 -0
  164. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_model_config.py +1 -0
  165. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_model_loading.py +3 -0
  166. euroeval-16.4.0/tests/test_scripts/__init__.py +1 -0
  167. euroeval-16.4.0/tests/test_scripts/test_create_scala/__init__.py +1 -0
  168. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_create_scala.py +86 -0
  169. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/de_gsd-ud-train.conllu.adp_det +12 -0
  170. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/empty.file +0 -0
  171. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/en_gum-ud-train.conllu.case +70 -0
  172. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_01 +11 -0
  173. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_02 +14 -0
  174. euroeval-16.4.0/tests/test_scripts/test_create_scala/test_data/pl_pdb-ud-train.conllu.aux_clitic_03 +16 -0
  175. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_speed_benchmark.py +1 -0
  176. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_tokenisation_utils.py +6 -2
  177. {euroeval-16.2.2 → euroeval-16.4.0}/uv.lock +142 -152
  178. euroeval-16.2.2/tests/test_data_loading.py +0 -141
  179. {euroeval-16.2.2 → euroeval-16.4.0}/.github/ISSUE_TEMPLATE/language_request.yaml +0 -0
  180. {euroeval-16.2.2 → euroeval-16.4.0}/.github/workflows/ci.yaml +0 -0
  181. {euroeval-16.2.2 → euroeval-16.4.0}/.gitignore +0 -0
  182. {euroeval-16.2.2 → euroeval-16.4.0}/CITATION.cff +0 -0
  183. {euroeval-16.2.2 → euroeval-16.4.0}/Dockerfile.cuda +0 -0
  184. {euroeval-16.2.2 → euroeval-16.4.0}/LICENSE +0 -0
  185. {euroeval-16.2.2 → euroeval-16.4.0}/docs/CNAME +0 -0
  186. {euroeval-16.2.2 → euroeval-16.4.0}/docs/faq.md +0 -0
  187. {euroeval-16.2.2 → euroeval-16.4.0}/docs/gfx/favicon.png +0 -0
  188. {euroeval-16.2.2 → euroeval-16.4.0}/gfx/euroeval.png +0 -0
  189. {euroeval-16.2.2 → euroeval-16.4.0}/gfx/euroeval.xcf +0 -0
  190. {euroeval-16.2.2 → euroeval-16.4.0}/gfx/scandeval.png +0 -0
  191. {euroeval-16.2.2 → euroeval-16.4.0}/mkdocs.yaml +0 -0
  192. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/benchmark_modules/__init__.py +0 -0
  193. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/faroese.py +0 -0
  194. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/dataset_configs/icelandic.py +0 -0
  195. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/enums.py +0 -0
  196. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/exceptions.py +0 -0
  197. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/languages.py +0 -0
  198. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/metrics/__init__.py +0 -0
  199. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/prompt_templates/__init__.py +0 -0
  200. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/task_group_utils/__init__.py +0 -0
  201. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/tasks.py +0 -0
  202. {euroeval-16.2.2 → euroeval-16.4.0}/src/euroeval/types.py +0 -0
  203. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_allocine.py +0 -0
  204. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_angry_tweets.py +0 -0
  205. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_arc.py +0 -0
  206. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_arc_is.py +0 -0
  207. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_belebele.py +0 -0
  208. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_boolq_pt.py +0 -0
  209. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_cnn_dailymail.py +0 -0
  210. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_en.py +0 -0
  211. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_es.py +0 -0
  212. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_conll_nl.py +0 -0
  213. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_copa_lv.py +0 -0
  214. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dane.py +0 -0
  215. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danish_citizen_tests.py +0 -0
  216. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dansk.py +0 -0
  217. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danske_talemaader.py +0 -0
  218. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_danske_talemaader_old.py +0 -0
  219. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dbrd.py +0 -0
  220. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_dutch_cola.py +0 -0
  221. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_eltec.py +0 -0
  222. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_err_news.py +0 -0
  223. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_estner.py +0 -0
  224. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_estonian_valence.py +0 -0
  225. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_european_values.py +0 -0
  226. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_exam_et.py +0 -0
  227. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fone.py +0 -0
  228. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_foqa.py +0 -0
  229. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fosent.py +0 -0
  230. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fquad.py +0 -0
  231. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_fullstack_ner.py +0 -0
  232. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_germanquad.py +0 -0
  233. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_germeval.py +0 -0
  234. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_goldenswag.py +0 -0
  235. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_grammar_et.py +0 -0
  236. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_harem.py +0 -0
  237. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hellaswag.py +0 -0
  238. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hellaswag_fi.py +0 -0
  239. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
  240. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_ice_linguistic.py +0 -0
  241. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_error_corpus.py +0 -0
  242. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_knowledge.py +0 -0
  243. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icelandic_qa.py +0 -0
  244. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_icesum.py +0 -0
  245. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_idioms_no.py +0 -0
  246. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_ilpost_sum.py +0 -0
  247. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_jentoft.py +0 -0
  248. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_kpwr_ner.py +0 -0
  249. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_latvian_lsm_summary.py +0 -0
  250. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_latvian_twitter_sentiment.py +0 -0
  251. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_life_in_the_uk.py +0 -0
  252. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_llmzszl.py +0 -0
  253. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mim_gold_ner.py +0 -0
  254. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlqa_es.py +0 -0
  255. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlsum_de.py +0 -0
  256. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mlsum_es.py +0 -0
  257. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_mmlu_lv.py +0 -0
  258. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_multinerd-it.py +0 -0
  259. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_no_cola.py +0 -0
  260. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_no_sammendrag.py +0 -0
  261. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nor_common_sense_qa.py +0 -0
  262. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nordjylland_news.py +0 -0
  263. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norec.py +0 -0
  264. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norglm_multisum.py +0 -0
  265. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norne.py +0 -0
  266. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_norquad.py +0 -0
  267. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nqii.py +0 -0
  268. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_nrk_quiz_qa.py +0 -0
  269. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_orange_sum.py +0 -0
  270. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_personal_sum.py +0 -0
  271. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_polemo2.py +0 -0
  272. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_poquad.py +0 -0
  273. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_psc.py +0 -0
  274. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_publico.py +0 -0
  275. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_rrn.py +0 -0
  276. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sb10k.py +0 -0
  277. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scandiqa.py +0 -0
  278. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_scandisent_fi.py +0 -0
  279. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_schibsted.py +0 -0
  280. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sentiment_headlines_es.py +0 -0
  281. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sentipolc16.py +0 -0
  282. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad.py +0 -0
  283. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_it.py +0 -0
  284. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_nl.py +0 -0
  285. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_squad_nl_old.py +0 -0
  286. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sst2_pt.py +0 -0
  287. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_sst5.py +0 -0
  288. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_suc3.py +0 -0
  289. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_swedn.py +0 -0
  290. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_swerec.py +0 -0
  291. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_trivia_et.py +0 -0
  292. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_turku_ner_fi.py +0 -0
  293. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_tydiqa_fi.py +0 -0
  294. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_wiki_lingua_nl.py +0 -0
  295. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_wikineural-it.py +0 -0
  296. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande_et.py +0 -0
  297. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_winogrande_is.py +0 -0
  298. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_xlsum_fi.py +0 -0
  299. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/create_xquad.py +0 -0
  300. {euroeval-16.2.2 → euroeval-16.4.0}/src/scripts/fix_dot_env_file.py +0 -0
  301. {euroeval-16.2.2 → euroeval-16.4.0}/tests/__init__.py +0 -0
  302. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_config_factory.py +0 -0
  303. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_benchmark_modules/__init__.py +0 -0
  304. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_callbacks.py +0 -0
  305. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_cli.py +0 -0
  306. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_enums.py +0 -0
  307. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_exceptions.py +0 -0
  308. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_finetuning.py +0 -0
  309. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_languages.py +0 -0
  310. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_scores.py +0 -0
  311. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_tasks.py +0 -0
  312. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_types.py +0 -0
  313. {euroeval-16.2.2 → euroeval-16.4.0}/tests/test_utils.py +0 -0
@@ -14,7 +14,9 @@ body:
14
14
  - type: input
15
15
  attributes:
16
16
  label: Dataset link
17
- description: Please give a link to where the dataset is hosted (doesn't have to be on the Hugging Face Hub)
17
+ description: >
18
+ Please give a link to where the dataset is hosted (doesn't have to be on the
19
+ Hugging Face Hub)
18
20
  validations:
19
21
  required: true
20
22
  - type: checkboxes
@@ -22,6 +24,7 @@ body:
22
24
  label: Dataset languages
23
25
  description: What languages is the dataset in?
24
26
  options:
27
+ - label: Czech
25
28
  - label: Danish
26
29
  - label: Dutch
27
30
  - label: English
@@ -33,9 +36,11 @@ body:
33
36
  - label: Icelandic
34
37
  - label: Italian
35
38
  - label: Latvian
39
+ - label: Lithuanian
36
40
  - label: Norwegian (Bokmål or Nynorsk)
37
41
  - label: Polish
38
42
  - label: Portuguese
43
+ - label: Slovak
39
44
  - label: Spanish
40
45
  - label: Swedish
41
46
  validations:
@@ -43,7 +48,9 @@ body:
43
48
  - type: textarea
44
49
  attributes:
45
50
  label: Describe the dataset
46
- description: Describe what the dataset is measuring, and why you think it is important to include it as a benchmark dataset in EuroEval.
51
+ description: >
52
+ Describe what the dataset is measuring, and why you think it is important to
53
+ include it as a benchmark dataset in EuroEval.
47
54
  validations:
48
55
  required: true
49
56
  - type: markdown
@@ -7,12 +7,15 @@ body:
7
7
  - type: markdown
8
8
  attributes:
9
9
  value: >
10
- #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/EuroEval/EuroEval/issues?q=is%3Aissue).
10
+ Before submitting a bug, please make sure the issue hasn't been already addressed
11
+ by searching through [the existing and past
12
+ issues](https://github.com/EuroEval/EuroEval/issues?q=is%3Aissue).
11
13
  - type: textarea
12
14
  attributes:
13
15
  label: 🐛 Describe the bug
14
- description: |
15
- Please provide a clear and concise description of what the bug is. If relevant, add a minimal example so that we can reproduce the error by running the code.
16
+ description: >
17
+ Please provide a clear and concise description of what the bug is. If relevant,
18
+ add a minimal example so that we can reproduce the error by running the code.
16
19
  validations:
17
20
  required: true
18
21
  - type: dropdown
@@ -8,7 +8,9 @@ body:
8
8
  attributes:
9
9
  label: 🚀 The feature, motivation and pitch
10
10
  description: >
11
- A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*.
11
+ A clear and concise description of the feature proposal. Please outline the
12
+ motivation for the proposal. Is your feature request related to a specific
13
+ problem? e.g., *"I'm working on X and would like Y to be possible"*.
12
14
  validations:
13
15
  required: true
14
16
  - type: markdown
@@ -18,12 +18,12 @@ body:
18
18
  What languages should this model be evaluated on? Tick all that apply. If the
19
19
  model is multilingual (e.g., Mistral, Llama), then tick all the languages.
20
20
  options:
21
+ - label: Baltic languages (Latvian, Lithuanian)
22
+ - label: Finnic languages (Estonian, Finnish)
21
23
  - label: Romance languages (French, Italian, Portuguese, Spanish)
22
24
  - label: Scandinavian languages (Danish, Faroese, Icelandic, Norwegian, Swedish)
25
+ - label: Slavic languages (Czech, Polish, Slovak)
23
26
  - label: West Germanic languages (Dutch, English, German)
24
- - label: Finnic languages (Estonian, Finnish)
25
- - label: Latvian
26
- - label: Polish
27
27
  validations:
28
28
  required: true
29
29
  - type: dropdown
@@ -49,7 +49,8 @@ body:
49
49
  - type: dropdown
50
50
  attributes:
51
51
  label: Merged model
52
- description: Is the model a merge of other models, or built on top of a merged model?
52
+ description: >
53
+ Is the model a merge of other models, or built on top of a merged model?
53
54
  options:
54
55
  - Not a merged model
55
56
  - Merged model
@@ -0,0 +1,10 @@
1
+ {
2
+ "MD013": {
3
+ "line_length": 88,
4
+ "code_blocks": false,
5
+ "tables": false
6
+ }, // Set maximum line length to 88 characters, except in code blocks and tables
7
+ "MD024": false, // Allow duplicate headings
8
+ "MD033": false, // Allow inline HTML
9
+ "MD059": false // Allow non-descriptive link text
10
+ }
@@ -10,7 +10,7 @@ repos:
10
10
  - id: trailing-whitespace
11
11
  - id: debug-statements
12
12
  - repo: https://github.com/astral-sh/ruff-pre-commit
13
- rev: v0.13.0
13
+ rev: v0.14.1
14
14
  hooks:
15
15
  - id: ruff
16
16
  args:
@@ -34,7 +34,7 @@ repos:
34
34
  hooks:
35
35
  - id: nbstripout
36
36
  - repo: https://github.com/pre-commit/mirrors-mypy
37
- rev: v1.18.1
37
+ rev: v1.18.2
38
38
  hooks:
39
39
  - id: mypy
40
40
  args:
@@ -43,3 +43,9 @@ repos:
43
43
  - --ignore-missing-imports
44
44
  - --show-error-codes
45
45
  - --check-untyped-defs
46
+ - repo: https://github.com/DavidAnson/markdownlint-cli2
47
+ rev: v0.18.1
48
+ hooks:
49
+ - id: markdownlint-cli2
50
+ args:
51
+ - --fix