EuroEval 16.4.0__py3-none-any.whl → 16.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (71) hide show
  1. euroeval/__init__.py +6 -0
  2. euroeval/benchmark_config_factory.py +51 -46
  3. euroeval/benchmark_modules/base.py +6 -5
  4. euroeval/benchmark_modules/hf.py +2 -9
  5. euroeval/benchmark_modules/litellm.py +14 -12
  6. euroeval/benchmark_modules/vllm.py +17 -10
  7. euroeval/benchmarker.py +61 -44
  8. euroeval/caching_utils.py +1 -1
  9. euroeval/cli.py +86 -8
  10. euroeval/constants.py +3 -0
  11. euroeval/data_loading.py +78 -30
  12. euroeval/data_models.py +326 -326
  13. euroeval/dataset_configs/__init__.py +10 -3
  14. euroeval/dataset_configs/bulgarian.py +56 -0
  15. euroeval/dataset_configs/czech.py +25 -29
  16. euroeval/dataset_configs/danish.py +51 -88
  17. euroeval/dataset_configs/dutch.py +48 -86
  18. euroeval/dataset_configs/english.py +45 -76
  19. euroeval/dataset_configs/estonian.py +36 -38
  20. euroeval/dataset_configs/faroese.py +19 -60
  21. euroeval/dataset_configs/finnish.py +36 -68
  22. euroeval/dataset_configs/french.py +39 -74
  23. euroeval/dataset_configs/german.py +45 -81
  24. euroeval/dataset_configs/greek.py +64 -0
  25. euroeval/dataset_configs/icelandic.py +54 -91
  26. euroeval/dataset_configs/italian.py +42 -78
  27. euroeval/dataset_configs/latvian.py +28 -34
  28. euroeval/dataset_configs/lithuanian.py +22 -26
  29. euroeval/dataset_configs/norwegian.py +72 -114
  30. euroeval/dataset_configs/polish.py +33 -60
  31. euroeval/dataset_configs/portuguese.py +33 -65
  32. euroeval/dataset_configs/serbian.py +64 -0
  33. euroeval/dataset_configs/slovak.py +19 -24
  34. euroeval/dataset_configs/spanish.py +42 -76
  35. euroeval/dataset_configs/swedish.py +48 -84
  36. euroeval/dataset_configs/ukrainian.py +64 -0
  37. euroeval/exceptions.py +1 -1
  38. euroeval/finetuning.py +3 -2
  39. euroeval/generation.py +5 -4
  40. euroeval/generation_utils.py +6 -5
  41. euroeval/languages.py +395 -323
  42. euroeval/metrics/huggingface.py +14 -3
  43. euroeval/metrics/llm_as_a_judge.py +1 -1
  44. euroeval/model_cache.py +6 -5
  45. euroeval/model_loading.py +1 -1
  46. euroeval/prompt_templates/__init__.py +2 -0
  47. euroeval/prompt_templates/classification.py +206 -0
  48. euroeval/prompt_templates/linguistic_acceptability.py +82 -43
  49. euroeval/prompt_templates/multiple_choice.py +81 -41
  50. euroeval/prompt_templates/named_entity_recognition.py +125 -44
  51. euroeval/prompt_templates/reading_comprehension.py +92 -43
  52. euroeval/prompt_templates/sentiment_classification.py +91 -43
  53. euroeval/prompt_templates/summarization.py +64 -39
  54. euroeval/prompt_templates/token_classification.py +279 -0
  55. euroeval/scores.py +4 -3
  56. euroeval/speed_benchmark.py +2 -1
  57. euroeval/task_group_utils/multiple_choice_classification.py +2 -1
  58. euroeval/task_group_utils/question_answering.py +24 -13
  59. euroeval/task_group_utils/sequence_classification.py +5 -4
  60. euroeval/task_group_utils/text_to_text.py +2 -1
  61. euroeval/task_group_utils/token_classification.py +11 -8
  62. euroeval/tasks.py +44 -1
  63. euroeval/tokenisation_utils.py +19 -10
  64. euroeval/types.py +10 -9
  65. euroeval/utils.py +6 -3
  66. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +194 -37
  67. euroeval-16.5.0.dist-info/RECORD +81 -0
  68. euroeval-16.4.0.dist-info/RECORD +0 -75
  69. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
  70. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
  71. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,105 +1,99 @@
1
1
  """All Norwegian dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import NB, NN, NO
4
+ from ..languages import NORWEGIAN, NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  NOREC_CONFIG = DatasetConfig(
10
10
  name="norec",
11
- pretty_name="the truncated version of the Norwegian sentiment classification "
12
- "dataset NoReC",
13
- huggingface_id="EuroEval/norec-mini",
11
+ pretty_name="NoReC",
12
+ source="EuroEval/norec-mini",
14
13
  task=SENT,
15
- languages=[NB, NN, NO],
14
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
16
15
  )
17
16
 
18
17
  SCALA_NB_CONFIG = DatasetConfig(
19
18
  name="scala-nb",
20
- pretty_name="the Bokmål part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-nb",
19
+ pretty_name="ScaLA-nb",
20
+ source="EuroEval/scala-nb",
22
21
  task=LA,
23
- languages=[NB, NO],
22
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN],
24
23
  )
25
24
 
26
25
  SCALA_NN_CONFIG = DatasetConfig(
27
26
  name="scala-nn",
28
- pretty_name="the Nynorsk part of the linguistic acceptability dataset ScaLA",
29
- huggingface_id="EuroEval/scala-nn",
27
+ pretty_name="ScaLA-nn",
28
+ source="EuroEval/scala-nn",
30
29
  task=LA,
31
- languages=[NN],
30
+ languages=[NORWEGIAN_NYNORSK],
32
31
  )
33
32
 
34
33
  NORNE_NB_CONFIG = DatasetConfig(
35
34
  name="norne-nb",
36
- pretty_name="the truncated version of the Bokmål part of the Norwegian named "
37
- "entity recognition dataset NorNE",
38
- huggingface_id="EuroEval/norne-nb-mini",
35
+ pretty_name="NorNE-nb",
36
+ source="EuroEval/norne-nb-mini",
39
37
  task=NER,
40
- languages=[NB, NO],
38
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN],
41
39
  )
42
40
 
43
41
  NORNE_NN_CONFIG = DatasetConfig(
44
42
  name="norne-nn",
45
- pretty_name="the truncated version of the Nynorsk part of the Norwegian named "
46
- "entity recognition dataset NorNE",
47
- huggingface_id="EuroEval/norne-nn-mini",
43
+ pretty_name="NorNE-nn",
44
+ source="EuroEval/norne-nn-mini",
48
45
  task=NER,
49
- languages=[NN],
46
+ languages=[NORWEGIAN_NYNORSK],
50
47
  )
51
48
 
52
49
  NORQUAD_CONFIG = DatasetConfig(
53
50
  name="norquad",
54
- pretty_name="the truncated version of the Norwegian question answering "
55
- "dataset NorQuAD",
56
- huggingface_id="EuroEval/norquad-mini",
51
+ pretty_name="NorQuAD",
52
+ source="EuroEval/norquad-mini",
57
53
  task=RC,
58
- languages=[NB, NN, NO],
54
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
59
55
  _num_few_shot_examples=2,
60
56
  )
61
57
 
62
58
  NO_SAMMENDRAG_CONFIG = DatasetConfig(
63
59
  name="no-sammendrag",
64
- pretty_name="the truncated version of the Norwegian summarisation dataset "
65
- "Norske Sammendrag",
66
- huggingface_id="EuroEval/no-sammendrag-mini",
60
+ pretty_name="NoSammendrag",
61
+ source="EuroEval/no-sammendrag-mini",
67
62
  task=SUMM,
68
- languages=[NB, NN, NO],
63
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
69
64
  )
70
65
 
71
66
  NRK_QUIZ_QA_CONFIG = DatasetConfig(
72
67
  name="nrk-quiz-qa",
73
- pretty_name="the truncated version of the Norwegian knowledge dataset NRK Quiz QA",
74
- huggingface_id="EuroEval/nrk-quiz-qa-mini",
68
+ pretty_name="NRK Quiz QA",
69
+ source="EuroEval/nrk-quiz-qa-mini",
75
70
  task=KNOW,
76
- languages=[NB, NN, NO],
71
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
77
72
  )
78
73
 
79
74
  IDIOMS_NO_CONFIG = DatasetConfig(
80
75
  name="idioms-no",
81
- pretty_name="the Norwegian knowledge dataset Idioms-no",
82
- huggingface_id="EuroEval/idioms-no",
76
+ pretty_name="Idioms-no",
77
+ source="EuroEval/idioms-no",
83
78
  task=KNOW,
84
- languages=[NB, NN, NO],
79
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
85
80
  )
86
81
 
87
82
  NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
88
83
  name="nor-common-sense-qa",
89
- pretty_name="the truncated version of the Norwegian common-sense reasoning dataset "
90
- "NorCommonSenseQA",
91
- huggingface_id="EuroEval/nor-common-sense-qa",
84
+ pretty_name="NorCommonSenseQA",
85
+ source="EuroEval/nor-common-sense-qa",
92
86
  task=COMMON_SENSE,
93
- languages=[NB, NN, NO],
87
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
94
88
  _labels=["a", "b", "c", "d", "e"],
95
89
  )
96
90
 
97
- EUROPEAN_VALUES_NO_CONFIG = DatasetConfig(
98
- name="european-values-no",
99
- pretty_name="the Norwegian version of the European values evaluation dataset",
100
- huggingface_id="EuroEval/european-values-no",
91
+ VALEU_NO_CONFIG = DatasetConfig(
92
+ name="valeu-no",
93
+ pretty_name="VaLEU-no",
94
+ source="EuroEval/european-values-no",
101
95
  task=EUROPEAN_VALUES,
102
- languages=[NB, NN, NO],
96
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
103
97
  splits=["test"],
104
98
  bootstrap_samples=False,
105
99
  _instruction_prompt="{text}",
@@ -110,145 +104,109 @@ EUROPEAN_VALUES_NO_CONFIG = DatasetConfig(
110
104
 
111
105
  NO_COLA_CONFIG = DatasetConfig(
112
106
  name="no-cola",
113
- pretty_name="the truncated version of the Norwegian linguistic acceptability "
114
- "dataset NoCoLA",
115
- huggingface_id="EuroEval/no-cola-mini",
107
+ pretty_name="NoCoLA",
108
+ source="EuroEval/no-cola-mini",
116
109
  task=LA,
117
- languages=[NB, NO],
110
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN],
118
111
  unofficial=True,
119
112
  )
120
113
 
121
114
  NORGLM_MULTI_QA = DatasetConfig(
122
115
  name="norglm-multi-qa",
123
- pretty_name="the question answering part of the Norwegian NorGLM multi-task human "
124
- "annotated dataset NO-Multi-QA-Sum",
125
- huggingface_id="EuroEval/norglm-multi-qa",
116
+ pretty_name="NorGLM-Multi-QA",
117
+ source="EuroEval/norglm-multi-qa",
126
118
  task=RC,
127
- languages=[NB, NN, NO],
119
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
128
120
  unofficial=True,
129
121
  )
130
122
 
131
123
  NORGLM_MULTI_SUM = DatasetConfig(
132
124
  name="norglm-multi-sum",
133
- pretty_name="the summarisation part of the Norwegian NorGLM multi-task human "
134
- "annotated dataset NO-Multi-QA-Sum",
135
- huggingface_id="EuroEval/norglm-multi-sum",
125
+ pretty_name="NorGLM-Multi-Sum",
126
+ source="EuroEval/norglm-multi-sum",
136
127
  task=SUMM,
137
- languages=[NB, NN, NO],
128
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
138
129
  unofficial=True,
139
130
  )
140
131
 
141
132
  SCHIBSTED_NO_CONFIG = DatasetConfig(
142
133
  name="schibsted-no",
143
- pretty_name="the Norwegian summarisation dataset Schibsted-no",
144
- huggingface_id="EuroEval/schibsted-article-summaries-no",
134
+ pretty_name="Schibsted-no",
135
+ source="EuroEval/schibsted-article-summaries-no",
145
136
  task=SUMM,
146
- languages=[NB, NN, NO],
137
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
147
138
  unofficial=True,
148
139
  )
149
140
 
150
141
  PERSONAL_SUM_CONFIG = DatasetConfig(
151
142
  name="personal-sum",
152
- pretty_name="the Norwegian summarisation dataset personal-sum",
153
- huggingface_id="EuroEval/personal-sum",
143
+ pretty_name="Personal Sum",
144
+ source="EuroEval/personal-sum",
154
145
  task=SUMM,
155
- languages=[NB, NN, NO],
146
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
156
147
  unofficial=True,
157
148
  )
158
149
 
159
150
  MMLU_NO_CONFIG = DatasetConfig(
160
151
  name="mmlu-no",
161
- pretty_name="the truncated version of the Norwegian knowledge dataset MMLU-no, "
162
- "translated from the English MMLU dataset",
163
- huggingface_id="EuroEval/mmlu-no-mini",
152
+ pretty_name="MMLU-no",
153
+ source="EuroEval/mmlu-no-mini",
164
154
  task=KNOW,
165
- languages=[NB, NN, NO],
155
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
166
156
  unofficial=True,
167
157
  )
168
158
 
169
159
  ARC_NO_CONFIG = DatasetConfig(
170
160
  name="arc-no",
171
- pretty_name="the truncated version of the Norwegian knowledge dataset ARC-no, "
172
- "translated from the English ARC dataset",
173
- huggingface_id="EuroEval/arc-no-mini",
161
+ pretty_name="ARC-no",
162
+ source="EuroEval/arc-no-mini",
174
163
  task=KNOW,
175
- languages=[NB, NN, NO],
164
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
176
165
  unofficial=True,
177
166
  )
178
167
 
179
168
  HELLASWAG_NO_CONFIG = DatasetConfig(
180
169
  name="hellaswag-no",
181
- pretty_name="the truncated version of the Norwegian common-sense reasoning dataset "
182
- "HellaSwag-no, translated from the English HellaSwag dataset",
183
- huggingface_id="EuroEval/hellaswag-no-mini",
170
+ pretty_name="HellaSwag-no",
171
+ source="EuroEval/hellaswag-no-mini",
184
172
  task=COMMON_SENSE,
185
- languages=[NB, NN, NO],
173
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
186
174
  unofficial=True,
187
175
  )
188
176
 
189
177
  BELEBELE_NO_CONFIG = DatasetConfig(
190
178
  name="belebele-no",
191
- pretty_name="the Norwegian multiple choice reading comprehension dataset "
192
- "BeleBele-no, translated from the English BeleBele dataset",
193
- huggingface_id="EuroEval/belebele-no-mini",
179
+ pretty_name="Belebele-no",
180
+ source="EuroEval/belebele-no-mini",
194
181
  task=MCRC,
195
- languages=[NB, NN, NO],
182
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
196
183
  unofficial=True,
197
184
  )
198
185
 
199
186
  MULTI_WIKI_QA_NB_CONFIG = DatasetConfig(
200
187
  name="multi-wiki-qa-nb",
201
- pretty_name="the truncated version of the Norwegian Bokmål part of the reading "
202
- "comprehension dataset MultiWikiQA",
203
- huggingface_id="EuroEval/multi-wiki-qa-no-mini",
188
+ pretty_name="MultiWikiQA-nb",
189
+ source="EuroEval/multi-wiki-qa-no-mini",
204
190
  task=RC,
205
- languages=[NB, NO],
191
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN],
206
192
  unofficial=True,
207
193
  )
208
194
 
209
195
  MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
210
196
  name="multi-wiki-qa-nn",
211
- pretty_name="the truncated version of the Norwegian Nynorsk part of the reading "
212
- "comprehension dataset MultiWikiQA",
213
- huggingface_id="EuroEval/multi-wiki-qa-nn-mini",
197
+ pretty_name="MultiWikiQA-nn",
198
+ source="EuroEval/multi-wiki-qa-nn-mini",
214
199
  task=RC,
215
- languages=[NN],
200
+ languages=[NORWEGIAN_NYNORSK],
216
201
  unofficial=True,
217
202
  )
218
203
 
219
204
  WINOGRANDE_NO_CONFIG = DatasetConfig(
220
205
  name="winogrande-no",
221
- pretty_name="the Norwegian common-sense reasoning dataset Winogrande-no, "
222
- "translated from the English Winogrande dataset",
223
- huggingface_id="EuroEval/winogrande-no",
206
+ pretty_name="Winogrande-no",
207
+ source="EuroEval/winogrande-no",
224
208
  task=COMMON_SENSE,
225
- languages=[NB, NN, NO],
209
+ languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
226
210
  _labels=["a", "b"],
227
211
  unofficial=True,
228
212
  )
229
-
230
- EUROPEAN_VALUES_SITUATIONAL_NO_CONFIG = DatasetConfig(
231
- name="european-values-situational-no",
232
- pretty_name="the Norwegian version of the European values evaluation dataset, "
233
- "where the questions are phrased in a situational way",
234
- huggingface_id="EuroEval/european-values-situational-no",
235
- task=EUROPEAN_VALUES,
236
- languages=[NB, NN, NO],
237
- splits=["test"],
238
- bootstrap_samples=False,
239
- _instruction_prompt="{text}",
240
- unofficial=True,
241
- )
242
-
243
- EUROPEAN_VALUES_COMPLETIONS_NO_CONFIG = DatasetConfig(
244
- name="european-values-completions-no",
245
- pretty_name="the Norwegian version of the European values evaluation dataset, "
246
- "where the questions are phrased as sentence completions",
247
- huggingface_id="EuroEval/european-values-completions-no",
248
- task=EUROPEAN_VALUES,
249
- languages=[NO],
250
- splits=["test"],
251
- bootstrap_samples=False,
252
- _instruction_prompt="{text}",
253
- unofficial=True,
254
- )
@@ -1,75 +1,74 @@
1
1
  """All Polish dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import PL
4
+ from ..languages import POLISH
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  POLEMO2_CONFIG = DatasetConfig(
10
10
  name="polemo2",
11
- pretty_name="the Polish sentiment classification dataset PolEmo2",
12
- huggingface_id="EuroEval/polemo2-mini",
11
+ pretty_name="Polemo2",
12
+ source="EuroEval/polemo2-mini",
13
13
  task=SENT,
14
- languages=[PL],
14
+ languages=[POLISH],
15
15
  )
16
16
 
17
17
  SCALA_PL_CONFIG = DatasetConfig(
18
18
  name="scala-pl",
19
- pretty_name="the Polish part of the linguistic acceptability dataset ScaLA",
20
- huggingface_id="EuroEval/scala-pl",
19
+ pretty_name="ScaLA-pl",
20
+ source="EuroEval/scala-pl",
21
21
  task=LA,
22
- languages=[PL],
22
+ languages=[POLISH],
23
23
  )
24
24
 
25
25
  KPWR_NER_CONFIG = DatasetConfig(
26
26
  name="kpwr-ner",
27
- pretty_name="the Polish entity recognition dataset KPWr-NER",
28
- huggingface_id="EuroEval/kpwr-ner",
27
+ pretty_name="KPWr-NER",
28
+ source="EuroEval/kpwr-ner",
29
29
  task=NER,
30
- languages=[PL],
30
+ languages=[POLISH],
31
31
  )
32
32
 
33
33
  POQUAD_CONFIG = DatasetConfig(
34
34
  name="poquad",
35
- pretty_name="the Polish question answering dataset PoQuAD",
36
- huggingface_id="EuroEval/poquad-mini",
35
+ pretty_name="PoQuAD",
36
+ source="EuroEval/poquad-mini",
37
37
  task=RC,
38
- languages=[PL],
38
+ languages=[POLISH],
39
39
  )
40
40
 
41
41
  PSC_CONFIG = DatasetConfig(
42
42
  name="psc",
43
- pretty_name="the Polish summarisation dataset PSC",
44
- huggingface_id="EuroEval/psc-mini",
43
+ pretty_name="PSC",
44
+ source="EuroEval/psc-mini",
45
45
  task=SUMM,
46
- languages=[PL],
46
+ languages=[POLISH],
47
47
  )
48
48
 
49
49
  LLMZSZL_CONFIG = DatasetConfig(
50
50
  name="llmzszl",
51
- pretty_name="the Polish knowledge dataset LLMzSzŁ",
52
- huggingface_id="EuroEval/llmzszl-mini",
51
+ pretty_name="LLMzSzŁ",
52
+ source="EuroEval/llmzszl-mini",
53
53
  task=KNOW,
54
- languages=[PL],
54
+ languages=[POLISH],
55
55
  )
56
56
 
57
57
  WINOGRANDE_PL_CONFIG = DatasetConfig(
58
58
  name="winogrande-pl",
59
- pretty_name="the Polish common-sense reasoning dataset Winogrande-pl, translated "
60
- "from the English Winogrande dataset",
61
- huggingface_id="EuroEval/winogrande-pl",
59
+ pretty_name="Winogrande-pl",
60
+ source="EuroEval/winogrande-pl",
62
61
  task=COMMON_SENSE,
63
- languages=[PL],
62
+ languages=[POLISH],
64
63
  _labels=["a", "b"],
65
64
  )
66
65
 
67
- EUROPEAN_VALUES_PL_CONFIG = DatasetConfig(
68
- name="european-values-pl",
69
- pretty_name="the Polish version of the European values evaluation dataset",
70
- huggingface_id="EuroEval/european-values-pl",
66
+ VALEU_PL_CONFIG = DatasetConfig(
67
+ name="valeu-pl",
68
+ pretty_name="VaLEU-pl",
69
+ source="EuroEval/european-values-pl",
71
70
  task=EUROPEAN_VALUES,
72
- languages=[PL],
71
+ languages=[POLISH],
73
72
  splits=["test"],
74
73
  bootstrap_samples=False,
75
74
  _instruction_prompt="{text}",
@@ -80,44 +79,18 @@ EUROPEAN_VALUES_PL_CONFIG = DatasetConfig(
80
79
 
81
80
  MULTI_WIKI_QA_PL_CONFIG = DatasetConfig(
82
81
  name="multi-wiki-qa-pl",
83
- pretty_name="the truncated version of the Polish part of the reading "
84
- "comprehension dataset MultiWikiQA",
85
- huggingface_id="EuroEval/multi-wiki-qa-pl-mini",
82
+ pretty_name="MultiWikiQA-pl",
83
+ source="EuroEval/multi-wiki-qa-pl-mini",
86
84
  task=RC,
87
- languages=[PL],
85
+ languages=[POLISH],
88
86
  unofficial=True,
89
87
  )
90
88
 
91
89
  GOLDENSWAG_PL_CONFIG = DatasetConfig(
92
90
  name="goldenswag-pl",
93
- pretty_name="the truncated version of the Polish common-sense reasoning "
94
- "dataset GoldenSwag-pl, translated from the English GoldenSwag dataset",
95
- huggingface_id="EuroEval/goldenswag-pl-mini",
91
+ pretty_name="GoldenSwag-pl",
92
+ source="EuroEval/goldenswag-pl-mini",
96
93
  task=COMMON_SENSE,
97
- languages=[PL],
98
- unofficial=True,
99
- )
100
-
101
- EUROPEAN_VALUES_SITUATIONAL_PL_CONFIG = DatasetConfig(
102
- name="european-values-situational-pl",
103
- pretty_name="the Polish version of the European values evaluation dataset, where "
104
- "the questions are phrased in a situational way",
105
- huggingface_id="EuroEval/european-values-situational-pl",
106
- task=EUROPEAN_VALUES,
107
- languages=[PL],
108
- splits=["test"],
109
- bootstrap_samples=False,
110
- unofficial=True,
111
- )
112
-
113
- EUROPEAN_VALUES_COMPLETIONS_PL_CONFIG = DatasetConfig(
114
- name="european-values-completions-pl",
115
- pretty_name="the Polish version of the European values evaluation dataset, where "
116
- "the questions are phrased as sentence completions",
117
- huggingface_id="EuroEval/european-values-completions-pl",
118
- task=EUROPEAN_VALUES,
119
- languages=[PL],
120
- splits=["test"],
121
- bootstrap_samples=False,
94
+ languages=[POLISH],
122
95
  unofficial=True,
123
96
  )
@@ -1,78 +1,74 @@
1
1
  """All Portuguese dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import PT
4
+ from ..languages import EUROPEAN_PORTUGUESE, PORTUGUESE
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SST2_PT_CONFIG = DatasetConfig(
10
10
  name="sst2-pt",
11
- pretty_name="the truncated version of the Portuguese sentiment classification "
12
- "dataset SST2-pt, translated from the English SST2 dataset",
13
- huggingface_id="EuroEval/sst2-pt-mini",
11
+ pretty_name="SST2-pt",
12
+ source="EuroEval/sst2-pt-mini",
14
13
  task=SENT,
15
- languages=[PT],
14
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
16
15
  _labels=["positive", "negative"],
17
16
  )
18
17
 
19
18
  SCALA_PT = DatasetConfig(
20
19
  name="scala-pt",
21
- pretty_name="the Portuguese part of the linguistic acceptability dataset ScaLA",
22
- huggingface_id="EuroEval/scala-pt",
20
+ pretty_name="ScaLA-pt",
21
+ source="EuroEval/scala-pt",
23
22
  task=LA,
24
- languages=[PT],
23
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
25
24
  )
26
25
 
27
26
  HAREM_CONFIG = DatasetConfig(
28
27
  name="harem",
29
- pretty_name="the Portuguese named entity recognition dataset HAREM",
30
- huggingface_id="EuroEval/harem",
28
+ pretty_name="HAREM",
29
+ source="EuroEval/harem",
31
30
  task=NER,
32
- languages=[PT],
31
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
33
32
  )
34
33
 
35
34
  MULTI_WIKI_QA_PT_CONFIG = DatasetConfig(
36
35
  name="multi-wiki-qa-pt",
37
- pretty_name="the truncated version of the Portuguese part of the reading "
38
- "comprehension dataset MultiWikiQA",
39
- huggingface_id="EuroEval/multi-wiki-qa-pt-pt-mini",
36
+ pretty_name="MultiWikiQA-pt",
37
+ source="EuroEval/multi-wiki-qa-pt-pt-mini",
40
38
  task=RC,
41
- languages=[PT],
39
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
42
40
  )
43
41
 
44
42
  PUBLICO_CONFIG = DatasetConfig(
45
43
  name="publico",
46
- pretty_name="the truncated version of the Portuguese summarisation dataset Público",
47
- huggingface_id="EuroEval/publico-mini",
44
+ pretty_name="Publico",
45
+ source="EuroEval/publico-mini",
48
46
  task=SUMM,
49
- languages=[PT],
47
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
50
48
  )
51
49
 
52
50
  MMLU_PT_CONFIG = DatasetConfig(
53
51
  name="mmlu-pt",
54
- pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
55
- "translated from the English MMLU dataset",
56
- huggingface_id="EuroEval/mmlu-pt-mini",
52
+ pretty_name="MMLU-pt",
53
+ source="EuroEval/mmlu-pt-mini",
57
54
  task=KNOW,
58
- languages=[PT],
55
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
59
56
  )
60
57
 
61
58
  GOLDENSWAG_PT_CONFIG = DatasetConfig(
62
59
  name="goldenswag-pt",
63
- pretty_name="the truncated version of the Portuguese common-sense reasoning "
64
- "dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
65
- huggingface_id="EuroEval/goldenswag-pt-mini",
60
+ pretty_name="GoldenSwag-pt",
61
+ source="EuroEval/goldenswag-pt-mini",
66
62
  task=COMMON_SENSE,
67
- languages=[PT],
63
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
68
64
  )
69
65
 
70
- EUROPEAN_VALUES_PT_CONFIG = DatasetConfig(
71
- name="european-values-pt",
72
- pretty_name="the Portuguese version of the European values evaluation dataset",
73
- huggingface_id="EuroEval/european-values-pt",
66
+ VALEU_PT_CONFIG = DatasetConfig(
67
+ name="valeu-pt",
68
+ pretty_name="VaLEU-pt",
69
+ source="EuroEval/european-values-pt",
74
70
  task=EUROPEAN_VALUES,
75
- languages=[PT],
71
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
76
72
  splits=["test"],
77
73
  bootstrap_samples=False,
78
74
  _instruction_prompt="{text}",
@@ -83,47 +79,19 @@ EUROPEAN_VALUES_PT_CONFIG = DatasetConfig(
83
79
 
84
80
  BOOLQ_PT_CONFIG = DatasetConfig(
85
81
  name="boolq-pt",
86
- pretty_name="the Portuguese multiple choice reading comprehension dataset "
87
- "BoolQ-pt, translated from the English BoolQ dataset",
88
- huggingface_id="EuroEval/boolq-pt",
82
+ pretty_name="BoolQ-pt",
83
+ source="EuroEval/boolq-pt",
89
84
  task=MCRC,
90
- languages=[PT],
85
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
91
86
  unofficial=True,
92
87
  )
93
88
 
94
89
  WINOGRANDE_PT_CONFIG = DatasetConfig(
95
90
  name="winogrande-pt",
96
- pretty_name="the Portuguese common-sense reasoning dataset Winogrande-pt, "
97
- "translated from the English Winogrande dataset",
98
- huggingface_id="EuroEval/winogrande-pt",
91
+ pretty_name="Winogrande-pt",
92
+ source="EuroEval/winogrande-pt",
99
93
  task=COMMON_SENSE,
100
- languages=[PT],
94
+ languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
101
95
  _labels=["a", "b"],
102
96
  unofficial=True,
103
97
  )
104
-
105
- EUROPEAN_VALUES_SITUATIONAL_PT_CONFIG = DatasetConfig(
106
- name="european-values-situational-pt",
107
- pretty_name="the Portuguese version of the European values evaluation dataset, "
108
- "where the questions are phrased in a situational way",
109
- huggingface_id="EuroEval/european-values-situational-pt",
110
- task=EUROPEAN_VALUES,
111
- languages=[PT],
112
- splits=["test"],
113
- bootstrap_samples=False,
114
- _instruction_prompt="{text}",
115
- unofficial=True,
116
- )
117
-
118
- EUROPEAN_VALUES_COMPLETIONS_PT_CONFIG = DatasetConfig(
119
- name="european-values-completions-pt",
120
- pretty_name="the Portuguese version of the European values evaluation dataset, "
121
- "where the questions are phrased as sentence completions",
122
- huggingface_id="EuroEval/european-values-completions-pt",
123
- task=EUROPEAN_VALUES,
124
- languages=[PT],
125
- splits=["test"],
126
- bootstrap_samples=False,
127
- _instruction_prompt="{text}",
128
- unofficial=True,
129
- )