EuroEval 15.15.0 (py3-none-any.whl) → 16.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (63)
  1. euroeval/__init__.py +3 -7
  2. euroeval/benchmark_config_factory.py +3 -7
  3. euroeval/benchmark_modules/base.py +35 -19
  4. euroeval/benchmark_modules/fresh.py +24 -19
  5. euroeval/benchmark_modules/hf.py +136 -154
  6. euroeval/benchmark_modules/litellm.py +323 -193
  7. euroeval/benchmark_modules/vllm.py +166 -112
  8. euroeval/benchmarker.py +59 -33
  9. euroeval/cli.py +3 -3
  10. euroeval/constants.py +13 -15
  11. euroeval/data_loading.py +33 -28
  12. euroeval/data_models.py +53 -7
  13. euroeval/dataset_configs/__init__.py +2 -0
  14. euroeval/dataset_configs/danish.py +38 -1
  15. euroeval/dataset_configs/dutch.py +38 -1
  16. euroeval/dataset_configs/english.py +38 -1
  17. euroeval/dataset_configs/estonian.py +95 -0
  18. euroeval/dataset_configs/faroese.py +38 -0
  19. euroeval/dataset_configs/finnish.py +39 -1
  20. euroeval/dataset_configs/french.py +38 -1
  21. euroeval/dataset_configs/german.py +38 -1
  22. euroeval/dataset_configs/icelandic.py +39 -1
  23. euroeval/dataset_configs/italian.py +38 -1
  24. euroeval/dataset_configs/latvian.py +81 -0
  25. euroeval/dataset_configs/norwegian.py +38 -1
  26. euroeval/dataset_configs/portuguese.py +38 -1
  27. euroeval/dataset_configs/spanish.py +38 -1
  28. euroeval/dataset_configs/swedish.py +38 -1
  29. euroeval/enums.py +0 -6
  30. euroeval/finetuning.py +8 -7
  31. euroeval/generation.py +25 -14
  32. euroeval/generation_utils.py +46 -14
  33. euroeval/languages.py +947 -187
  34. euroeval/metrics/__init__.py +6 -0
  35. euroeval/metrics/base.py +76 -0
  36. euroeval/metrics/huggingface.py +192 -0
  37. euroeval/metrics/llm_as_a_judge.py +257 -0
  38. euroeval/metrics/pipeline.py +234 -0
  39. euroeval/metrics/speed.py +51 -0
  40. euroeval/prompt_templates/linguistic_acceptability.py +40 -2
  41. euroeval/prompt_templates/multiple_choice.py +23 -2
  42. euroeval/prompt_templates/named_entity_recognition.py +65 -2
  43. euroeval/prompt_templates/reading_comprehension.py +42 -2
  44. euroeval/prompt_templates/sentiment_classification.py +46 -2
  45. euroeval/prompt_templates/summarization.py +24 -4
  46. euroeval/scores.py +7 -2
  47. euroeval/speed_benchmark.py +6 -6
  48. euroeval/task_group_utils/multiple_choice_classification.py +17 -6
  49. euroeval/task_group_utils/question_answering.py +35 -28
  50. euroeval/task_group_utils/sequence_classification.py +96 -23
  51. euroeval/task_group_utils/text_to_text.py +7 -3
  52. euroeval/task_group_utils/token_classification.py +47 -75
  53. euroeval/tasks.py +31 -6
  54. euroeval/tokenization_utils.py +295 -207
  55. euroeval/utils.py +118 -34
  56. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +12 -14
  57. euroeval-16.0.0.dist-info/RECORD +69 -0
  58. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
  59. euroeval/human_evaluation.py +0 -738
  60. euroeval/metrics.py +0 -468
  61. euroeval-15.15.0.dist-info/RECORD +0 -63
  62. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
  63. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import EN
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -66,6 +66,17 @@ HELLASWAG_CONFIG = DatasetConfig(
66
66
  languages=[EN],
67
67
  )
68
68
 
69
+ EUROPEAN_VALUES_EN_CONFIG = DatasetConfig(
70
+ name="european-values-en",
71
+ pretty_name="the English version of the European values evaluation dataset",
72
+ huggingface_id="EuroEval/european-values-en",
73
+ task=EUROPEAN_VALUES,
74
+ languages=[EN],
75
+ splits=["test"],
76
+ bootstrap_samples=False,
77
+ _instruction_prompt="{text}",
78
+ )
79
+
69
80
 
70
81
  ### Unofficial datasets ###
71
82
 
@@ -105,3 +116,29 @@ MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
105
116
  languages=[EN],
106
117
  unofficial=True,
107
118
  )
119
+
120
+ EUROPEAN_VALUES_SITUATIONAL_EN_CONFIG = DatasetConfig(
121
+ name="european-values-situational-en",
122
+ pretty_name="the English version of the European values evaluation dataset, where "
123
+ "the questions are phrased in a situational way",
124
+ huggingface_id="EuroEval/european-values-situational-en",
125
+ task=EUROPEAN_VALUES,
126
+ languages=[EN],
127
+ splits=["test"],
128
+ bootstrap_samples=False,
129
+ _instruction_prompt="{text}",
130
+ unofficial=True,
131
+ )
132
+
133
+ EUROPEAN_VALUES_COMPLETIONS_EN_CONFIG = DatasetConfig(
134
+ name="european-values-completions-en",
135
+ pretty_name="the English version of the European values evaluation dataset, where "
136
+ "the questions are phrased as sentence completions",
137
+ huggingface_id="EuroEval/european-values-completions-en",
138
+ task=EUROPEAN_VALUES,
139
+ languages=[EN],
140
+ splits=["test"],
141
+ bootstrap_samples=False,
142
+ _instruction_prompt="{text}",
143
+ unofficial=True,
144
+ )
@@ -0,0 +1,95 @@
1
+ """All Estonian dataset configurations used in EuroEval."""
2
+
3
+ from ..data_models import DatasetConfig
4
+ from ..languages import ET
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
6
+
7
+ ### Official datasets ###
8
+
9
+ ESTONIAN_VALENCE_CONFIG = DatasetConfig(
10
+ name="estonian-valence",
11
+ pretty_name="the Estonian sentiment classification dataset Estonian Valence",
12
+ huggingface_id="EuroEval/estonian-valence",
13
+ task=SENT,
14
+ languages=[ET],
15
+ )
16
+
17
+ GRAMMAR_ET_CONFIG = DatasetConfig(
18
+ name="grammar-et",
19
+ pretty_name="the Estonian linguistic acceptability dataset Grammar-et",
20
+ huggingface_id="EuroEval/grammar-et",
21
+ task=LA,
22
+ languages=[ET],
23
+ )
24
+
25
+ ESTNER_CONFIG = DatasetConfig(
26
+ name="estner",
27
+ pretty_name="the Estonian named entity recognition dataset EstNER",
28
+ huggingface_id="EuroEval/estner-mini",
29
+ task=NER,
30
+ languages=[ET],
31
+ )
32
+
33
+ MULTI_WIKI_QA_ET_CONFIG = DatasetConfig(
34
+ name="multi-wiki-qa-et",
35
+ pretty_name="the truncated version of the Estonian part of the reading "
36
+ "comprehension dataset MultiWikiQA",
37
+ huggingface_id="EuroEval/multi-wiki-qa-et-mini",
38
+ task=RC,
39
+ languages=[ET],
40
+ )
41
+
42
+ ERR_NEWS_CONFIG = DatasetConfig(
43
+ name="err-news",
44
+ pretty_name="the Estonian summarisation dataset ErrNews",
45
+ huggingface_id="EuroEval/err-news-mini",
46
+ task=SUMM,
47
+ languages=[ET],
48
+ )
49
+
50
+ EXAM_ET_CONFIG = DatasetConfig(
51
+ name="exam-et",
52
+ pretty_name="the Estonian knowledge assessment dataset Exam-et",
53
+ huggingface_id="EuroEval/exam-et",
54
+ task=KNOW,
55
+ languages=[ET],
56
+ _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
57
+ )
58
+
59
+ WINOGRANDE_ET_CONFIG = DatasetConfig(
60
+ name="winogrande-et",
61
+ pretty_name="the Estonian common-sense reasoning dataset Winogrande-et",
62
+ huggingface_id="EuroEval/winogrande-et",
63
+ task=COMMON_SENSE,
64
+ languages=[ET],
65
+ _prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
66
+ "igal ülesandel on kaks vastusevarianti (a ja b).",
67
+ _prompt_template="Tekstülesanne: {text}\nVastus: {label}",
68
+ _instruction_prompt="Tekstülesanne: {text}\n\n"
69
+ "Sinu ülesanne on valida lünka sobiv vastusevariant. "
70
+ "Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
71
+ _labels=["a", "b"],
72
+ )
73
+
74
+ EUROPEAN_VALUES_ET_CONFIG = DatasetConfig(
75
+ name="european-values-et",
76
+ pretty_name="the Estonian version of the European values evaluation dataset",
77
+ huggingface_id="EuroEval/european-values-et",
78
+ task=EUROPEAN_VALUES,
79
+ languages=[ET],
80
+ splits=["test"],
81
+ bootstrap_samples=False,
82
+ _instruction_prompt="{text}",
83
+ )
84
+
85
+
86
+ ### Unofficial datasets ###
87
+
88
+ SCALA_ET_CONFIG = DatasetConfig(
89
+ name="scala-et",
90
+ pretty_name="the Estonian part of the linguistic acceptability dataset ScaLA",
91
+ huggingface_id="EuroEval/scala-et",
92
+ task=LA,
93
+ languages=[ET],
94
+ unofficial=True,
95
+ )
@@ -40,6 +40,44 @@ FOQA_CONFIG = DatasetConfig(
40
40
  languages=[FO],
41
41
  )
42
42
 
43
+ # TODO: No Faroese version of the European values dataset exists yet
44
+ # EUROPEAN_VALUES_FO_CONFIG = DatasetConfig(
45
+ # name="european-values-fo",
46
+ # pretty_name="the Faroese version of the European values evaluation dataset",
47
+ # huggingface_id="EuroEval/european-values-fo",
48
+ # task=EUROPEAN_VALUES,
49
+ # languages=[FO],
50
+ # splits=["test"],
51
+ # bootstrap_samples=False,
52
+ # _instruction_prompt="{text}",
53
+ # )
54
+ #
55
+ # EUROPEAN_VALUES_SITUATIONAL_FO_CONFIG = DatasetConfig(
56
+ # name="european-values-situational-fo",
57
+ # pretty_name="the Faroese version of the European values evaluation dataset, "
58
+ # "where the questions are phrased in a situational way",
59
+ # huggingface_id="EuroEval/european-values-situational-fo",
60
+ # task=EUROPEAN_VALUES,
61
+ # languages=[FO],
62
+ # splits=["test"],
63
+ # bootstrap_samples=False,
64
+ # _instruction_prompt="{text}",
65
+ # unofficial=True,
66
+ # )
67
+ #
68
+ # EUROPEAN_VALUES_COMPLETIONS_FO_CONFIG = DatasetConfig(
69
+ # name="european-values-completions-fo",
70
+ # pretty_name="the Faroese version of the European values evaluation dataset, "
71
+ # "where the questions are phrased as sentence completions",
72
+ # huggingface_id="EuroEval/european-values-completions-fo",
73
+ # task=EUROPEAN_VALUES,
74
+ # languages=[FO],
75
+ # splits=["test"],
76
+ # bootstrap_samples=False,
77
+ # _instruction_prompt="{text}",
78
+ # unofficial=True,
79
+ # )
80
+
43
81
 
44
82
  ### Unofficial datasets ###
45
83
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import FI
5
- from ..tasks import COMMON_SENSE, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -57,6 +57,18 @@ SCALA_FI_CONFIG = DatasetConfig(
57
57
  languages=[FI],
58
58
  )
59
59
 
60
+ EUROPEAN_VALUES_FI_CONFIG = DatasetConfig(
61
+ name="european-values-fi",
62
+ pretty_name="the Finnish version of the European values evaluation dataset",
63
+ huggingface_id="EuroEval/european-values-fi",
64
+ task=EUROPEAN_VALUES,
65
+ languages=[FI],
66
+ splits=["test"],
67
+ bootstrap_samples=False,
68
+ _instruction_prompt="{text}",
69
+ )
70
+
71
+
60
72
  ### Unofficial datasets ###
61
73
 
62
74
  BELEBELE_FI_CONFIG = DatasetConfig(
@@ -88,3 +100,29 @@ GOLDENSWAG_FI_CONFIG = DatasetConfig(
88
100
  languages=[FI],
89
101
  unofficial=True,
90
102
  )
103
+
104
+ EUROPEAN_VALUES_SITUATIONAL_FI_CONFIG = DatasetConfig(
105
+ name="european-values-situational-fi",
106
+ pretty_name="the Finnish version of the European values evaluation dataset, where "
107
+ "the questions are phrased in a situational way",
108
+ huggingface_id="EuroEval/european-values-situational-fi",
109
+ task=EUROPEAN_VALUES,
110
+ languages=[FI],
111
+ splits=["test"],
112
+ bootstrap_samples=False,
113
+ _instruction_prompt="{text}",
114
+ unofficial=True,
115
+ )
116
+
117
+ EUROPEAN_VALUES_COMPLETIONS_FI_CONFIG = DatasetConfig(
118
+ name="european-values-completions-fi",
119
+ pretty_name="the Finnish version of the European values evaluation dataset, where "
120
+ "the questions are phrased as sentence completions",
121
+ huggingface_id="EuroEval/european-values-completions-fi",
122
+ task=EUROPEAN_VALUES,
123
+ languages=[FI],
124
+ splits=["test"],
125
+ bootstrap_samples=False,
126
+ _instruction_prompt="{text}",
127
+ unofficial=True,
128
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import FR
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -69,6 +69,17 @@ HELLASWAG_FR_CONFIG = DatasetConfig(
69
69
  languages=[FR],
70
70
  )
71
71
 
72
+ EUROPEAN_VALUES_FR_CONFIG = DatasetConfig(
73
+ name="european-values-fr",
74
+ pretty_name="the French version of the European values evaluation dataset",
75
+ huggingface_id="EuroEval/european-values-fr",
76
+ task=EUROPEAN_VALUES,
77
+ languages=[FR],
78
+ splits=["test"],
79
+ bootstrap_samples=False,
80
+ _instruction_prompt="{text}",
81
+ )
82
+
72
83
 
73
84
  ### Unofficial datasets ###
74
85
 
@@ -101,3 +112,29 @@ GOLDENSWAG_FR_CONFIG = DatasetConfig(
101
112
  languages=[FR],
102
113
  unofficial=True,
103
114
  )
115
+
116
+ EUROPEAN_VALUES_SITUATIONAL_FR_CONFIG = DatasetConfig(
117
+ name="european-values-situational-fr",
118
+ pretty_name="the French version of the European values evaluation dataset, where "
119
+ "the questions are phrased in a situational way",
120
+ huggingface_id="EuroEval/european-values-situational-fr",
121
+ task=EUROPEAN_VALUES,
122
+ languages=[FR],
123
+ splits=["test"],
124
+ bootstrap_samples=False,
125
+ _instruction_prompt="{text}",
126
+ unofficial=True,
127
+ )
128
+
129
+ EUROPEAN_VALUES_COMPLETIONS_FR_CONFIG = DatasetConfig(
130
+ name="european-values-completions-fr",
131
+ pretty_name="the French version of the European values evaluation dataset, where "
132
+ "the questions are phrased as sentence completions",
133
+ huggingface_id="EuroEval/european-values-completions-fr",
134
+ task=EUROPEAN_VALUES,
135
+ languages=[FR],
136
+ splits=["test"],
137
+ bootstrap_samples=False,
138
+ _instruction_prompt="{text}",
139
+ unofficial=True,
140
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import DE
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -67,6 +67,17 @@ HELLASWAG_DE_CONFIG = DatasetConfig(
67
67
  languages=[DE],
68
68
  )
69
69
 
70
+ EUROPEAN_VALUES_DE_CONFIG = DatasetConfig(
71
+ name="european-values-de",
72
+ pretty_name="the German version of the European values evaluation dataset",
73
+ huggingface_id="EuroEval/european-values-de",
74
+ task=EUROPEAN_VALUES,
75
+ languages=[DE],
76
+ splits=["test"],
77
+ bootstrap_samples=False,
78
+ _instruction_prompt="{text}",
79
+ )
80
+
70
81
 
71
82
  ### Unofficial datasets ###
72
83
 
@@ -109,3 +120,29 @@ GOLDENSWAG_DE_CONFIG = DatasetConfig(
109
120
  languages=[DE],
110
121
  unofficial=True,
111
122
  )
123
+
124
+ EUROPEAN_VALUES_SITUATIONAL_DE_CONFIG = DatasetConfig(
125
+ name="european-values-situational-de",
126
+ pretty_name="the German version of the European values evaluation dataset, where "
127
+ "the questions are phrased in a situational way",
128
+ huggingface_id="EuroEval/european-values-situational-de",
129
+ task=EUROPEAN_VALUES,
130
+ languages=[DE],
131
+ splits=["test"],
132
+ bootstrap_samples=False,
133
+ _instruction_prompt="{text}",
134
+ unofficial=True,
135
+ )
136
+
137
+ EUROPEAN_VALUES_COMPLETIONS_DE_CONFIG = DatasetConfig(
138
+ name="european-values-completions-de",
139
+ pretty_name="the German version of the European values evaluation dataset, where "
140
+ "the questions are phrased as sentence completions",
141
+ huggingface_id="EuroEval/european-values-completions-de",
142
+ task=EUROPEAN_VALUES,
143
+ languages=[DE],
144
+ splits=["test"],
145
+ bootstrap_samples=False,
146
+ _instruction_prompt="{text}",
147
+ unofficial=True,
148
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import IS
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -66,6 +66,18 @@ WINOGRANDE_IS_CONFIG = DatasetConfig(
66
66
  huggingface_id="EuroEval/winogrande-is",
67
67
  task=COMMON_SENSE,
68
68
  languages=[IS],
69
+ _labels=["a", "b"],
70
+ )
71
+
72
+ EUROPEAN_VALUES_IS_CONFIG = DatasetConfig(
73
+ name="european-values-is",
74
+ pretty_name="the Icelandic version of the European values evaluation dataset",
75
+ huggingface_id="EuroEval/european-values-is",
76
+ task=EUROPEAN_VALUES,
77
+ languages=[IS],
78
+ splits=["test"],
79
+ bootstrap_samples=False,
80
+ _instruction_prompt="{text}",
69
81
  )
70
82
 
71
83
 
@@ -156,3 +168,29 @@ MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
156
168
  languages=[IS],
157
169
  unofficial=True,
158
170
  )
171
+
172
+ EUROPEAN_VALUES_SITUATIONAL_IS_CONFIG = DatasetConfig(
173
+ name="european-values-situational-is",
174
+ pretty_name="the Icelandic version of the European values evaluation dataset, "
175
+ "where the questions are phrased in a situational way",
176
+ huggingface_id="EuroEval/european-values-situational-is",
177
+ task=EUROPEAN_VALUES,
178
+ languages=[IS],
179
+ splits=["test"],
180
+ bootstrap_samples=False,
181
+ _instruction_prompt="{text}",
182
+ unofficial=True,
183
+ )
184
+
185
+ EUROPEAN_VALUES_COMPLETIONS_IS_CONFIG = DatasetConfig(
186
+ name="european-values-completions-is",
187
+ pretty_name="the Icelandic version of the European values evaluation dataset, "
188
+ "where the questions are phrased as sentence completions",
189
+ huggingface_id="EuroEval/european-values-completions-is",
190
+ task=EUROPEAN_VALUES,
191
+ languages=[IS],
192
+ splits=["test"],
193
+ bootstrap_samples=False,
194
+ _instruction_prompt="{text}",
195
+ unofficial=True,
196
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import IT
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -67,6 +67,17 @@ HELLASWAG_IT_CONFIG = DatasetConfig(
67
67
  languages=[IT],
68
68
  )
69
69
 
70
+ EUROPEAN_VALUES_IT_CONFIG = DatasetConfig(
71
+ name="european-values-it",
72
+ pretty_name="the Italian version of the European values evaluation dataset",
73
+ huggingface_id="EuroEval/european-values-it",
74
+ task=EUROPEAN_VALUES,
75
+ languages=[IT],
76
+ splits=["test"],
77
+ bootstrap_samples=False,
78
+ _instruction_prompt="{text}",
79
+ )
80
+
70
81
 
71
82
  ### Unofficial datasets ###
72
83
 
@@ -109,3 +120,29 @@ GOLDENSWAG_IT_CONFIG = DatasetConfig(
109
120
  languages=[IT],
110
121
  unofficial=True,
111
122
  )
123
+
124
+ EUROPEAN_VALUES_SITUATIONAL_IT_CONFIG = DatasetConfig(
125
+ name="european-values-situational-it",
126
+ pretty_name="the Italian version of the European values evaluation dataset, "
127
+ "where the questions are phrased in a situational way",
128
+ huggingface_id="EuroEval/european-values-situational-it",
129
+ task=EUROPEAN_VALUES,
130
+ languages=[IT],
131
+ splits=["test"],
132
+ bootstrap_samples=False,
133
+ _instruction_prompt="{text}",
134
+ unofficial=True,
135
+ )
136
+
137
+ EUROPEAN_VALUES_COMPLETIONS_IT_CONFIG = DatasetConfig(
138
+ name="european-values-completions-it",
139
+ pretty_name="the Italian version of the European values evaluation dataset, where "
140
+ "the questions are phrased as sentence completions",
141
+ huggingface_id="EuroEval/european-values-completions-it",
142
+ task=EUROPEAN_VALUES,
143
+ languages=[IT],
144
+ splits=["test"],
145
+ bootstrap_samples=False,
146
+ _instruction_prompt="{text}",
147
+ unofficial=True,
148
+ )
@@ -0,0 +1,81 @@
1
+ """All Latvian dataset configurations used in EuroEval."""
2
+
3
+ from ..data_models import DatasetConfig
4
+ from ..languages import LV
5
+ from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
6
+
7
+ ### Official datasets ###
8
+
9
+ LATVIAN_TWITTER_SENTIMENT_CONFIG = DatasetConfig(
10
+ name="latvian-twitter-sentiment",
11
+ pretty_name="the truncated version of the Latvian sentiment classification dataset",
12
+ huggingface_id="EuroEval/latvian-twitter-sentiment-mini",
13
+ task=SENT,
14
+ languages=[LV],
15
+ )
16
+
17
+ SCALA_LV_CONFIG = DatasetConfig(
18
+ name="scala-lv",
19
+ pretty_name="the Latvian part of the linguistic acceptability dataset ScaLA",
20
+ huggingface_id="EuroEval/scala-lv",
21
+ task=LA,
22
+ languages=[LV],
23
+ )
24
+
25
+ FULLSTACK_NER_LV_CONFIG = DatasetConfig(
26
+ name="fullstack-ner-lv",
27
+ pretty_name="the truncated version of the FullStack NER dataset",
28
+ huggingface_id="EuroEval/fullstack-ner-lv-mini",
29
+ task=NER,
30
+ languages=[LV],
31
+ )
32
+
33
+ MULTI_WIKI_QA_LV_CONFIG = DatasetConfig(
34
+ name="multi-wiki-qa-lv",
35
+ pretty_name="the truncated version of the Latvian part of the reading "
36
+ "comprehension dataset MultiWikiQA",
37
+ huggingface_id="EuroEval/multi-wiki-qa-lv-mini",
38
+ task=RC,
39
+ languages=[LV],
40
+ )
41
+
42
+ LSM_CONFIG = DatasetConfig(
43
+ name="lsm",
44
+ pretty_name="the truncated version of the Latvian summarisation dataset LSM",
45
+ huggingface_id="EuroEval/lsm-mini",
46
+ task=SUMM,
47
+ languages=[LV],
48
+ )
49
+
50
+
51
+ MMLU_LV_CONFIG = DatasetConfig(
52
+ name="mmlu-lv",
53
+ pretty_name="the truncated version of the Latvian knowledge dataset MMLU-lv, "
54
+ "translated from the English MMLU dataset",
55
+ huggingface_id="EuroEval/mmlu-lv-mini",
56
+ task=KNOW,
57
+ languages=[LV],
58
+ )
59
+
60
+ COPA_LV_CONFIG = DatasetConfig(
61
+ name="copa-lv",
62
+ pretty_name="the Latvian common-sense reasoning dataset COPA-lv, translated from "
63
+ "the English COPA dataset",
64
+ huggingface_id="EuroEval/copa-lv",
65
+ task=COMMON_SENSE,
66
+ languages=[LV],
67
+ _labels=["a", "b"],
68
+ )
69
+
70
+
71
+ ### Unofficial datasets ###
72
+
73
+ WIKIANN_LV_CONFIG = DatasetConfig(
74
+ name="wikiann-lv",
75
+ pretty_name="the truncated version of the Latvian part of the named entity "
76
+ "recognition dataset WikiANN",
77
+ huggingface_id="EuroEval/wikiann-lv-mini",
78
+ task=NER,
79
+ languages=[LV],
80
+ unofficial=True,
81
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import NB, NN, NO
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -94,6 +94,17 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
94
94
  _labels=["a", "b", "c", "d", "e"],
95
95
  )
96
96
 
97
+ EUROPEAN_VALUES_NO_CONFIG = DatasetConfig(
98
+ name="european-values-no",
99
+ pretty_name="the Norwegian version of the European values evaluation dataset",
100
+ huggingface_id="EuroEval/european-values-no",
101
+ task=EUROPEAN_VALUES,
102
+ languages=[NB, NN, NO],
103
+ splits=["test"],
104
+ bootstrap_samples=False,
105
+ _instruction_prompt="{text}",
106
+ )
107
+
97
108
 
98
109
  ### Unofficial datasets ###
99
110
 
@@ -204,3 +215,29 @@ MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
204
215
  languages=[NN],
205
216
  unofficial=True,
206
217
  )
218
+
219
+ EUROPEAN_VALUES_SITUATIONAL_NO_CONFIG = DatasetConfig(
220
+ name="european-values-situational-no",
221
+ pretty_name="the Norwegian version of the European values evaluation dataset, "
222
+ "where the questions are phrased in a situational way",
223
+ huggingface_id="EuroEval/european-values-situational-no",
224
+ task=EUROPEAN_VALUES,
225
+ languages=[NB, NN, NO],
226
+ splits=["test"],
227
+ bootstrap_samples=False,
228
+ _instruction_prompt="{text}",
229
+ unofficial=True,
230
+ )
231
+
232
+ EUROPEAN_VALUES_COMPLETIONS_NO_CONFIG = DatasetConfig(
233
+ name="european-values-completions-no",
234
+ pretty_name="the Norwegian version of the European values evaluation dataset, "
235
+ "where the questions are phrased as sentence completions",
236
+ huggingface_id="EuroEval/european-values-completions-no",
237
+ task=EUROPEAN_VALUES,
238
+ languages=[NO],
239
+ splits=["test"],
240
+ bootstrap_samples=False,
241
+ _instruction_prompt="{text}",
242
+ unofficial=True,
243
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from ..data_models import DatasetConfig
4
4
  from ..languages import PT
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
@@ -67,6 +67,17 @@ GOLDENSWAG_PT_CONFIG = DatasetConfig(
67
67
  languages=[PT],
68
68
  )
69
69
 
70
+ EUROPEAN_VALUES_PT_CONFIG = DatasetConfig(
71
+ name="european-values-pt",
72
+ pretty_name="the Portuguese version of the European values evaluation dataset",
73
+ huggingface_id="EuroEval/european-values-pt",
74
+ task=EUROPEAN_VALUES,
75
+ languages=[PT],
76
+ splits=["test"],
77
+ bootstrap_samples=False,
78
+ _instruction_prompt="{text}",
79
+ )
80
+
70
81
 
71
82
  ### Unofficial datasets ###
72
83
 
@@ -79,3 +90,29 @@ BOOLQ_PT_CONFIG = DatasetConfig(
79
90
  languages=[PT],
80
91
  unofficial=True,
81
92
  )
93
+
94
+ EUROPEAN_VALUES_SITUATIONAL_PT_CONFIG = DatasetConfig(
95
+ name="european-values-situational-pt",
96
+ pretty_name="the Portuguese version of the European values evaluation dataset, "
97
+ "where the questions are phrased in a situational way",
98
+ huggingface_id="EuroEval/european-values-situational-pt",
99
+ task=EUROPEAN_VALUES,
100
+ languages=[PT],
101
+ splits=["test"],
102
+ bootstrap_samples=False,
103
+ _instruction_prompt="{text}",
104
+ unofficial=True,
105
+ )
106
+
107
+ EUROPEAN_VALUES_COMPLETIONS_PT_CONFIG = DatasetConfig(
108
+ name="european-values-completions-pt",
109
+ pretty_name="the Portuguese version of the European values evaluation dataset, "
110
+ "where the questions are phrased as sentence completions",
111
+ huggingface_id="EuroEval/european-values-completions-pt",
112
+ task=EUROPEAN_VALUES,
113
+ languages=[PT],
114
+ splits=["test"],
115
+ bootstrap_samples=False,
116
+ _instruction_prompt="{text}",
117
+ unofficial=True,
118
+ )