EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. euroeval/__init__.py +32 -14
  2. euroeval/benchmark_config_factory.py +92 -180
  3. euroeval/benchmark_modules/base.py +49 -39
  4. euroeval/benchmark_modules/fresh.py +35 -21
  5. euroeval/benchmark_modules/hf.py +280 -244
  6. euroeval/benchmark_modules/litellm.py +752 -312
  7. euroeval/benchmark_modules/vllm.py +570 -268
  8. euroeval/benchmarker.py +651 -528
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/cli.py +49 -38
  12. euroeval/constants.py +44 -25
  13. euroeval/data_loading.py +111 -55
  14. euroeval/data_models.py +490 -323
  15. euroeval/dataset_configs/__init__.py +26 -4
  16. euroeval/dataset_configs/bosnian.py +39 -0
  17. euroeval/dataset_configs/bulgarian.py +56 -0
  18. euroeval/dataset_configs/croatian.py +56 -0
  19. euroeval/dataset_configs/czech.py +75 -0
  20. euroeval/dataset_configs/danish.py +78 -50
  21. euroeval/dataset_configs/dutch.py +74 -44
  22. euroeval/dataset_configs/english.py +71 -36
  23. euroeval/dataset_configs/estonian.py +111 -0
  24. euroeval/dataset_configs/faroese.py +25 -18
  25. euroeval/dataset_configs/finnish.py +63 -26
  26. euroeval/dataset_configs/french.py +65 -32
  27. euroeval/dataset_configs/german.py +77 -36
  28. euroeval/dataset_configs/greek.py +64 -0
  29. euroeval/dataset_configs/icelandic.py +68 -57
  30. euroeval/dataset_configs/italian.py +68 -36
  31. euroeval/dataset_configs/latvian.py +87 -0
  32. euroeval/dataset_configs/lithuanian.py +64 -0
  33. euroeval/dataset_configs/norwegian.py +98 -72
  34. euroeval/dataset_configs/polish.py +96 -0
  35. euroeval/dataset_configs/portuguese.py +63 -40
  36. euroeval/dataset_configs/serbian.py +64 -0
  37. euroeval/dataset_configs/slovak.py +55 -0
  38. euroeval/dataset_configs/slovene.py +56 -0
  39. euroeval/dataset_configs/spanish.py +68 -34
  40. euroeval/dataset_configs/swedish.py +82 -41
  41. euroeval/dataset_configs/ukrainian.py +64 -0
  42. euroeval/enums.py +12 -6
  43. euroeval/exceptions.py +21 -1
  44. euroeval/finetuning.py +34 -26
  45. euroeval/generation.py +76 -41
  46. euroeval/generation_utils.py +169 -34
  47. euroeval/languages.py +1020 -188
  48. euroeval/logging_utils.py +268 -0
  49. euroeval/metrics/__init__.py +6 -0
  50. euroeval/metrics/base.py +85 -0
  51. euroeval/metrics/huggingface.py +216 -0
  52. euroeval/metrics/llm_as_a_judge.py +260 -0
  53. euroeval/metrics/pipeline.py +289 -0
  54. euroeval/metrics/speed.py +48 -0
  55. euroeval/model_cache.py +40 -21
  56. euroeval/model_config.py +4 -5
  57. euroeval/model_loading.py +3 -0
  58. euroeval/prompt_templates/__init__.py +2 -0
  59. euroeval/prompt_templates/classification.py +206 -0
  60. euroeval/prompt_templates/linguistic_acceptability.py +157 -22
  61. euroeval/prompt_templates/multiple_choice.py +159 -17
  62. euroeval/prompt_templates/named_entity_recognition.py +318 -21
  63. euroeval/prompt_templates/reading_comprehension.py +207 -16
  64. euroeval/prompt_templates/sentiment_classification.py +205 -22
  65. euroeval/prompt_templates/summarization.py +122 -22
  66. euroeval/prompt_templates/token_classification.py +279 -0
  67. euroeval/scores.py +20 -9
  68. euroeval/speed_benchmark.py +11 -12
  69. euroeval/task_group_utils/multiple_choice_classification.py +21 -12
  70. euroeval/task_group_utils/question_answering.py +101 -73
  71. euroeval/task_group_utils/sequence_classification.py +144 -61
  72. euroeval/task_group_utils/text_to_text.py +33 -12
  73. euroeval/task_group_utils/token_classification.py +86 -89
  74. euroeval/tasks.py +75 -16
  75. euroeval/tokenisation_utils.py +603 -0
  76. euroeval/types.py +17 -11
  77. euroeval/utils.py +332 -137
  78. euroeval-16.7.1.dist-info/METADATA +623 -0
  79. euroeval-16.7.1.dist-info/RECORD +84 -0
  80. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
  81. euroeval/human_evaluation.py +0 -737
  82. euroeval/metrics.py +0 -452
  83. euroeval/tokenization_utils.py +0 -498
  84. euroeval-15.12.0.dist-info/METADATA +0 -285
  85. euroeval-15.12.0.dist-info/RECORD +0 -63
  86. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
  87. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,91 +1,132 @@
1
1
  """All German dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import DE
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import GERMAN
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SB10K_CONFIG = DatasetConfig(
10
10
  name="sb10k",
11
- pretty_name="the truncated version of the German sentiment classification "
12
- "dataset SB10k",
13
- huggingface_id="EuroEval/sb10k-mini",
11
+ pretty_name="SB10K",
12
+ source="EuroEval/sb10k-mini",
14
13
  task=SENT,
15
- languages=[DE],
14
+ languages=[GERMAN],
16
15
  )
17
16
 
18
17
  SCALA_DE_CONFIG = DatasetConfig(
19
18
  name="scala-de",
20
- pretty_name="the German part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-de",
19
+ pretty_name="ScaLA-de",
20
+ source="EuroEval/scala-de",
22
21
  task=LA,
23
- languages=[DE],
22
+ languages=[GERMAN],
24
23
  )
25
24
 
26
25
  GERMEVAL_CONFIG = DatasetConfig(
27
26
  name="germeval",
28
- pretty_name="the truncated version of the German named entity recognition "
29
- "dataset GermEval",
30
- huggingface_id="EuroEval/germeval-mini",
27
+ pretty_name="GermEval",
28
+ source="EuroEval/germeval-mini",
31
29
  task=NER,
32
- languages=[DE],
30
+ languages=[GERMAN],
33
31
  )
34
32
 
35
33
  GERMANQUAD_CONFIG = DatasetConfig(
36
34
  name="germanquad",
37
- pretty_name="the truncated version of the German reading comprehension dataset "
38
- "GermanQuAD",
39
- huggingface_id="EuroEval/germanquad-mini",
35
+ pretty_name="GermanQuAD",
36
+ source="EuroEval/germanquad-mini",
40
37
  task=RC,
41
- languages=[DE],
38
+ languages=[GERMAN],
42
39
  )
43
40
 
44
41
  MLSUM_DE_CONFIG = DatasetConfig(
45
42
  name="mlsum-de",
46
- pretty_name="the truncated version of the German summarisation dataset MLSum-de",
47
- huggingface_id="EuroEval/mlsum-mini",
43
+ pretty_name="MLSUM-de",
44
+ source="EuroEval/mlsum-mini",
48
45
  task=SUMM,
49
- languages=[DE],
46
+ languages=[GERMAN],
50
47
  )
51
48
 
52
49
  MMLU_DE_CONFIG = DatasetConfig(
53
50
  name="mmlu-de",
54
- pretty_name="the truncated version of the German knowledge dataset MMLU-de, "
55
- "translated from the English MMLU dataset",
56
- huggingface_id="EuroEval/mmlu-de-mini",
51
+ pretty_name="MMLU-de",
52
+ source="EuroEval/mmlu-de-mini",
57
53
  task=KNOW,
58
- languages=[DE],
54
+ languages=[GERMAN],
59
55
  )
60
56
 
61
57
  HELLASWAG_DE_CONFIG = DatasetConfig(
62
58
  name="hellaswag-de",
63
- pretty_name="the truncated version of the German common-sense reasoning dataset "
64
- "HellaSwag-de, translated from the English HellaSwag dataset",
65
- huggingface_id="EuroEval/hellaswag-de-mini",
59
+ pretty_name="HellaSwag-de",
60
+ source="EuroEval/hellaswag-de-mini",
66
61
  task=COMMON_SENSE,
67
- languages=[DE],
62
+ languages=[GERMAN],
63
+ )
64
+
65
+ VALEU_DE_CONFIG = DatasetConfig(
66
+ name="valeu-de",
67
+ pretty_name="VaLEU-de",
68
+ source="EuroEval/european-values-de",
69
+ task=EUROPEAN_VALUES,
70
+ languages=[GERMAN],
71
+ splits=["test"],
72
+ bootstrap_samples=False,
73
+ _instruction_prompt="{text}",
68
74
  )
69
75
 
70
76
 
71
77
  ### Unofficial datasets ###
72
78
 
79
+ XQUAD_DE_CONFIG = DatasetConfig(
80
+ name="xquad-de",
81
+ pretty_name="XQuAD-de",
82
+ source="EuroEval/xquad-de",
83
+ task=RC,
84
+ languages=[GERMAN],
85
+ unofficial=True,
86
+ )
87
+
73
88
  ARC_DE_CONFIG = DatasetConfig(
74
89
  name="arc-de",
75
- pretty_name="the truncated version of the German knowledge dataset ARC-de, "
76
- "translated from the English ARC dataset",
77
- huggingface_id="EuroEval/arc-de-mini",
90
+ pretty_name="ARC-de",
91
+ source="EuroEval/arc-de-mini",
78
92
  task=KNOW,
79
- languages=[DE],
93
+ languages=[GERMAN],
80
94
  unofficial=True,
81
95
  )
82
96
 
83
97
  BELEBELE_DE_CONFIG = DatasetConfig(
84
98
  name="belebele-de",
85
- pretty_name="the German multiple choice reading comprehension dataset BeleBele-de, "
86
- "translated from the English BeleBele dataset",
87
- huggingface_id="EuroEval/belebele-de-mini",
99
+ pretty_name="Belebele-de",
100
+ source="EuroEval/belebele-de-mini",
88
101
  task=MCRC,
89
- languages=[DE],
102
+ languages=[GERMAN],
103
+ unofficial=True,
104
+ )
105
+
106
+ MULTI_WIKI_QA_DE_CONFIG = DatasetConfig(
107
+ name="multi-wiki-qa-de",
108
+ pretty_name="MultiWikiQA-de",
109
+ source="EuroEval/multi-wiki-qa-de-mini",
110
+ task=RC,
111
+ languages=[GERMAN],
112
+ unofficial=True,
113
+ )
114
+
115
+ GOLDENSWAG_DE_CONFIG = DatasetConfig(
116
+ name="goldenswag-de",
117
+ pretty_name="GoldenSwag-de",
118
+ source="EuroEval/goldenswag-de-mini",
119
+ task=COMMON_SENSE,
120
+ languages=[GERMAN],
121
+ unofficial=True,
122
+ )
123
+
124
+ WINOGRANDE_DE_CONFIG = DatasetConfig(
125
+ name="winogrande-de",
126
+ pretty_name="Winogrande-de",
127
+ source="EuroEval/winogrande-de",
128
+ task=COMMON_SENSE,
129
+ languages=[GERMAN],
130
+ _labels=["a", "b"],
90
131
  unofficial=True,
91
132
  )
@@ -0,0 +1,64 @@
1
+ """All Greek dataset configurations used in EuroEval."""
2
+
3
+ from ..data_models import DatasetConfig
4
+ from ..languages import GREEK
5
+ from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
6
+
7
+ ### Official datasets ###
8
+
9
+ GREEK_SA_CONFIG = DatasetConfig(
10
+ name="greek-sa",
11
+ pretty_name="Greek Sentiment Analysis",
12
+ source="EuroEval/greek-sa-mini",
13
+ task=SENT,
14
+ languages=[GREEK],
15
+ _labels=["negative", "positive"],
16
+ )
17
+
18
+ SCALA_EL_CONFIG = DatasetConfig(
19
+ name="scala-el",
20
+ pretty_name="ScaLA-el",
21
+ source="EuroEval/scala-el",
22
+ task=LA,
23
+ languages=[GREEK],
24
+ )
25
+
26
+ ELNER_CONFIG = DatasetConfig(
27
+ name="elner",
28
+ pretty_name="ElNER",
29
+ source="EuroEval/elner-mini",
30
+ task=NER,
31
+ languages=[GREEK],
32
+ )
33
+
34
+ MULTI_WIKI_QA_EL_CONFIG = DatasetConfig(
35
+ name="multi-wiki-qa-el",
36
+ pretty_name="MultiWikiQA-el",
37
+ source="EuroEval/multi-wiki-qa-el-mini",
38
+ task=RC,
39
+ languages=[GREEK],
40
+ )
41
+
42
+ GREEK_WIKIPEDIA_CONFIG = DatasetConfig(
43
+ name="greek-wikipedia",
44
+ pretty_name="Greek Wikipedia",
45
+ source="EuroEval/greek-wikipedia-mini",
46
+ task=SUMM,
47
+ languages=[GREEK],
48
+ )
49
+
50
+ GLOBAL_MMLU_EL_CONFIG = DatasetConfig(
51
+ name="global-mmlu-el",
52
+ pretty_name="GlobalMMLU-el",
53
+ source="EuroEval/global-mmlu-el-mini",
54
+ task=KNOW,
55
+ languages=[GREEK],
56
+ )
57
+
58
+ WINOGRANDE_EL_CONFIG = DatasetConfig(
59
+ name="winogrande-el",
60
+ pretty_name="Winogrande-el",
61
+ source="EuroEval/winogrande-el",
62
+ task=COMMON_SENSE,
63
+ languages=[GREEK],
64
+ )
@@ -1,71 +1,77 @@
1
1
  """All Icelandic dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import IS
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import ICELANDIC
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  HOTTER_AND_COLDER_SENTIMENT_CONFIG = DatasetConfig(
10
10
  name="hotter-and-colder-sentiment",
11
- pretty_name="the sentiment classification part of the Icelandic dataset Hotter "
12
- "and Colder",
13
- huggingface_id="EuroEval/hotter-and-colder-sentiment",
11
+ pretty_name="Hotter and Colder Sentiment",
12
+ source="EuroEval/hotter-and-colder-sentiment",
14
13
  task=SENT,
15
- languages=[IS],
14
+ languages=[ICELANDIC],
16
15
  )
17
16
 
18
17
  SCALA_IS_CONFIG = DatasetConfig(
19
18
  name="scala-is",
20
- pretty_name="the Icelandic part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-is",
19
+ pretty_name="ScaLA-is",
20
+ source="EuroEval/scala-is",
22
21
  task=LA,
23
- languages=[IS],
22
+ languages=[ICELANDIC],
24
23
  )
25
24
 
26
25
  MIM_GOLD_NER_CONFIG = DatasetConfig(
27
26
  name="mim-gold-ner",
28
- pretty_name="the truncated version of the Icelandic named entity recognition "
29
- "dataset MIM-GOLD-NER",
30
- huggingface_id="EuroEval/mim-gold-ner-mini",
27
+ pretty_name="MIM-GOLD-NER",
28
+ source="EuroEval/mim-gold-ner-mini",
31
29
  task=NER,
32
- languages=[IS],
30
+ languages=[ICELANDIC],
33
31
  )
34
32
 
35
33
  NQII_CONFIG = DatasetConfig(
36
34
  name="nqii",
37
- pretty_name="the truncated version of the Icelandic reading comprehension dataset "
38
- "Natural Questions in Icelandic",
39
- huggingface_id="EuroEval/nqii-mini",
35
+ pretty_name="NQiI",
36
+ source="EuroEval/nqii-mini",
40
37
  task=RC,
41
- languages=[IS],
38
+ languages=[ICELANDIC],
42
39
  )
43
40
 
44
41
  RRN_CONFIG = DatasetConfig(
45
42
  name="rrn",
46
- pretty_name="the truncated version of the Icelandic summarisation dataset "
47
- "RÚV Radio News",
48
- huggingface_id="EuroEval/rrn-mini",
43
+ pretty_name="RRN",
44
+ source="EuroEval/rrn-mini",
49
45
  task=SUMM,
50
- languages=[IS],
46
+ languages=[ICELANDIC],
51
47
  )
52
48
 
53
49
  ICELANDIC_KNOWLEDGE_CONFIG = DatasetConfig(
54
50
  name="icelandic-knowledge",
55
- pretty_name="the Icelandic knowledge dataset IcelandicKnowledge, derived from the "
56
- "IcelandicQA dataset",
57
- huggingface_id="EuroEval/icelandic-knowledge",
51
+ pretty_name="Icelandic Knowledge",
52
+ source="EuroEval/icelandic-knowledge",
58
53
  task=KNOW,
59
- languages=[IS],
54
+ languages=[ICELANDIC],
60
55
  )
61
56
 
62
57
  WINOGRANDE_IS_CONFIG = DatasetConfig(
63
58
  name="winogrande-is",
64
- pretty_name="the Icelandic common-sense reasoning dataset "
65
- "Winogrande-is, manually translated from the English Winogrande dataset",
66
- huggingface_id="EuroEval/winogrande-is",
59
+ pretty_name="Winogrande-is",
60
+ source="EuroEval/winogrande-is",
67
61
  task=COMMON_SENSE,
68
- languages=[IS],
62
+ languages=[ICELANDIC],
63
+ _labels=["a", "b"],
64
+ )
65
+
66
+ VALEU_IS_CONFIG = DatasetConfig(
67
+ name="valeu-is",
68
+ pretty_name="VaLEU-is",
69
+ source="EuroEval/european-values-is",
70
+ task=EUROPEAN_VALUES,
71
+ languages=[ICELANDIC],
72
+ splits=["test"],
73
+ bootstrap_samples=False,
74
+ _instruction_prompt="{text}",
69
75
  )
70
76
 
71
77
 
@@ -73,76 +79,81 @@ WINOGRANDE_IS_CONFIG = DatasetConfig(
73
79
 
74
80
  ICE_EC_CONFIG = DatasetConfig(
75
81
  name="ice-ec",
76
- pretty_name="the truncated version of the Icelandic Error Corpus",
77
- huggingface_id="EuroEval/ice-ec",
82
+ pretty_name="ICE-EC",
83
+ source="EuroEval/ice-ec",
78
84
  task=LA,
79
- languages=[IS],
85
+ languages=[ICELANDIC],
80
86
  unofficial=True,
81
87
  )
82
88
 
83
89
  ICE_EC_FULL_CONFIG = DatasetConfig(
84
90
  name="ice-ec-full",
85
- pretty_name="the Icelandic Error Corpus",
86
- huggingface_id="EuroEval/ice-ec-full",
91
+ pretty_name="ICE-EC Full",
92
+ source="EuroEval/ice-ec-full",
87
93
  task=LA,
88
- languages=[IS],
94
+ languages=[ICELANDIC],
89
95
  unofficial=True,
90
96
  )
91
97
 
92
98
  ICE_LINGUISTIC_CONFIG = DatasetConfig(
93
99
  name="ice-linguistic",
94
- pretty_name="the Icelandic linguistic acceptability dataset IceLinguistic",
95
- huggingface_id="EuroEval/ice-linguistic",
100
+ pretty_name="IceLinguistic",
101
+ source="EuroEval/ice-linguistic",
96
102
  task=LA,
97
- languages=[IS],
103
+ languages=[ICELANDIC],
98
104
  unofficial=True,
99
105
  )
100
106
 
101
107
  ICELANDIC_QA_CONFIG = DatasetConfig(
102
108
  name="icelandic-qa",
103
- pretty_name="the Icelandic reading comprehension dataset IcelandicQA",
104
- huggingface_id="EuroEval/icelandic-qa",
109
+ pretty_name="Icelandic QA",
110
+ source="EuroEval/icelandic-qa",
105
111
  task=RC,
106
- languages=[IS],
112
+ languages=[ICELANDIC],
107
113
  unofficial=True,
108
114
  )
109
115
 
110
116
  MMLU_IS_CONFIG = DatasetConfig(
111
117
  name="mmlu-is",
112
- pretty_name="the truncated version of the Icelandic knowledge dataset MMLU-is, "
113
- "translated from the English MMLU dataset",
114
- huggingface_id="EuroEval/mmlu-is-mini",
118
+ pretty_name="MMLU-is",
119
+ source="EuroEval/mmlu-is-mini",
115
120
  task=KNOW,
116
- languages=[IS],
121
+ languages=[ICELANDIC],
117
122
  unofficial=True,
118
123
  )
119
124
 
120
125
  ARC_IS_CONFIG = DatasetConfig(
121
126
  name="arc-is",
122
- pretty_name="the truncated version of the Icelandic knowledge dataset ARC-is, "
123
- "translated from the English ARC dataset",
124
- huggingface_id="EuroEval/arc-is-mini",
127
+ pretty_name="ARC-is",
128
+ source="EuroEval/arc-is-mini",
125
129
  task=KNOW,
126
- languages=[IS],
130
+ languages=[ICELANDIC],
127
131
  unofficial=True,
128
132
  )
129
133
 
130
134
  HELLASWAG_IS_CONFIG = DatasetConfig(
131
135
  name="hellaswag-is",
132
- pretty_name="the truncated version of the Icelandic common-sense reasoning dataset "
133
- "HellaSwag-is, translated from the English HellaSwag dataset",
134
- huggingface_id="EuroEval/hellaswag-is-mini",
136
+ pretty_name="HellaSwag-is",
137
+ source="EuroEval/hellaswag-is-mini",
135
138
  task=COMMON_SENSE,
136
- languages=[IS],
139
+ languages=[ICELANDIC],
137
140
  unofficial=True,
138
141
  )
139
142
 
140
143
  BELEBELE_IS_CONFIG = DatasetConfig(
141
144
  name="belebele-is",
142
- pretty_name="the Icelandic multiple choice reading comprehension dataset "
143
- "BeleBele-is, translated from the English BeleBele dataset",
144
- huggingface_id="EuroEval/belebele-is-mini",
145
+ pretty_name="Belebele-is",
146
+ source="EuroEval/belebele-is-mini",
145
147
  task=MCRC,
146
- languages=[IS],
148
+ languages=[ICELANDIC],
149
+ unofficial=True,
150
+ )
151
+
152
+ MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
153
+ name="multi-wiki-qa-is",
154
+ pretty_name="MultiWikiQA-is",
155
+ source="EuroEval/multi-wiki-qa-is-mini",
156
+ task=RC,
157
+ languages=[ICELANDIC],
147
158
  unofficial=True,
148
159
  )
@@ -1,70 +1,76 @@
1
1
  """All Italian dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import IT
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import ITALIAN
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SENTIPOLC_CONFIG = DatasetConfig(
10
10
  name="sentipolc16",
11
- pretty_name="the truncated version of the Italian sentiment classification "
12
- "dataset Sentipolc-16",
13
- huggingface_id="EuroEval/sentipolc16-mini",
11
+ pretty_name="Sentipolc16",
12
+ source="EuroEval/sentipolc16-mini",
14
13
  task=SENT,
15
- languages=[IT],
14
+ languages=[ITALIAN],
16
15
  )
17
16
 
18
17
  SCALA_IT_CONFIG = DatasetConfig(
19
18
  name="scala-it",
20
- pretty_name="the Italian part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-it",
19
+ pretty_name="ScaLA-it",
20
+ source="EuroEval/scala-it",
22
21
  task=LA,
23
- languages=[IT],
22
+ languages=[ITALIAN],
24
23
  )
25
24
 
26
25
  MULTINERD_IT_CONFIG = DatasetConfig(
27
26
  name="multinerd-it",
28
- pretty_name="the truncated version of the Italian part of the named "
29
- "entity recognition dataset MultiNERD",
30
- huggingface_id="EuroEval/multinerd-mini-it",
27
+ pretty_name="MultiNERD-it",
28
+ source="EuroEval/multinerd-mini-it",
31
29
  task=NER,
32
- languages=[IT],
30
+ languages=[ITALIAN],
33
31
  )
34
32
 
35
33
  SQUAD_IT_CONFIG = DatasetConfig(
36
34
  name="squad-it",
37
- pretty_name="the truncated version of the Italian reading comprehension dataset "
38
- "SQuAD-it, translated from the English SQuAD dataset",
39
- huggingface_id="EuroEval/squad-it-mini",
35
+ pretty_name="SQuAD-it",
36
+ source="EuroEval/squad-it-mini",
40
37
  task=RC,
41
- languages=[IT],
38
+ languages=[ITALIAN],
42
39
  )
43
40
 
44
41
  ILPOST_SUM_CONFIG = DatasetConfig(
45
42
  name="ilpost-sum",
46
- pretty_name="the truncated version of the Italian summarisation dataset IlPost-Sum",
47
- huggingface_id="EuroEval/ilpost-sum",
43
+ pretty_name="IlPost-Sum",
44
+ source="EuroEval/ilpost-sum",
48
45
  task=SUMM,
49
- languages=[IT],
46
+ languages=[ITALIAN],
50
47
  )
51
48
 
52
49
  MMLU_IT_CONFIG = DatasetConfig(
53
50
  name="mmlu-it",
54
- pretty_name="the truncated version of the Italian knowledge dataset MMLU-it, "
55
- "translated from the English MMLU dataset",
56
- huggingface_id="EuroEval/mmlu-it-mini",
51
+ pretty_name="MMLU-it",
52
+ source="EuroEval/mmlu-it-mini",
57
53
  task=KNOW,
58
- languages=[IT],
54
+ languages=[ITALIAN],
59
55
  )
60
56
 
61
57
  HELLASWAG_IT_CONFIG = DatasetConfig(
62
58
  name="hellaswag-it",
63
- pretty_name="the truncated version of the Italian common-sense reasoning dataset "
64
- "HellaSwag-it, translated from the English HellaSwag dataset",
65
- huggingface_id="EuroEval/hellaswag-it-mini",
59
+ pretty_name="HellaSwag-it",
60
+ source="EuroEval/hellaswag-it-mini",
66
61
  task=COMMON_SENSE,
67
- languages=[IT],
62
+ languages=[ITALIAN],
63
+ )
64
+
65
+ VALEU_IT_CONFIG = DatasetConfig(
66
+ name="valeu-it",
67
+ pretty_name="VaLEU-it",
68
+ source="EuroEval/european-values-it",
69
+ task=EUROPEAN_VALUES,
70
+ languages=[ITALIAN],
71
+ splits=["test"],
72
+ bootstrap_samples=False,
73
+ _instruction_prompt="{text}",
68
74
  )
69
75
 
70
76
 
@@ -72,20 +78,46 @@ HELLASWAG_IT_CONFIG = DatasetConfig(
72
78
 
73
79
  WIKINEURAL_IT_CONFIG = DatasetConfig(
74
80
  name="wikineural-it",
75
- pretty_name="the truncated version of the Italian named "
76
- "entity recognition dataset WikiNEuRal IT",
77
- huggingface_id="EuroEval/wikineural-mini-it",
81
+ pretty_name="WikiNeural-it",
82
+ source="EuroEval/wikineural-mini-it",
78
83
  task=NER,
79
- languages=[IT],
84
+ languages=[ITALIAN],
80
85
  unofficial=True,
81
86
  )
82
87
 
83
88
  BELEBELE_IT_CONFIG = DatasetConfig(
84
89
  name="belebele-it",
85
- pretty_name="the Italian multiple choice reading comprehension dataset "
86
- "BeleBele-it, translated from the English BeleBele dataset",
87
- huggingface_id="EuroEval/belebele-it-mini",
90
+ pretty_name="Belebele-it",
91
+ source="EuroEval/belebele-it-mini",
88
92
  task=MCRC,
89
- languages=[IT],
93
+ languages=[ITALIAN],
94
+ unofficial=True,
95
+ )
96
+
97
+ MULTI_WIKI_QA_IT_CONFIG = DatasetConfig(
98
+ name="multi-wiki-qa-it",
99
+ pretty_name="MultiWikiQA-it",
100
+ source="EuroEval/multi-wiki-qa-it-mini",
101
+ task=RC,
102
+ languages=[ITALIAN],
103
+ unofficial=True,
104
+ )
105
+
106
+ GOLDENSWAG_IT_CONFIG = DatasetConfig(
107
+ name="goldenswag-it",
108
+ pretty_name="GoldenSwag-it",
109
+ source="EuroEval/goldenswag-it-mini",
110
+ task=COMMON_SENSE,
111
+ languages=[ITALIAN],
112
+ unofficial=True,
113
+ )
114
+
115
+ WINOGRANDE_IT_CONFIG = DatasetConfig(
116
+ name="winogrande-it",
117
+ pretty_name="Winogrande-it",
118
+ source="EuroEval/winogrande-it",
119
+ task=COMMON_SENSE,
120
+ languages=[ITALIAN],
121
+ _labels=["a", "b"],
90
122
  unofficial=True,
91
123
  )