EuroEval 16.4.0__py3-none-any.whl → 16.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (71)
  1. euroeval/__init__.py +6 -0
  2. euroeval/benchmark_config_factory.py +51 -46
  3. euroeval/benchmark_modules/base.py +6 -5
  4. euroeval/benchmark_modules/hf.py +2 -9
  5. euroeval/benchmark_modules/litellm.py +14 -12
  6. euroeval/benchmark_modules/vllm.py +17 -10
  7. euroeval/benchmarker.py +61 -44
  8. euroeval/caching_utils.py +1 -1
  9. euroeval/cli.py +86 -8
  10. euroeval/constants.py +3 -0
  11. euroeval/data_loading.py +78 -30
  12. euroeval/data_models.py +326 -326
  13. euroeval/dataset_configs/__init__.py +10 -3
  14. euroeval/dataset_configs/bulgarian.py +56 -0
  15. euroeval/dataset_configs/czech.py +25 -29
  16. euroeval/dataset_configs/danish.py +51 -88
  17. euroeval/dataset_configs/dutch.py +48 -86
  18. euroeval/dataset_configs/english.py +45 -76
  19. euroeval/dataset_configs/estonian.py +36 -38
  20. euroeval/dataset_configs/faroese.py +19 -60
  21. euroeval/dataset_configs/finnish.py +36 -68
  22. euroeval/dataset_configs/french.py +39 -74
  23. euroeval/dataset_configs/german.py +45 -81
  24. euroeval/dataset_configs/greek.py +64 -0
  25. euroeval/dataset_configs/icelandic.py +54 -91
  26. euroeval/dataset_configs/italian.py +42 -78
  27. euroeval/dataset_configs/latvian.py +28 -34
  28. euroeval/dataset_configs/lithuanian.py +22 -26
  29. euroeval/dataset_configs/norwegian.py +72 -114
  30. euroeval/dataset_configs/polish.py +33 -60
  31. euroeval/dataset_configs/portuguese.py +33 -65
  32. euroeval/dataset_configs/serbian.py +64 -0
  33. euroeval/dataset_configs/slovak.py +19 -24
  34. euroeval/dataset_configs/spanish.py +42 -76
  35. euroeval/dataset_configs/swedish.py +48 -84
  36. euroeval/dataset_configs/ukrainian.py +64 -0
  37. euroeval/exceptions.py +1 -1
  38. euroeval/finetuning.py +3 -2
  39. euroeval/generation.py +5 -4
  40. euroeval/generation_utils.py +6 -5
  41. euroeval/languages.py +395 -323
  42. euroeval/metrics/huggingface.py +14 -3
  43. euroeval/metrics/llm_as_a_judge.py +1 -1
  44. euroeval/model_cache.py +6 -5
  45. euroeval/model_loading.py +1 -1
  46. euroeval/prompt_templates/__init__.py +2 -0
  47. euroeval/prompt_templates/classification.py +206 -0
  48. euroeval/prompt_templates/linguistic_acceptability.py +82 -43
  49. euroeval/prompt_templates/multiple_choice.py +81 -41
  50. euroeval/prompt_templates/named_entity_recognition.py +125 -44
  51. euroeval/prompt_templates/reading_comprehension.py +92 -43
  52. euroeval/prompt_templates/sentiment_classification.py +91 -43
  53. euroeval/prompt_templates/summarization.py +64 -39
  54. euroeval/prompt_templates/token_classification.py +279 -0
  55. euroeval/scores.py +4 -3
  56. euroeval/speed_benchmark.py +2 -1
  57. euroeval/task_group_utils/multiple_choice_classification.py +2 -1
  58. euroeval/task_group_utils/question_answering.py +24 -13
  59. euroeval/task_group_utils/sequence_classification.py +5 -4
  60. euroeval/task_group_utils/text_to_text.py +2 -1
  61. euroeval/task_group_utils/token_classification.py +11 -8
  62. euroeval/tasks.py +44 -1
  63. euroeval/tokenisation_utils.py +19 -10
  64. euroeval/types.py +10 -9
  65. euroeval/utils.py +6 -3
  66. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +194 -37
  67. euroeval-16.5.0.dist-info/RECORD +81 -0
  68. euroeval-16.4.0.dist-info/RECORD +0 -75
  69. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
  70. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
  71. {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,77 +1,73 @@
1
1
  """All English dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import EN
4
+ from ..languages import ENGLISH
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SST5_CONFIG = DatasetConfig(
10
10
  name="sst5",
11
- pretty_name="the truncated version of the English sentiment classification "
12
- "dataset SST5",
13
- huggingface_id="EuroEval/sst5-mini",
11
+ pretty_name="SST-5",
12
+ source="EuroEval/sst5-mini",
14
13
  task=SENT,
15
- languages=[EN],
14
+ languages=[ENGLISH],
16
15
  )
17
16
 
18
17
  SCALA_EN_CONFIG = DatasetConfig(
19
18
  name="scala-en",
20
- pretty_name="the English part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-en",
19
+ pretty_name="ScaLA-en",
20
+ source="EuroEval/scala-en",
22
21
  task=LA,
23
- languages=[EN],
22
+ languages=[ENGLISH],
24
23
  )
25
24
 
26
25
  CONLL_EN_CONFIG = DatasetConfig(
27
26
  name="conll-en",
28
- pretty_name="the truncated version of the English named entity recognition "
29
- "dataset CoNLL 2003",
30
- huggingface_id="EuroEval/conll-en-mini",
27
+ pretty_name="CoNLL-en",
28
+ source="EuroEval/conll-en-mini",
31
29
  task=NER,
32
- languages=[EN],
30
+ languages=[ENGLISH],
33
31
  )
34
32
 
35
33
  SQUAD_CONFIG = DatasetConfig(
36
34
  name="squad",
37
- pretty_name="the truncated version of the English question answering dataset SQuAD",
38
- huggingface_id="EuroEval/squad-mini",
35
+ pretty_name="SQuAD",
36
+ source="EuroEval/squad-mini",
39
37
  task=RC,
40
- languages=[EN],
38
+ languages=[ENGLISH],
41
39
  )
42
40
 
43
41
  CNN_DAILYMAIL_CONFIG = DatasetConfig(
44
42
  name="cnn-dailymail",
45
- pretty_name="the truncated version of the English summarisation dataset "
46
- "CNN-DailyMail",
47
- huggingface_id="EuroEval/cnn-dailymail-mini",
43
+ pretty_name="CNN/DailyMail",
44
+ source="EuroEval/cnn-dailymail-mini",
48
45
  task=SUMM,
49
- languages=[EN],
46
+ languages=[ENGLISH],
50
47
  )
51
48
 
52
49
  LIFE_IN_THE_UK_CONFIG = DatasetConfig(
53
50
  name="life-in-the-uk",
54
- pretty_name="the English knowledge dataset Life in the UK",
55
- huggingface_id="EuroEval/life-in-the-uk",
51
+ pretty_name="Life in the UK",
52
+ source="EuroEval/life-in-the-uk",
56
53
  task=KNOW,
57
- languages=[EN],
54
+ languages=[ENGLISH],
58
55
  )
59
56
 
60
57
  HELLASWAG_CONFIG = DatasetConfig(
61
58
  name="hellaswag",
62
- pretty_name="the truncated version of the English common-sense reasoning "
63
- "dataset HellaSwag",
64
- huggingface_id="EuroEval/hellaswag-mini",
59
+ pretty_name="HellaSwag",
60
+ source="EuroEval/hellaswag-mini",
65
61
  task=COMMON_SENSE,
66
- languages=[EN],
62
+ languages=[ENGLISH],
67
63
  )
68
64
 
69
- EUROPEAN_VALUES_EN_CONFIG = DatasetConfig(
70
- name="european-values-en",
71
- pretty_name="the English version of the European values evaluation dataset",
72
- huggingface_id="EuroEval/european-values-en",
65
+ VALEU_EN_CONFIG = DatasetConfig(
66
+ name="valeu-en",
67
+ pretty_name="VaLEU-en",
68
+ source="EuroEval/european-values-en",
73
69
  task=EUROPEAN_VALUES,
74
- languages=[EN],
70
+ languages=[ENGLISH],
75
71
  splits=["test"],
76
72
  bootstrap_samples=False,
77
73
  _instruction_prompt="{text}",
@@ -82,82 +78,55 @@ EUROPEAN_VALUES_EN_CONFIG = DatasetConfig(
82
78
 
83
79
  XQUAD_EN_CONFIG = DatasetConfig(
84
80
  name="xquad-en",
85
- pretty_name="the English version of the reading comprehension dataset XQuAD",
86
- huggingface_id="EuroEval/xquad-en",
81
+ pretty_name="XQuAD-en",
82
+ source="EuroEval/xquad-en",
87
83
  task=RC,
88
- languages=[EN],
84
+ languages=[ENGLISH],
89
85
  unofficial=True,
90
86
  )
91
87
 
92
88
  ARC_CONFIG = DatasetConfig(
93
89
  name="arc",
94
- pretty_name="the truncated version of the English knowledge dataset ARC",
95
- huggingface_id="EuroEval/arc-mini",
90
+ pretty_name="ARC",
91
+ source="EuroEval/arc-mini",
96
92
  task=KNOW,
97
- languages=[EN],
93
+ languages=[ENGLISH],
98
94
  unofficial=True,
99
95
  )
100
96
 
101
97
  BELEBELE_CONFIG = DatasetConfig(
102
98
  name="belebele-en",
103
- pretty_name="the English multiple choice reading comprehension dataset BeleBele",
104
- huggingface_id="EuroEval/belebele-mini",
99
+ pretty_name="Belebele-en",
100
+ source="EuroEval/belebele-mini",
105
101
  task=MCRC,
106
- languages=[EN],
102
+ languages=[ENGLISH],
107
103
  unofficial=True,
108
104
  )
109
105
 
110
106
  MMLU_CONFIG = DatasetConfig(
111
107
  name="mmlu",
112
- pretty_name="the truncated version of the English knowledge dataset MMLU",
113
- huggingface_id="EuroEval/mmlu-mini",
108
+ pretty_name="MMLU",
109
+ source="EuroEval/mmlu-mini",
114
110
  task=KNOW,
115
- languages=[EN],
111
+ languages=[ENGLISH],
116
112
  unofficial=True,
117
113
  )
118
114
 
119
115
  MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
120
116
  name="multi-wiki-qa-en",
121
- pretty_name="the truncated version of the English part of the reading "
122
- "comprehension dataset MultiWikiQA",
123
- huggingface_id="EuroEval/multi-wiki-qa-en-mini",
117
+ pretty_name="MultiWikiQA-en",
118
+ source="EuroEval/multi-wiki-qa-en-mini",
124
119
  task=RC,
125
- languages=[EN],
120
+ languages=[ENGLISH],
126
121
  unofficial=True,
127
122
  )
128
123
 
129
124
  WINOGRANDE_CONFIG = DatasetConfig(
130
125
  name="winogrande",
131
- pretty_name="the English common-sense reasoning dataset Winogrande",
132
- huggingface_id="EuroEval/winogrande-en",
126
+ pretty_name="Winogrande-en",
127
+ source="EuroEval/winogrande-en",
133
128
  task=COMMON_SENSE,
134
- languages=[EN],
129
+ languages=[ENGLISH],
135
130
  _labels=["a", "b"],
136
131
  unofficial=True,
137
132
  )
138
-
139
- EUROPEAN_VALUES_SITUATIONAL_EN_CONFIG = DatasetConfig(
140
- name="european-values-situational-en",
141
- pretty_name="the English version of the European values evaluation dataset, where "
142
- "the questions are phrased in a situational way",
143
- huggingface_id="EuroEval/european-values-situational-en",
144
- task=EUROPEAN_VALUES,
145
- languages=[EN],
146
- splits=["test"],
147
- bootstrap_samples=False,
148
- _instruction_prompt="{text}",
149
- unofficial=True,
150
- )
151
-
152
- EUROPEAN_VALUES_COMPLETIONS_EN_CONFIG = DatasetConfig(
153
- name="european-values-completions-en",
154
- pretty_name="the English version of the European values evaluation dataset, where "
155
- "the questions are phrased as sentence completions",
156
- huggingface_id="EuroEval/european-values-completions-en",
157
- task=EUROPEAN_VALUES,
158
- languages=[EN],
159
- splits=["test"],
160
- bootstrap_samples=False,
161
- _instruction_prompt="{text}",
162
- unofficial=True,
163
- )
@@ -1,66 +1,65 @@
1
1
  """All Estonian dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import ET
4
+ from ..languages import ESTONIAN
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  ESTONIAN_VALENCE_CONFIG = DatasetConfig(
10
10
  name="estonian-valence",
11
- pretty_name="the Estonian sentiment classification dataset Estonian Valence",
12
- huggingface_id="EuroEval/estonian-valence",
11
+ pretty_name="Estonian Valence",
12
+ source="EuroEval/estonian-valence",
13
13
  task=SENT,
14
- languages=[ET],
14
+ languages=[ESTONIAN],
15
15
  )
16
16
 
17
17
  GRAMMAR_ET_CONFIG = DatasetConfig(
18
18
  name="grammar-et",
19
- pretty_name="the Estonian linguistic acceptability dataset Grammar-et",
20
- huggingface_id="EuroEval/grammar-et",
19
+ pretty_name="Grammar-et",
20
+ source="EuroEval/grammar-et",
21
21
  task=LA,
22
- languages=[ET],
22
+ languages=[ESTONIAN],
23
23
  )
24
24
 
25
25
  ESTNER_CONFIG = DatasetConfig(
26
26
  name="estner",
27
- pretty_name="the Estonian named entity recognition dataset EstNER",
28
- huggingface_id="EuroEval/estner-mini",
27
+ pretty_name="EstNER",
28
+ source="EuroEval/estner-mini",
29
29
  task=NER,
30
- languages=[ET],
30
+ languages=[ESTONIAN],
31
31
  )
32
32
 
33
33
  MULTI_WIKI_QA_ET_CONFIG = DatasetConfig(
34
34
  name="multi-wiki-qa-et",
35
- pretty_name="the truncated version of the Estonian part of the reading "
36
- "comprehension dataset MultiWikiQA",
37
- huggingface_id="EuroEval/multi-wiki-qa-et-mini",
35
+ pretty_name="MultiWikiQA-et",
36
+ source="EuroEval/multi-wiki-qa-et-mini",
38
37
  task=RC,
39
- languages=[ET],
38
+ languages=[ESTONIAN],
40
39
  )
41
40
 
42
41
  ERR_NEWS_CONFIG = DatasetConfig(
43
42
  name="err-news",
44
- pretty_name="the Estonian summarisation dataset ErrNews",
45
- huggingface_id="EuroEval/err-news-mini",
43
+ pretty_name="ERR News",
44
+ source="EuroEval/err-news-mini",
46
45
  task=SUMM,
47
- languages=[ET],
46
+ languages=[ESTONIAN],
48
47
  )
49
48
 
50
49
  TRIVIA_ET_CONFIG = DatasetConfig(
51
50
  name="trivia-et",
52
- pretty_name="the Estonian knowledge dataset Trivia-et",
53
- huggingface_id="EuroEval/trivia-et",
51
+ pretty_name="Trivia-et",
52
+ source="EuroEval/trivia-et",
54
53
  task=KNOW,
55
- languages=[ET],
54
+ languages=[ESTONIAN],
56
55
  )
57
56
 
58
57
  WINOGRANDE_ET_CONFIG = DatasetConfig(
59
58
  name="winogrande-et",
60
- pretty_name="the Estonian common-sense reasoning dataset Winogrande-et",
61
- huggingface_id="EuroEval/winogrande-et",
59
+ pretty_name="Winogrande-et",
60
+ source="EuroEval/winogrande-et",
62
61
  task=COMMON_SENSE,
63
- languages=[ET],
62
+ languages=[ESTONIAN],
64
63
  _prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
65
64
  "igal ülesandel on kaks vastusevarianti (a ja b).",
66
65
  _prompt_template="Tekstülesanne: {text}\nVastus: {label}",
@@ -70,12 +69,12 @@ WINOGRANDE_ET_CONFIG = DatasetConfig(
70
69
  _labels=["a", "b"],
71
70
  )
72
71
 
73
- EUROPEAN_VALUES_ET_CONFIG = DatasetConfig(
74
- name="european-values-et",
75
- pretty_name="the Estonian version of the European values evaluation dataset",
76
- huggingface_id="EuroEval/european-values-et",
72
+ VALEU_ET_CONFIG = DatasetConfig(
73
+ name="valeu-et",
74
+ pretty_name="VaLEU-et",
75
+ source="EuroEval/european-values-et",
77
76
  task=EUROPEAN_VALUES,
78
- languages=[ET],
77
+ languages=[ESTONIAN],
79
78
  splits=["test"],
80
79
  bootstrap_samples=False,
81
80
  _instruction_prompt="{text}",
@@ -85,29 +84,28 @@ EUROPEAN_VALUES_ET_CONFIG = DatasetConfig(
85
84
 
86
85
  SCALA_ET_CONFIG = DatasetConfig(
87
86
  name="scala-et",
88
- pretty_name="the Estonian part of the linguistic acceptability dataset ScaLA",
89
- huggingface_id="EuroEval/scala-et",
87
+ pretty_name="ScaLA-et",
88
+ source="EuroEval/scala-et",
90
89
  task=LA,
91
- languages=[ET],
90
+ languages=[ESTONIAN],
92
91
  unofficial=True,
93
92
  )
94
93
 
95
94
  EXAM_ET_CONFIG = DatasetConfig(
96
95
  name="exam-et",
97
- pretty_name="the Estonian knowledge dataset Exam-et",
98
- huggingface_id="EuroEval/exam-et",
96
+ pretty_name="Exam-et",
97
+ source="EuroEval/exam-et",
99
98
  task=KNOW,
100
- languages=[ET],
99
+ languages=[ESTONIAN],
101
100
  _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
102
101
  unofficial=True,
103
102
  )
104
103
 
105
104
  MMLU_ET_CONFIG = DatasetConfig(
106
105
  name="mmlu-et",
107
- pretty_name="the truncated version of the Estonian knowledge dataset MMLU-et, "
108
- "translated from the English MMLU dataset",
109
- huggingface_id="EuroEval/mmlu-et-mini",
106
+ pretty_name="MMLU-et",
107
+ source="EuroEval/mmlu-et-mini",
110
108
  task=KNOW,
111
- languages=[ET],
109
+ languages=[ESTONIAN],
112
110
  unofficial=True,
113
111
  )
@@ -1,102 +1,61 @@
1
1
  """All Faroese dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import FO
4
+ from ..languages import FAROESE
5
5
  from ..tasks import LA, NER, RC, SENT
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  FOSENT_CONFIG = DatasetConfig(
10
10
  name="fosent",
11
- pretty_name="the Faroese sentiment classification dataset FoSent",
12
- huggingface_id="EuroEval/fosent",
11
+ pretty_name="FoSent",
12
+ source="EuroEval/fosent",
13
13
  task=SENT,
14
- languages=[FO],
14
+ languages=[FAROESE],
15
15
  _num_few_shot_examples=5,
16
16
  )
17
17
 
18
18
  SCALA_FO_CONFIG = DatasetConfig(
19
19
  name="scala-fo",
20
- pretty_name="the Faroese part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-fo",
20
+ pretty_name="ScaLA-fo",
21
+ source="EuroEval/scala-fo",
22
22
  task=LA,
23
- languages=[FO],
23
+ languages=[FAROESE],
24
24
  )
25
25
 
26
26
  FONE_CONFIG = DatasetConfig(
27
27
  name="fone",
28
- pretty_name="the truncated version of the Faroese named entity recognition "
29
- "dataset FoNE",
30
- huggingface_id="EuroEval/fone-mini",
28
+ pretty_name="FoNE",
29
+ source="EuroEval/fone-mini",
31
30
  task=NER,
32
- languages=[FO],
31
+ languages=[FAROESE],
33
32
  )
34
33
 
35
34
  FOQA_CONFIG = DatasetConfig(
36
35
  name="foqa",
37
- pretty_name="the Faroese reading comprehension dataset FoQA",
38
- huggingface_id="EuroEval/foqa",
36
+ pretty_name="FoQA",
37
+ source="EuroEval/foqa",
39
38
  task=RC,
40
- languages=[FO],
39
+ languages=[FAROESE],
41
40
  )
42
41
 
43
- # TODO: No Faroese version of the European values dataset exists yet
44
- # EUROPEAN_VALUES_FO_CONFIG = DatasetConfig(
45
- # name="european-values-fo",
46
- # pretty_name="the Faroese version of the European values evaluation dataset",
47
- # huggingface_id="EuroEval/european-values-fo",
48
- # task=EUROPEAN_VALUES,
49
- # languages=[FO],
50
- # splits=["test"],
51
- # bootstrap_samples=False,
52
- # _instruction_prompt="{text}",
53
- # )
54
- #
55
- # EUROPEAN_VALUES_SITUATIONAL_FO_CONFIG = DatasetConfig(
56
- # name="european-values-situational-fo",
57
- # pretty_name="the Faroese version of the European values evaluation dataset, "
58
- # "where the questions are phrased in a situational way",
59
- # huggingface_id="EuroEval/european-values-situational-fo",
60
- # task=EUROPEAN_VALUES,
61
- # languages=[FO],
62
- # splits=["test"],
63
- # bootstrap_samples=False,
64
- # _instruction_prompt="{text}",
65
- # unofficial=True,
66
- # )
67
- #
68
- # EUROPEAN_VALUES_COMPLETIONS_FO_CONFIG = DatasetConfig(
69
- # name="european-values-completions-fo",
70
- # pretty_name="the Faroese version of the European values evaluation dataset, "
71
- # "where the questions are phrased as sentence completions",
72
- # huggingface_id="EuroEval/european-values-completions-fo",
73
- # task=EUROPEAN_VALUES,
74
- # languages=[FO],
75
- # splits=["test"],
76
- # bootstrap_samples=False,
77
- # _instruction_prompt="{text}",
78
- # unofficial=True,
79
- # )
80
-
81
42
 
82
43
  ### Unofficial datasets ###
83
44
 
84
45
  WIKIANN_FO_CONFIG = DatasetConfig(
85
46
  name="wikiann-fo",
86
- pretty_name="the truncated version of the Faroese part of the named entity "
87
- "recognition dataset WikiANN",
88
- huggingface_id="EuroEval/wikiann-fo-mini",
47
+ pretty_name="WikiANN-fo",
48
+ source="EuroEval/wikiann-fo-mini",
89
49
  task=NER,
90
- languages=[FO],
50
+ languages=[FAROESE],
91
51
  unofficial=True,
92
52
  )
93
53
 
94
54
  MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
95
55
  name="multi-wiki-qa-fo",
96
- pretty_name="the truncated version of the Faroese part of the reading "
97
- "comprehension dataset MultiWikiQA",
98
- huggingface_id="EuroEval/multi-wiki-qa-fo-mini",
56
+ pretty_name="MultiWikiQA-fo",
57
+ source="EuroEval/multi-wiki-qa-fo-mini",
99
58
  task=RC,
100
- languages=[FO],
59
+ languages=[FAROESE],
101
60
  unofficial=True,
102
61
  )
@@ -1,68 +1,66 @@
1
1
  """All Finnish dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import FI
4
+ from ..languages import FINNISH
5
5
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SCANDISENT_FI_CONFIG = DatasetConfig(
10
10
  name="scandisent-fi",
11
- pretty_name="the truncated version of the Finnish part of the binary sentiment "
12
- "classification dataset ScandiSent",
13
- huggingface_id="EuroEval/scandisent-fi-mini",
11
+ pretty_name="ScandiSent-fi",
12
+ source="EuroEval/scandisent-fi-mini",
14
13
  task=SENT,
15
- languages=[FI],
14
+ languages=[FINNISH],
16
15
  _labels=["negative", "positive"],
17
16
  )
18
17
 
19
18
  TURKU_NER_FI_CONFIG = DatasetConfig(
20
19
  name="turku-ner-fi",
21
- pretty_name="the Finnish part of the named entity recognition dataset Turku NER",
22
- huggingface_id="EuroEval/turku-ner-fi-mini",
20
+ pretty_name="Turku NER-fi",
21
+ source="EuroEval/turku-ner-fi-mini",
23
22
  task=NER,
24
- languages=[FI],
23
+ languages=[FINNISH],
25
24
  )
26
25
 
27
26
  TYDIQA_FI_CONFIG = DatasetConfig(
28
27
  name="tydiqa-fi",
29
- pretty_name="the Finnish part of the TydiQA reading comprehension dataset",
30
- huggingface_id="EuroEval/tydiqa-fi-mini",
28
+ pretty_name="TyDiQA-fi",
29
+ source="EuroEval/tydiqa-fi-mini",
31
30
  task=RC,
32
- languages=[FI],
31
+ languages=[FINNISH],
33
32
  )
34
33
 
35
34
  XLSUM_FI_CONFIG = DatasetConfig(
36
35
  name="xlsum-fi",
37
- pretty_name="the Finnish summarisation dataset XL-Sum",
38
- huggingface_id="EuroEval/xlsum-fi-mini",
36
+ pretty_name="XLSum-fi",
37
+ source="EuroEval/xlsum-fi-mini",
39
38
  task=SUMM,
40
- languages=[FI],
39
+ languages=[FINNISH],
41
40
  )
42
41
 
43
42
  HELLASWAG_FI_CONFIG = DatasetConfig(
44
43
  name="hellaswag-fi",
45
- pretty_name="the truncated version of the Finnish common-sense reasoning dataset "
46
- "HellaSwag-fi, translated from the English HellaSwag dataset",
47
- huggingface_id="EuroEval/hellaswag-fi-mini",
44
+ pretty_name="HellaSwag-fi",
45
+ source="EuroEval/hellaswag-fi-mini",
48
46
  task=COMMON_SENSE,
49
- languages=[FI],
47
+ languages=[FINNISH],
50
48
  )
51
49
 
52
50
  SCALA_FI_CONFIG = DatasetConfig(
53
51
  name="scala-fi",
54
- pretty_name="the Finnish part of the linguistic acceptability dataset ScaLA",
55
- huggingface_id="EuroEval/scala-fi",
52
+ pretty_name="ScaLA-fi",
53
+ source="EuroEval/scala-fi",
56
54
  task=LA,
57
- languages=[FI],
55
+ languages=[FINNISH],
58
56
  )
59
57
 
60
- EUROPEAN_VALUES_FI_CONFIG = DatasetConfig(
61
- name="european-values-fi",
62
- pretty_name="the Finnish version of the European values evaluation dataset",
63
- huggingface_id="EuroEval/european-values-fi",
58
+ VALEU_FI_CONFIG = DatasetConfig(
59
+ name="valeu-fi",
60
+ pretty_name="VaLEU-fi",
61
+ source="EuroEval/european-values-fi",
64
62
  task=EUROPEAN_VALUES,
65
- languages=[FI],
63
+ languages=[FINNISH],
66
64
  splits=["test"],
67
65
  bootstrap_samples=False,
68
66
  _instruction_prompt="{text}",
@@ -73,67 +71,37 @@ EUROPEAN_VALUES_FI_CONFIG = DatasetConfig(
73
71
 
74
72
  BELEBELE_FI_CONFIG = DatasetConfig(
75
73
  name="belebele-fi",
76
- pretty_name="the Finnish multiple choice reading comprehension dataset "
77
- "BeleBele-fi, translated from the English BeleBele dataset",
78
- huggingface_id="EuroEval/belebele-fi-mini",
74
+ pretty_name="Belebele-fi",
75
+ source="EuroEval/belebele-fi-mini",
79
76
  task=MCRC,
80
- languages=[FI],
77
+ languages=[FINNISH],
81
78
  unofficial=True,
82
79
  )
83
80
 
84
81
  MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
85
82
  name="multi-wiki-qa-fi",
86
- pretty_name="the truncated version of the Finnish part of the reading "
87
- "comprehension dataset MultiWikiQA",
88
- huggingface_id="EuroEval/multi-wiki-qa-fi-mini",
83
+ pretty_name="MultiWikiQA-fi",
84
+ source="EuroEval/multi-wiki-qa-fi-mini",
89
85
  task=RC,
90
- languages=[FI],
86
+ languages=[FINNISH],
91
87
  unofficial=True,
92
88
  )
93
89
 
94
90
  GOLDENSWAG_FI_CONFIG = DatasetConfig(
95
91
  name="goldenswag-fi",
96
- pretty_name="the truncated version of the Finnish common-sense reasoning "
97
- "dataset GoldenSwag-fi, translated from the English GoldenSwag dataset",
98
- huggingface_id="EuroEval/goldenswag-fi-mini",
92
+ pretty_name="GoldenSwag-fi",
93
+ source="EuroEval/goldenswag-fi-mini",
99
94
  task=COMMON_SENSE,
100
- languages=[FI],
95
+ languages=[FINNISH],
101
96
  unofficial=True,
102
97
  )
103
98
 
104
99
  WINOGRANDE_FI_CONFIG = DatasetConfig(
105
100
  name="winogrande-fi",
106
- pretty_name="the Finnish common-sense reasoning dataset Winogrande-fi, translated "
107
- "from the English Winogrande dataset",
108
- huggingface_id="EuroEval/winogrande-fi",
101
+ pretty_name="Winogrande-fi",
102
+ source="EuroEval/winogrande-fi",
109
103
  task=COMMON_SENSE,
110
- languages=[FI],
104
+ languages=[FINNISH],
111
105
  _labels=["a", "b"],
112
106
  unofficial=True,
113
107
  )
114
-
115
- EUROPEAN_VALUES_SITUATIONAL_FI_CONFIG = DatasetConfig(
116
- name="european-values-situational-fi",
117
- pretty_name="the Finnish version of the European values evaluation dataset, where "
118
- "the questions are phrased in a situational way",
119
- huggingface_id="EuroEval/european-values-situational-fi",
120
- task=EUROPEAN_VALUES,
121
- languages=[FI],
122
- splits=["test"],
123
- bootstrap_samples=False,
124
- _instruction_prompt="{text}",
125
- unofficial=True,
126
- )
127
-
128
- EUROPEAN_VALUES_COMPLETIONS_FI_CONFIG = DatasetConfig(
129
- name="european-values-completions-fi",
130
- pretty_name="the Finnish version of the European values evaluation dataset, where "
131
- "the questions are phrased as sentence completions",
132
- huggingface_id="EuroEval/european-values-completions-fi",
133
- task=EUROPEAN_VALUES,
134
- languages=[FI],
135
- splits=["test"],
136
- bootstrap_samples=False,
137
- _instruction_prompt="{text}",
138
- unofficial=True,
139
- )