EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. euroeval/__init__.py +32 -14
  2. euroeval/benchmark_config_factory.py +92 -180
  3. euroeval/benchmark_modules/base.py +49 -39
  4. euroeval/benchmark_modules/fresh.py +35 -21
  5. euroeval/benchmark_modules/hf.py +280 -244
  6. euroeval/benchmark_modules/litellm.py +752 -312
  7. euroeval/benchmark_modules/vllm.py +570 -268
  8. euroeval/benchmarker.py +651 -528
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/cli.py +49 -38
  12. euroeval/constants.py +44 -25
  13. euroeval/data_loading.py +111 -55
  14. euroeval/data_models.py +490 -323
  15. euroeval/dataset_configs/__init__.py +26 -4
  16. euroeval/dataset_configs/bosnian.py +39 -0
  17. euroeval/dataset_configs/bulgarian.py +56 -0
  18. euroeval/dataset_configs/croatian.py +56 -0
  19. euroeval/dataset_configs/czech.py +75 -0
  20. euroeval/dataset_configs/danish.py +78 -50
  21. euroeval/dataset_configs/dutch.py +74 -44
  22. euroeval/dataset_configs/english.py +71 -36
  23. euroeval/dataset_configs/estonian.py +111 -0
  24. euroeval/dataset_configs/faroese.py +25 -18
  25. euroeval/dataset_configs/finnish.py +63 -26
  26. euroeval/dataset_configs/french.py +65 -32
  27. euroeval/dataset_configs/german.py +77 -36
  28. euroeval/dataset_configs/greek.py +64 -0
  29. euroeval/dataset_configs/icelandic.py +68 -57
  30. euroeval/dataset_configs/italian.py +68 -36
  31. euroeval/dataset_configs/latvian.py +87 -0
  32. euroeval/dataset_configs/lithuanian.py +64 -0
  33. euroeval/dataset_configs/norwegian.py +98 -72
  34. euroeval/dataset_configs/polish.py +96 -0
  35. euroeval/dataset_configs/portuguese.py +63 -40
  36. euroeval/dataset_configs/serbian.py +64 -0
  37. euroeval/dataset_configs/slovak.py +55 -0
  38. euroeval/dataset_configs/slovene.py +56 -0
  39. euroeval/dataset_configs/spanish.py +68 -34
  40. euroeval/dataset_configs/swedish.py +82 -41
  41. euroeval/dataset_configs/ukrainian.py +64 -0
  42. euroeval/enums.py +12 -6
  43. euroeval/exceptions.py +21 -1
  44. euroeval/finetuning.py +34 -26
  45. euroeval/generation.py +76 -41
  46. euroeval/generation_utils.py +169 -34
  47. euroeval/languages.py +1020 -188
  48. euroeval/logging_utils.py +268 -0
  49. euroeval/metrics/__init__.py +6 -0
  50. euroeval/metrics/base.py +85 -0
  51. euroeval/metrics/huggingface.py +216 -0
  52. euroeval/metrics/llm_as_a_judge.py +260 -0
  53. euroeval/metrics/pipeline.py +289 -0
  54. euroeval/metrics/speed.py +48 -0
  55. euroeval/model_cache.py +40 -21
  56. euroeval/model_config.py +4 -5
  57. euroeval/model_loading.py +3 -0
  58. euroeval/prompt_templates/__init__.py +2 -0
  59. euroeval/prompt_templates/classification.py +206 -0
  60. euroeval/prompt_templates/linguistic_acceptability.py +157 -22
  61. euroeval/prompt_templates/multiple_choice.py +159 -17
  62. euroeval/prompt_templates/named_entity_recognition.py +318 -21
  63. euroeval/prompt_templates/reading_comprehension.py +207 -16
  64. euroeval/prompt_templates/sentiment_classification.py +205 -22
  65. euroeval/prompt_templates/summarization.py +122 -22
  66. euroeval/prompt_templates/token_classification.py +279 -0
  67. euroeval/scores.py +20 -9
  68. euroeval/speed_benchmark.py +11 -12
  69. euroeval/task_group_utils/multiple_choice_classification.py +21 -12
  70. euroeval/task_group_utils/question_answering.py +101 -73
  71. euroeval/task_group_utils/sequence_classification.py +144 -61
  72. euroeval/task_group_utils/text_to_text.py +33 -12
  73. euroeval/task_group_utils/token_classification.py +86 -89
  74. euroeval/tasks.py +75 -16
  75. euroeval/tokenisation_utils.py +603 -0
  76. euroeval/types.py +17 -11
  77. euroeval/utils.py +332 -137
  78. euroeval-16.7.1.dist-info/METADATA +623 -0
  79. euroeval-16.7.1.dist-info/RECORD +84 -0
  80. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
  81. euroeval/human_evaluation.py +0 -737
  82. euroeval/metrics.py +0 -452
  83. euroeval/tokenization_utils.py +0 -498
  84. euroeval-15.12.0.dist-info/METADATA +0 -285
  85. euroeval-15.12.0.dist-info/RECORD +0 -63
  86. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
  87. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,97 +1,132 @@
1
1
  """All English dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import EN
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import ENGLISH
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SST5_CONFIG = DatasetConfig(
10
10
  name="sst5",
11
- pretty_name="the truncated version of the English sentiment classification "
12
- "dataset SST5",
13
- huggingface_id="EuroEval/sst5-mini",
11
+ pretty_name="SST-5",
12
+ source="EuroEval/sst5-mini",
14
13
  task=SENT,
15
- languages=[EN],
14
+ languages=[ENGLISH],
16
15
  )
17
16
 
18
17
  SCALA_EN_CONFIG = DatasetConfig(
19
18
  name="scala-en",
20
- pretty_name="the English part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-en",
19
+ pretty_name="ScaLA-en",
20
+ source="EuroEval/scala-en",
22
21
  task=LA,
23
- languages=[EN],
22
+ languages=[ENGLISH],
24
23
  )
25
24
 
26
25
  CONLL_EN_CONFIG = DatasetConfig(
27
26
  name="conll-en",
28
- pretty_name="the truncated version of the English named entity recognition "
29
- "dataset CoNLL 2003",
30
- huggingface_id="EuroEval/conll-en-mini",
27
+ pretty_name="CoNLL-en",
28
+ source="EuroEval/conll-en-mini",
31
29
  task=NER,
32
- languages=[EN],
30
+ languages=[ENGLISH],
33
31
  )
34
32
 
35
33
  SQUAD_CONFIG = DatasetConfig(
36
34
  name="squad",
37
- pretty_name="the truncated version of the English question answering dataset SQuAD",
38
- huggingface_id="EuroEval/squad-mini",
35
+ pretty_name="SQuAD",
36
+ source="EuroEval/squad-mini",
39
37
  task=RC,
40
- languages=[EN],
38
+ languages=[ENGLISH],
41
39
  )
42
40
 
43
41
  CNN_DAILYMAIL_CONFIG = DatasetConfig(
44
42
  name="cnn-dailymail",
45
- pretty_name="the truncated version of the English summarisation dataset "
46
- "CNN-DailyMail",
47
- huggingface_id="EuroEval/cnn-dailymail-mini",
43
+ pretty_name="CNN/DailyMail",
44
+ source="EuroEval/cnn-dailymail-mini",
48
45
  task=SUMM,
49
- languages=[EN],
46
+ languages=[ENGLISH],
50
47
  )
51
48
 
52
49
  LIFE_IN_THE_UK_CONFIG = DatasetConfig(
53
50
  name="life-in-the-uk",
54
- pretty_name="the English knowledge dataset Life in the UK",
55
- huggingface_id="EuroEval/life-in-the-uk",
51
+ pretty_name="Life in the UK",
52
+ source="EuroEval/life-in-the-uk",
56
53
  task=KNOW,
57
- languages=[EN],
54
+ languages=[ENGLISH],
58
55
  )
59
56
 
60
57
  HELLASWAG_CONFIG = DatasetConfig(
61
58
  name="hellaswag",
62
- pretty_name="the truncated version of the English common-sense reasoning "
63
- "dataset HellaSwag",
64
- huggingface_id="EuroEval/hellaswag-mini",
59
+ pretty_name="HellaSwag",
60
+ source="EuroEval/hellaswag-mini",
65
61
  task=COMMON_SENSE,
66
- languages=[EN],
62
+ languages=[ENGLISH],
63
+ )
64
+
65
+ VALEU_EN_CONFIG = DatasetConfig(
66
+ name="valeu-en",
67
+ pretty_name="VaLEU-en",
68
+ source="EuroEval/european-values-en",
69
+ task=EUROPEAN_VALUES,
70
+ languages=[ENGLISH],
71
+ splits=["test"],
72
+ bootstrap_samples=False,
73
+ _instruction_prompt="{text}",
67
74
  )
68
75
 
69
76
 
70
77
  ### Unofficial datasets ###
71
78
 
79
+ XQUAD_EN_CONFIG = DatasetConfig(
80
+ name="xquad-en",
81
+ pretty_name="XQuAD-en",
82
+ source="EuroEval/xquad-en",
83
+ task=RC,
84
+ languages=[ENGLISH],
85
+ unofficial=True,
86
+ )
87
+
72
88
  ARC_CONFIG = DatasetConfig(
73
89
  name="arc",
74
- pretty_name="the truncated version of the English knowledge dataset ARC",
75
- huggingface_id="EuroEval/arc-mini",
90
+ pretty_name="ARC",
91
+ source="EuroEval/arc-mini",
76
92
  task=KNOW,
77
- languages=[EN],
93
+ languages=[ENGLISH],
78
94
  unofficial=True,
79
95
  )
80
96
 
81
97
  BELEBELE_CONFIG = DatasetConfig(
82
98
  name="belebele-en",
83
- pretty_name="the English multiple choice reading comprehension dataset BeleBele",
84
- huggingface_id="EuroEval/belebele-mini",
99
+ pretty_name="Belebele-en",
100
+ source="EuroEval/belebele-mini",
85
101
  task=MCRC,
86
- languages=[EN],
102
+ languages=[ENGLISH],
87
103
  unofficial=True,
88
104
  )
89
105
 
90
106
  MMLU_CONFIG = DatasetConfig(
91
107
  name="mmlu",
92
- pretty_name="the truncated version of the English knowledge dataset MMLU",
93
- huggingface_id="EuroEval/mmlu-mini",
108
+ pretty_name="MMLU",
109
+ source="EuroEval/mmlu-mini",
94
110
  task=KNOW,
95
- languages=[EN],
111
+ languages=[ENGLISH],
112
+ unofficial=True,
113
+ )
114
+
115
+ MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
116
+ name="multi-wiki-qa-en",
117
+ pretty_name="MultiWikiQA-en",
118
+ source="EuroEval/multi-wiki-qa-en-mini",
119
+ task=RC,
120
+ languages=[ENGLISH],
121
+ unofficial=True,
122
+ )
123
+
124
+ WINOGRANDE_CONFIG = DatasetConfig(
125
+ name="winogrande",
126
+ pretty_name="Winogrande-en",
127
+ source="EuroEval/winogrande-en",
128
+ task=COMMON_SENSE,
129
+ languages=[ENGLISH],
130
+ _labels=["a", "b"],
96
131
  unofficial=True,
97
132
  )
@@ -0,0 +1,111 @@
1
+ """All Estonian dataset configurations used in EuroEval."""
2
+
3
+ from ..data_models import DatasetConfig
4
+ from ..languages import ESTONIAN
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
6
+
7
+ ### Official datasets ###
8
+
9
+ ESTONIAN_VALENCE_CONFIG = DatasetConfig(
10
+ name="estonian-valence",
11
+ pretty_name="Estonian Valence",
12
+ source="EuroEval/estonian-valence",
13
+ task=SENT,
14
+ languages=[ESTONIAN],
15
+ )
16
+
17
+ GRAMMAR_ET_CONFIG = DatasetConfig(
18
+ name="grammar-et",
19
+ pretty_name="Grammar-et",
20
+ source="EuroEval/grammar-et",
21
+ task=LA,
22
+ languages=[ESTONIAN],
23
+ )
24
+
25
+ ESTNER_CONFIG = DatasetConfig(
26
+ name="estner",
27
+ pretty_name="EstNER",
28
+ source="EuroEval/estner-mini",
29
+ task=NER,
30
+ languages=[ESTONIAN],
31
+ )
32
+
33
+ MULTI_WIKI_QA_ET_CONFIG = DatasetConfig(
34
+ name="multi-wiki-qa-et",
35
+ pretty_name="MultiWikiQA-et",
36
+ source="EuroEval/multi-wiki-qa-et-mini",
37
+ task=RC,
38
+ languages=[ESTONIAN],
39
+ )
40
+
41
+ ERR_NEWS_CONFIG = DatasetConfig(
42
+ name="err-news",
43
+ pretty_name="ERR News",
44
+ source="EuroEval/err-news-mini",
45
+ task=SUMM,
46
+ languages=[ESTONIAN],
47
+ )
48
+
49
+ TRIVIA_ET_CONFIG = DatasetConfig(
50
+ name="trivia-et",
51
+ pretty_name="Trivia-et",
52
+ source="EuroEval/trivia-et",
53
+ task=KNOW,
54
+ languages=[ESTONIAN],
55
+ )
56
+
57
+ WINOGRANDE_ET_CONFIG = DatasetConfig(
58
+ name="winogrande-et",
59
+ pretty_name="Winogrande-et",
60
+ source="EuroEval/winogrande-et",
61
+ task=COMMON_SENSE,
62
+ languages=[ESTONIAN],
63
+ _prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
64
+ "igal ülesandel on kaks vastusevarianti (a ja b).",
65
+ _prompt_template="Tekstülesanne: {text}\nVastus: {label}",
66
+ _instruction_prompt="Tekstülesanne: {text}\n\n"
67
+ "Sinu ülesanne on valida lünka sobiv vastusevariant. "
68
+ "Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
69
+ _labels=["a", "b"],
70
+ )
71
+
72
+ VALEU_ET_CONFIG = DatasetConfig(
73
+ name="valeu-et",
74
+ pretty_name="VaLEU-et",
75
+ source="EuroEval/european-values-et",
76
+ task=EUROPEAN_VALUES,
77
+ languages=[ESTONIAN],
78
+ splits=["test"],
79
+ bootstrap_samples=False,
80
+ _instruction_prompt="{text}",
81
+ )
82
+
83
+ ### Unofficial datasets ###
84
+
85
+ SCALA_ET_CONFIG = DatasetConfig(
86
+ name="scala-et",
87
+ pretty_name="ScaLA-et",
88
+ source="EuroEval/scala-et",
89
+ task=LA,
90
+ languages=[ESTONIAN],
91
+ unofficial=True,
92
+ )
93
+
94
+ EXAM_ET_CONFIG = DatasetConfig(
95
+ name="exam-et",
96
+ pretty_name="Exam-et",
97
+ source="EuroEval/exam-et",
98
+ task=KNOW,
99
+ languages=[ESTONIAN],
100
+ _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
101
+ unofficial=True,
102
+ )
103
+
104
+ MMLU_ET_CONFIG = DatasetConfig(
105
+ name="mmlu-et",
106
+ pretty_name="MMLU-et",
107
+ source="EuroEval/mmlu-et-mini",
108
+ task=KNOW,
109
+ languages=[ESTONIAN],
110
+ unofficial=True,
111
+ )
@@ -1,43 +1,42 @@
1
1
  """All Faroese dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import FO
4
+ from ..languages import FAROESE
5
5
  from ..tasks import LA, NER, RC, SENT
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  FOSENT_CONFIG = DatasetConfig(
10
10
  name="fosent",
11
- pretty_name="the Faroese sentiment classification dataset FoSent",
12
- huggingface_id="EuroEval/fosent",
11
+ pretty_name="FoSent",
12
+ source="EuroEval/fosent",
13
13
  task=SENT,
14
- languages=[FO],
14
+ languages=[FAROESE],
15
15
  _num_few_shot_examples=5,
16
16
  )
17
17
 
18
18
  SCALA_FO_CONFIG = DatasetConfig(
19
19
  name="scala-fo",
20
- pretty_name="the Faroese part of the linguistic acceptability dataset ScaLA",
21
- huggingface_id="EuroEval/scala-fo",
20
+ pretty_name="ScaLA-fo",
21
+ source="EuroEval/scala-fo",
22
22
  task=LA,
23
- languages=[FO],
23
+ languages=[FAROESE],
24
24
  )
25
25
 
26
26
  FONE_CONFIG = DatasetConfig(
27
27
  name="fone",
28
- pretty_name="the truncated version of the Faroese named entity recognition "
29
- "dataset FoNE",
30
- huggingface_id="EuroEval/fone-mini",
28
+ pretty_name="FoNE",
29
+ source="EuroEval/fone-mini",
31
30
  task=NER,
32
- languages=[FO],
31
+ languages=[FAROESE],
33
32
  )
34
33
 
35
34
  FOQA_CONFIG = DatasetConfig(
36
35
  name="foqa",
37
- pretty_name="the Faroese reading comprehension dataset FoQA",
38
- huggingface_id="EuroEval/foqa",
36
+ pretty_name="FoQA",
37
+ source="EuroEval/foqa",
39
38
  task=RC,
40
- languages=[FO],
39
+ languages=[FAROESE],
41
40
  )
42
41
 
43
42
 
@@ -45,10 +44,18 @@ FOQA_CONFIG = DatasetConfig(
45
44
 
46
45
  WIKIANN_FO_CONFIG = DatasetConfig(
47
46
  name="wikiann-fo",
48
- pretty_name="the truncated version of the Faroese part of the named entity "
49
- "recognition dataset WikiANN",
50
- huggingface_id="EuroEval/wikiann-fo-mini",
47
+ pretty_name="WikiANN-fo",
48
+ source="EuroEval/wikiann-fo-mini",
51
49
  task=NER,
52
- languages=[FO],
50
+ languages=[FAROESE],
51
+ unofficial=True,
52
+ )
53
+
54
+ MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
55
+ name="multi-wiki-qa-fo",
56
+ pretty_name="MultiWikiQA-fo",
57
+ source="EuroEval/multi-wiki-qa-fo-mini",
58
+ task=RC,
59
+ languages=[FAROESE],
53
60
  unofficial=True,
54
61
  )
@@ -1,70 +1,107 @@
1
1
  """All Finnish dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import FI
5
- from ..tasks import COMMON_SENSE, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import FINNISH
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  SCANDISENT_FI_CONFIG = DatasetConfig(
10
10
  name="scandisent-fi",
11
- pretty_name="the truncated version of the Finnish part of the binary sentiment "
12
- "classification dataset ScandiSent",
13
- huggingface_id="EuroEval/scandisent-fi-mini",
11
+ pretty_name="ScandiSent-fi",
12
+ source="EuroEval/scandisent-fi-mini",
14
13
  task=SENT,
15
- languages=[FI],
14
+ languages=[FINNISH],
16
15
  _labels=["negative", "positive"],
17
16
  )
18
17
 
19
18
  TURKU_NER_FI_CONFIG = DatasetConfig(
20
19
  name="turku-ner-fi",
21
- pretty_name="the Finnish part of the named entity recognition dataset Turku NER",
22
- huggingface_id="EuroEval/turku-ner-fi-mini",
20
+ pretty_name="Turku NER-fi",
21
+ source="EuroEval/turku-ner-fi-mini",
23
22
  task=NER,
24
- languages=[FI],
23
+ languages=[FINNISH],
25
24
  )
26
25
 
27
26
  TYDIQA_FI_CONFIG = DatasetConfig(
28
27
  name="tydiqa-fi",
29
- pretty_name="the Finnish part of the TydiQA reading comprehension dataset",
30
- huggingface_id="EuroEval/tydiqa-fi-mini",
28
+ pretty_name="TyDiQA-fi",
29
+ source="EuroEval/tydiqa-fi-mini",
31
30
  task=RC,
32
- languages=[FI],
31
+ languages=[FINNISH],
33
32
  )
34
33
 
35
34
  XLSUM_FI_CONFIG = DatasetConfig(
36
35
  name="xlsum-fi",
37
- pretty_name="the Finnish summarisation dataset XL-Sum",
38
- huggingface_id="EuroEval/xlsum-fi-mini",
36
+ pretty_name="XLSum-fi",
37
+ source="EuroEval/xlsum-fi-mini",
39
38
  task=SUMM,
40
- languages=[FI],
39
+ languages=[FINNISH],
41
40
  )
42
41
 
43
42
  HELLASWAG_FI_CONFIG = DatasetConfig(
44
43
  name="hellaswag-fi",
45
- pretty_name="the truncated version of the Finnish common-sense reasoning dataset "
46
- "HellaSwag-fi, translated from the English HellaSwag dataset",
47
- huggingface_id="EuroEval/hellaswag-fi-mini",
44
+ pretty_name="HellaSwag-fi",
45
+ source="EuroEval/hellaswag-fi-mini",
48
46
  task=COMMON_SENSE,
49
- languages=[FI],
47
+ languages=[FINNISH],
50
48
  )
51
49
 
52
50
  SCALA_FI_CONFIG = DatasetConfig(
53
51
  name="scala-fi",
54
- pretty_name="the Finnish part of the linguistic acceptability dataset ScaLA",
55
- huggingface_id="EuroEval/scala-fi",
52
+ pretty_name="ScaLA-fi",
53
+ source="EuroEval/scala-fi",
56
54
  task=LA,
57
- languages=[FI],
55
+ languages=[FINNISH],
58
56
  )
59
57
 
58
+ VALEU_FI_CONFIG = DatasetConfig(
59
+ name="valeu-fi",
60
+ pretty_name="VaLEU-fi",
61
+ source="EuroEval/european-values-fi",
62
+ task=EUROPEAN_VALUES,
63
+ languages=[FINNISH],
64
+ splits=["test"],
65
+ bootstrap_samples=False,
66
+ _instruction_prompt="{text}",
67
+ )
68
+
69
+
60
70
  ### Unofficial datasets ###
61
71
 
62
72
  BELEBELE_FI_CONFIG = DatasetConfig(
63
73
  name="belebele-fi",
64
- pretty_name="the Finnish multiple choice reading comprehension dataset "
65
- "BeleBele-fi, translated from the English BeleBele dataset",
66
- huggingface_id="EuroEval/belebele-fi-mini",
74
+ pretty_name="Belebele-fi",
75
+ source="EuroEval/belebele-fi-mini",
67
76
  task=MCRC,
68
- languages=[FI],
77
+ languages=[FINNISH],
78
+ unofficial=True,
79
+ )
80
+
81
+ MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
82
+ name="multi-wiki-qa-fi",
83
+ pretty_name="MultiWikiQA-fi",
84
+ source="EuroEval/multi-wiki-qa-fi-mini",
85
+ task=RC,
86
+ languages=[FINNISH],
87
+ unofficial=True,
88
+ )
89
+
90
+ GOLDENSWAG_FI_CONFIG = DatasetConfig(
91
+ name="goldenswag-fi",
92
+ pretty_name="GoldenSwag-fi",
93
+ source="EuroEval/goldenswag-fi-mini",
94
+ task=COMMON_SENSE,
95
+ languages=[FINNISH],
96
+ unofficial=True,
97
+ )
98
+
99
+ WINOGRANDE_FI_CONFIG = DatasetConfig(
100
+ name="winogrande-fi",
101
+ pretty_name="Winogrande-fi",
102
+ source="EuroEval/winogrande-fi",
103
+ task=COMMON_SENSE,
104
+ languages=[FINNISH],
105
+ _labels=["a", "b"],
69
106
  unofficial=True,
70
107
  )
@@ -1,72 +1,78 @@
1
1
  """All French dataset configurations used in EuroEval."""
2
2
 
3
3
  from ..data_models import DatasetConfig
4
- from ..languages import FR
5
- from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
4
+ from ..languages import FRENCH
5
+ from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
6
6
 
7
7
  ### Official datasets ###
8
8
 
9
9
  ALLOCINE_CONFIG = DatasetConfig(
10
10
  name="allocine",
11
- pretty_name="the truncated version of the French sentiment classification "
12
- "dataset AlloCiné",
13
- huggingface_id="EuroEval/allocine-mini",
11
+ pretty_name="AlloCiné",
12
+ source="EuroEval/allocine-mini",
14
13
  task=SENT,
15
- languages=[FR],
14
+ languages=[FRENCH],
16
15
  _labels=["negative", "positive"],
17
16
  _prompt_label_mapping=dict(positive="positif", negative="négatif"),
18
17
  )
19
18
 
20
19
  SCALA_FR_CONFIG = DatasetConfig(
21
20
  name="scala-fr",
22
- pretty_name="the French part of the linguistic acceptability dataset ScaLA",
23
- huggingface_id="EuroEval/scala-fr",
21
+ pretty_name="ScaLA-fr",
22
+ source="EuroEval/scala-fr",
24
23
  task=LA,
25
- languages=[FR],
24
+ languages=[FRENCH],
26
25
  )
27
26
 
28
27
  ELTEC_CONFIG = DatasetConfig(
29
28
  name="eltec",
30
- pretty_name="the truncated version of the French named entity recognition "
31
- "dataset ELTeC",
32
- huggingface_id="EuroEval/eltec-mini",
29
+ pretty_name="ELTeC",
30
+ source="EuroEval/eltec-mini",
33
31
  task=NER,
34
- languages=[FR],
32
+ languages=[FRENCH],
35
33
  )
36
34
 
37
35
  FQUAD_CONFIG = DatasetConfig(
38
36
  name="fquad",
39
- pretty_name="the truncated version of the French reading comprehension dataset "
40
- "FQuAD",
41
- huggingface_id="EuroEval/fquad-mini",
37
+ pretty_name="FQuAD",
38
+ source="EuroEval/fquad-mini",
42
39
  task=RC,
43
- languages=[FR],
40
+ languages=[FRENCH],
44
41
  )
45
42
 
46
43
  ORANGE_SUM_CONFIG = DatasetConfig(
47
44
  name="orange-sum",
48
- pretty_name="the truncated version of the French summarisation dataset OrangeSum",
49
- huggingface_id="EuroEval/orange-sum-mini",
45
+ pretty_name="OrangeSum",
46
+ source="EuroEval/orange-sum-mini",
50
47
  task=SUMM,
51
- languages=[FR],
48
+ languages=[FRENCH],
52
49
  )
53
50
 
54
51
  MMLU_FR_CONFIG = DatasetConfig(
55
52
  name="mmlu-fr",
56
- pretty_name="the truncated version of the French knowledge dataset MMLU-fr, "
57
- "translated from the English MMLU dataset",
58
- huggingface_id="EuroEval/mmlu-fr-mini",
53
+ pretty_name="MMLU-fr",
54
+ source="EuroEval/mmlu-fr-mini",
59
55
  task=KNOW,
60
- languages=[FR],
56
+ languages=[FRENCH],
61
57
  )
62
58
 
63
59
  HELLASWAG_FR_CONFIG = DatasetConfig(
64
60
  name="hellaswag-fr",
65
- pretty_name="the truncated version of the French common-sense reasoning dataset "
66
- "HellaSwag-fr, translated from the English HellaSwag dataset",
67
- huggingface_id="EuroEval/hellaswag-fr-mini",
61
+ pretty_name="HellaSwag-fr",
62
+ source="EuroEval/hellaswag-fr-mini",
68
63
  task=COMMON_SENSE,
69
- languages=[FR],
64
+ languages=[FRENCH],
65
+ )
66
+
67
+ VALEU_FR_CONFIG = DatasetConfig(
68
+ name="valeu-fr",
69
+ pretty_name="VaLEU-fr",
70
+ source="EuroEval/european-values-fr",
71
+ task=EUROPEAN_VALUES,
72
+ languages=[FRENCH],
73
+ splits=["test"],
74
+ bootstrap_samples=False,
75
+ _instruction_prompt="{text}",
70
76
  )
71
77
 
72
78
 
@@ -74,10 +80,37 @@ HELLASWAG_FR_CONFIG = DatasetConfig(
74
80
 
75
81
  BELEBELE_FR_CONFIG = DatasetConfig(
76
82
  name="belebele-fr",
77
- pretty_name="the French multiple choice reading comprehension dataset BeleBele-fr, "
78
- "translated from the English BeleBele dataset",
79
- huggingface_id="EuroEval/belebele-fr-mini",
83
+ pretty_name="Belebele-fr",
84
+ source="EuroEval/belebele-fr-mini",
80
85
  task=MCRC,
81
- languages=[FR],
86
+ languages=[FRENCH],
87
+ unofficial=True,
88
+ )
89
+
90
+ MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
91
+ name="multi-wiki-qa-fr",
92
+ pretty_name="MultiWikiQA-fr",
93
+ source="EuroEval/multi-wiki-qa-fr-mini",
94
+ task=RC,
95
+ languages=[FRENCH],
96
+ unofficial=True,
97
+ )
98
+
99
+ GOLDENSWAG_FR_CONFIG = DatasetConfig(
100
+ name="goldenswag-fr",
101
+ pretty_name="GoldenSwag-fr",
102
+ source="EuroEval/goldenswag-fr-mini",
103
+ task=COMMON_SENSE,
104
+ languages=[FRENCH],
105
+ unofficial=True,
106
+ )
107
+
108
+ WINOGRANDE_FR_CONFIG = DatasetConfig(
109
+ name="winogrande-fr",
110
+ pretty_name="Winogrande-fr",
111
+ source="EuroEval/winogrande-fr",
112
+ task=COMMON_SENSE,
113
+ languages=[FRENCH],
114
+ _labels=["a", "b"],
82
115
  unofficial=True,
83
116
  )