EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +32 -14
- euroeval/benchmark_config_factory.py +92 -180
- euroeval/benchmark_modules/base.py +49 -39
- euroeval/benchmark_modules/fresh.py +35 -21
- euroeval/benchmark_modules/hf.py +280 -244
- euroeval/benchmark_modules/litellm.py +752 -312
- euroeval/benchmark_modules/vllm.py +570 -268
- euroeval/benchmarker.py +651 -528
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +49 -38
- euroeval/constants.py +44 -25
- euroeval/data_loading.py +111 -55
- euroeval/data_models.py +490 -323
- euroeval/dataset_configs/__init__.py +26 -4
- euroeval/dataset_configs/bosnian.py +39 -0
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/croatian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +78 -50
- euroeval/dataset_configs/dutch.py +74 -44
- euroeval/dataset_configs/english.py +71 -36
- euroeval/dataset_configs/estonian.py +111 -0
- euroeval/dataset_configs/faroese.py +25 -18
- euroeval/dataset_configs/finnish.py +63 -26
- euroeval/dataset_configs/french.py +65 -32
- euroeval/dataset_configs/german.py +77 -36
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +68 -57
- euroeval/dataset_configs/italian.py +68 -36
- euroeval/dataset_configs/latvian.py +87 -0
- euroeval/dataset_configs/lithuanian.py +64 -0
- euroeval/dataset_configs/norwegian.py +98 -72
- euroeval/dataset_configs/polish.py +96 -0
- euroeval/dataset_configs/portuguese.py +63 -40
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/slovene.py +56 -0
- euroeval/dataset_configs/spanish.py +68 -34
- euroeval/dataset_configs/swedish.py +82 -41
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/enums.py +12 -6
- euroeval/exceptions.py +21 -1
- euroeval/finetuning.py +34 -26
- euroeval/generation.py +76 -41
- euroeval/generation_utils.py +169 -34
- euroeval/languages.py +1020 -188
- euroeval/logging_utils.py +268 -0
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +85 -0
- euroeval/metrics/huggingface.py +216 -0
- euroeval/metrics/llm_as_a_judge.py +260 -0
- euroeval/metrics/pipeline.py +289 -0
- euroeval/metrics/speed.py +48 -0
- euroeval/model_cache.py +40 -21
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +157 -22
- euroeval/prompt_templates/multiple_choice.py +159 -17
- euroeval/prompt_templates/named_entity_recognition.py +318 -21
- euroeval/prompt_templates/reading_comprehension.py +207 -16
- euroeval/prompt_templates/sentiment_classification.py +205 -22
- euroeval/prompt_templates/summarization.py +122 -22
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +20 -9
- euroeval/speed_benchmark.py +11 -12
- euroeval/task_group_utils/multiple_choice_classification.py +21 -12
- euroeval/task_group_utils/question_answering.py +101 -73
- euroeval/task_group_utils/sequence_classification.py +144 -61
- euroeval/task_group_utils/text_to_text.py +33 -12
- euroeval/task_group_utils/token_classification.py +86 -89
- euroeval/tasks.py +75 -16
- euroeval/tokenisation_utils.py +603 -0
- euroeval/types.py +17 -11
- euroeval/utils.py +332 -137
- euroeval-16.7.1.dist-info/METADATA +623 -0
- euroeval-16.7.1.dist-info/RECORD +84 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -737
- euroeval/metrics.py +0 -452
- euroeval/tokenization_utils.py +0 -498
- euroeval-15.12.0.dist-info/METADATA +0 -285
- euroeval-15.12.0.dist-info/RECORD +0 -63
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,97 +1,132 @@
|
|
|
1
1
|
"""All English dataset configurations used in EuroEval."""
|
|
2
2
|
|
|
3
3
|
from ..data_models import DatasetConfig
|
|
4
|
-
from ..languages import
|
|
5
|
-
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
|
|
4
|
+
from ..languages import ENGLISH
|
|
5
|
+
from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
|
|
6
6
|
|
|
7
7
|
### Official datasets ###
|
|
8
8
|
|
|
9
9
|
SST5_CONFIG = DatasetConfig(
|
|
10
10
|
name="sst5",
|
|
11
|
-
pretty_name="
|
|
12
|
-
"
|
|
13
|
-
huggingface_id="EuroEval/sst5-mini",
|
|
11
|
+
pretty_name="SST-5",
|
|
12
|
+
source="EuroEval/sst5-mini",
|
|
14
13
|
task=SENT,
|
|
15
|
-
languages=[
|
|
14
|
+
languages=[ENGLISH],
|
|
16
15
|
)
|
|
17
16
|
|
|
18
17
|
SCALA_EN_CONFIG = DatasetConfig(
|
|
19
18
|
name="scala-en",
|
|
20
|
-
pretty_name="
|
|
21
|
-
|
|
19
|
+
pretty_name="ScaLA-en",
|
|
20
|
+
source="EuroEval/scala-en",
|
|
22
21
|
task=LA,
|
|
23
|
-
languages=[
|
|
22
|
+
languages=[ENGLISH],
|
|
24
23
|
)
|
|
25
24
|
|
|
26
25
|
CONLL_EN_CONFIG = DatasetConfig(
|
|
27
26
|
name="conll-en",
|
|
28
|
-
pretty_name="
|
|
29
|
-
"
|
|
30
|
-
huggingface_id="EuroEval/conll-en-mini",
|
|
27
|
+
pretty_name="CoNLL-en",
|
|
28
|
+
source="EuroEval/conll-en-mini",
|
|
31
29
|
task=NER,
|
|
32
|
-
languages=[
|
|
30
|
+
languages=[ENGLISH],
|
|
33
31
|
)
|
|
34
32
|
|
|
35
33
|
SQUAD_CONFIG = DatasetConfig(
|
|
36
34
|
name="squad",
|
|
37
|
-
pretty_name="
|
|
38
|
-
|
|
35
|
+
pretty_name="SQuAD",
|
|
36
|
+
source="EuroEval/squad-mini",
|
|
39
37
|
task=RC,
|
|
40
|
-
languages=[
|
|
38
|
+
languages=[ENGLISH],
|
|
41
39
|
)
|
|
42
40
|
|
|
43
41
|
CNN_DAILYMAIL_CONFIG = DatasetConfig(
|
|
44
42
|
name="cnn-dailymail",
|
|
45
|
-
pretty_name="
|
|
46
|
-
"
|
|
47
|
-
huggingface_id="EuroEval/cnn-dailymail-mini",
|
|
43
|
+
pretty_name="CNN/DailyMail",
|
|
44
|
+
source="EuroEval/cnn-dailymail-mini",
|
|
48
45
|
task=SUMM,
|
|
49
|
-
languages=[
|
|
46
|
+
languages=[ENGLISH],
|
|
50
47
|
)
|
|
51
48
|
|
|
52
49
|
LIFE_IN_THE_UK_CONFIG = DatasetConfig(
|
|
53
50
|
name="life-in-the-uk",
|
|
54
|
-
pretty_name="
|
|
55
|
-
|
|
51
|
+
pretty_name="Life in the UK",
|
|
52
|
+
source="EuroEval/life-in-the-uk",
|
|
56
53
|
task=KNOW,
|
|
57
|
-
languages=[
|
|
54
|
+
languages=[ENGLISH],
|
|
58
55
|
)
|
|
59
56
|
|
|
60
57
|
HELLASWAG_CONFIG = DatasetConfig(
|
|
61
58
|
name="hellaswag",
|
|
62
|
-
pretty_name="
|
|
63
|
-
"
|
|
64
|
-
huggingface_id="EuroEval/hellaswag-mini",
|
|
59
|
+
pretty_name="HellaSwag",
|
|
60
|
+
source="EuroEval/hellaswag-mini",
|
|
65
61
|
task=COMMON_SENSE,
|
|
66
|
-
languages=[
|
|
62
|
+
languages=[ENGLISH],
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
VALEU_EN_CONFIG = DatasetConfig(
|
|
66
|
+
name="valeu-en",
|
|
67
|
+
pretty_name="VaLEU-en",
|
|
68
|
+
source="EuroEval/european-values-en",
|
|
69
|
+
task=EUROPEAN_VALUES,
|
|
70
|
+
languages=[ENGLISH],
|
|
71
|
+
splits=["test"],
|
|
72
|
+
bootstrap_samples=False,
|
|
73
|
+
_instruction_prompt="{text}",
|
|
67
74
|
)
|
|
68
75
|
|
|
69
76
|
|
|
70
77
|
### Unofficial datasets ###
|
|
71
78
|
|
|
79
|
+
XQUAD_EN_CONFIG = DatasetConfig(
|
|
80
|
+
name="xquad-en",
|
|
81
|
+
pretty_name="XQuAD-en",
|
|
82
|
+
source="EuroEval/xquad-en",
|
|
83
|
+
task=RC,
|
|
84
|
+
languages=[ENGLISH],
|
|
85
|
+
unofficial=True,
|
|
86
|
+
)
|
|
87
|
+
|
|
72
88
|
ARC_CONFIG = DatasetConfig(
|
|
73
89
|
name="arc",
|
|
74
|
-
pretty_name="
|
|
75
|
-
|
|
90
|
+
pretty_name="ARC",
|
|
91
|
+
source="EuroEval/arc-mini",
|
|
76
92
|
task=KNOW,
|
|
77
|
-
languages=[
|
|
93
|
+
languages=[ENGLISH],
|
|
78
94
|
unofficial=True,
|
|
79
95
|
)
|
|
80
96
|
|
|
81
97
|
BELEBELE_CONFIG = DatasetConfig(
|
|
82
98
|
name="belebele-en",
|
|
83
|
-
pretty_name="
|
|
84
|
-
|
|
99
|
+
pretty_name="Belebele-en",
|
|
100
|
+
source="EuroEval/belebele-mini",
|
|
85
101
|
task=MCRC,
|
|
86
|
-
languages=[
|
|
102
|
+
languages=[ENGLISH],
|
|
87
103
|
unofficial=True,
|
|
88
104
|
)
|
|
89
105
|
|
|
90
106
|
MMLU_CONFIG = DatasetConfig(
|
|
91
107
|
name="mmlu",
|
|
92
|
-
pretty_name="
|
|
93
|
-
|
|
108
|
+
pretty_name="MMLU",
|
|
109
|
+
source="EuroEval/mmlu-mini",
|
|
94
110
|
task=KNOW,
|
|
95
|
-
languages=[
|
|
111
|
+
languages=[ENGLISH],
|
|
112
|
+
unofficial=True,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
|
|
116
|
+
name="multi-wiki-qa-en",
|
|
117
|
+
pretty_name="MultiWikiQA-en",
|
|
118
|
+
source="EuroEval/multi-wiki-qa-en-mini",
|
|
119
|
+
task=RC,
|
|
120
|
+
languages=[ENGLISH],
|
|
121
|
+
unofficial=True,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
WINOGRANDE_CONFIG = DatasetConfig(
|
|
125
|
+
name="winogrande",
|
|
126
|
+
pretty_name="Winogrande-en",
|
|
127
|
+
source="EuroEval/winogrande-en",
|
|
128
|
+
task=COMMON_SENSE,
|
|
129
|
+
languages=[ENGLISH],
|
|
130
|
+
_labels=["a", "b"],
|
|
96
131
|
unofficial=True,
|
|
97
132
|
)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""All Estonian dataset configurations used in EuroEval."""
|
|
2
|
+
|
|
3
|
+
from ..data_models import DatasetConfig
|
|
4
|
+
from ..languages import ESTONIAN
|
|
5
|
+
from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
|
|
6
|
+
|
|
7
|
+
### Official datasets ###
|
|
8
|
+
|
|
9
|
+
ESTONIAN_VALENCE_CONFIG = DatasetConfig(
|
|
10
|
+
name="estonian-valence",
|
|
11
|
+
pretty_name="Estonian Valence",
|
|
12
|
+
source="EuroEval/estonian-valence",
|
|
13
|
+
task=SENT,
|
|
14
|
+
languages=[ESTONIAN],
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
GRAMMAR_ET_CONFIG = DatasetConfig(
|
|
18
|
+
name="grammar-et",
|
|
19
|
+
pretty_name="Grammar-et",
|
|
20
|
+
source="EuroEval/grammar-et",
|
|
21
|
+
task=LA,
|
|
22
|
+
languages=[ESTONIAN],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
ESTNER_CONFIG = DatasetConfig(
|
|
26
|
+
name="estner",
|
|
27
|
+
pretty_name="EstNER",
|
|
28
|
+
source="EuroEval/estner-mini",
|
|
29
|
+
task=NER,
|
|
30
|
+
languages=[ESTONIAN],
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
MULTI_WIKI_QA_ET_CONFIG = DatasetConfig(
|
|
34
|
+
name="multi-wiki-qa-et",
|
|
35
|
+
pretty_name="MultiWikiQA-et",
|
|
36
|
+
source="EuroEval/multi-wiki-qa-et-mini",
|
|
37
|
+
task=RC,
|
|
38
|
+
languages=[ESTONIAN],
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
ERR_NEWS_CONFIG = DatasetConfig(
|
|
42
|
+
name="err-news",
|
|
43
|
+
pretty_name="ERR News",
|
|
44
|
+
source="EuroEval/err-news-mini",
|
|
45
|
+
task=SUMM,
|
|
46
|
+
languages=[ESTONIAN],
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
TRIVIA_ET_CONFIG = DatasetConfig(
|
|
50
|
+
name="trivia-et",
|
|
51
|
+
pretty_name="Trivia-et",
|
|
52
|
+
source="EuroEval/trivia-et",
|
|
53
|
+
task=KNOW,
|
|
54
|
+
languages=[ESTONIAN],
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
WINOGRANDE_ET_CONFIG = DatasetConfig(
|
|
58
|
+
name="winogrande-et",
|
|
59
|
+
pretty_name="Winogrande-et",
|
|
60
|
+
source="EuroEval/winogrande-et",
|
|
61
|
+
task=COMMON_SENSE,
|
|
62
|
+
languages=[ESTONIAN],
|
|
63
|
+
_prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
|
|
64
|
+
"igal ülesandel on kaks vastusevarianti (a ja b).",
|
|
65
|
+
_prompt_template="Tekstülesanne: {text}\nVastus: {label}",
|
|
66
|
+
_instruction_prompt="Tekstülesanne: {text}\n\n"
|
|
67
|
+
"Sinu ülesanne on valida lünka sobiv vastusevariant. "
|
|
68
|
+
"Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
|
|
69
|
+
_labels=["a", "b"],
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
VALEU_ET_CONFIG = DatasetConfig(
|
|
73
|
+
name="valeu-et",
|
|
74
|
+
pretty_name="VaLEU-et",
|
|
75
|
+
source="EuroEval/european-values-et",
|
|
76
|
+
task=EUROPEAN_VALUES,
|
|
77
|
+
languages=[ESTONIAN],
|
|
78
|
+
splits=["test"],
|
|
79
|
+
bootstrap_samples=False,
|
|
80
|
+
_instruction_prompt="{text}",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
### Unofficial datasets ###
|
|
84
|
+
|
|
85
|
+
SCALA_ET_CONFIG = DatasetConfig(
|
|
86
|
+
name="scala-et",
|
|
87
|
+
pretty_name="ScaLA-et",
|
|
88
|
+
source="EuroEval/scala-et",
|
|
89
|
+
task=LA,
|
|
90
|
+
languages=[ESTONIAN],
|
|
91
|
+
unofficial=True,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
EXAM_ET_CONFIG = DatasetConfig(
|
|
95
|
+
name="exam-et",
|
|
96
|
+
pretty_name="Exam-et",
|
|
97
|
+
source="EuroEval/exam-et",
|
|
98
|
+
task=KNOW,
|
|
99
|
+
languages=[ESTONIAN],
|
|
100
|
+
_labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
|
|
101
|
+
unofficial=True,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
MMLU_ET_CONFIG = DatasetConfig(
|
|
105
|
+
name="mmlu-et",
|
|
106
|
+
pretty_name="MMLU-et",
|
|
107
|
+
source="EuroEval/mmlu-et-mini",
|
|
108
|
+
task=KNOW,
|
|
109
|
+
languages=[ESTONIAN],
|
|
110
|
+
unofficial=True,
|
|
111
|
+
)
|
|
@@ -1,43 +1,42 @@
|
|
|
1
1
|
"""All Faroese dataset configurations used in EuroEval."""
|
|
2
2
|
|
|
3
3
|
from ..data_models import DatasetConfig
|
|
4
|
-
from ..languages import
|
|
4
|
+
from ..languages import FAROESE
|
|
5
5
|
from ..tasks import LA, NER, RC, SENT
|
|
6
6
|
|
|
7
7
|
### Official datasets ###
|
|
8
8
|
|
|
9
9
|
FOSENT_CONFIG = DatasetConfig(
|
|
10
10
|
name="fosent",
|
|
11
|
-
pretty_name="
|
|
12
|
-
|
|
11
|
+
pretty_name="FoSent",
|
|
12
|
+
source="EuroEval/fosent",
|
|
13
13
|
task=SENT,
|
|
14
|
-
languages=[
|
|
14
|
+
languages=[FAROESE],
|
|
15
15
|
_num_few_shot_examples=5,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
SCALA_FO_CONFIG = DatasetConfig(
|
|
19
19
|
name="scala-fo",
|
|
20
|
-
pretty_name="
|
|
21
|
-
|
|
20
|
+
pretty_name="ScaLA-fo",
|
|
21
|
+
source="EuroEval/scala-fo",
|
|
22
22
|
task=LA,
|
|
23
|
-
languages=[
|
|
23
|
+
languages=[FAROESE],
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
FONE_CONFIG = DatasetConfig(
|
|
27
27
|
name="fone",
|
|
28
|
-
pretty_name="
|
|
29
|
-
"
|
|
30
|
-
huggingface_id="EuroEval/fone-mini",
|
|
28
|
+
pretty_name="FoNE",
|
|
29
|
+
source="EuroEval/fone-mini",
|
|
31
30
|
task=NER,
|
|
32
|
-
languages=[
|
|
31
|
+
languages=[FAROESE],
|
|
33
32
|
)
|
|
34
33
|
|
|
35
34
|
FOQA_CONFIG = DatasetConfig(
|
|
36
35
|
name="foqa",
|
|
37
|
-
pretty_name="
|
|
38
|
-
|
|
36
|
+
pretty_name="FoQA",
|
|
37
|
+
source="EuroEval/foqa",
|
|
39
38
|
task=RC,
|
|
40
|
-
languages=[
|
|
39
|
+
languages=[FAROESE],
|
|
41
40
|
)
|
|
42
41
|
|
|
43
42
|
|
|
@@ -45,10 +44,18 @@ FOQA_CONFIG = DatasetConfig(
|
|
|
45
44
|
|
|
46
45
|
WIKIANN_FO_CONFIG = DatasetConfig(
|
|
47
46
|
name="wikiann-fo",
|
|
48
|
-
pretty_name="
|
|
49
|
-
"
|
|
50
|
-
huggingface_id="EuroEval/wikiann-fo-mini",
|
|
47
|
+
pretty_name="WikiANN-fo",
|
|
48
|
+
source="EuroEval/wikiann-fo-mini",
|
|
51
49
|
task=NER,
|
|
52
|
-
languages=[
|
|
50
|
+
languages=[FAROESE],
|
|
51
|
+
unofficial=True,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
|
|
55
|
+
name="multi-wiki-qa-fo",
|
|
56
|
+
pretty_name="MultiWikiQA-fo",
|
|
57
|
+
source="EuroEval/multi-wiki-qa-fo-mini",
|
|
58
|
+
task=RC,
|
|
59
|
+
languages=[FAROESE],
|
|
53
60
|
unofficial=True,
|
|
54
61
|
)
|
|
@@ -1,70 +1,107 @@
|
|
|
1
1
|
"""All Finnish dataset configurations used in EuroEval."""
|
|
2
2
|
|
|
3
3
|
from ..data_models import DatasetConfig
|
|
4
|
-
from ..languages import
|
|
5
|
-
from ..tasks import COMMON_SENSE, LA, MCRC, NER, RC, SENT, SUMM
|
|
4
|
+
from ..languages import FINNISH
|
|
5
|
+
from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
|
|
6
6
|
|
|
7
7
|
### Official datasets ###
|
|
8
8
|
|
|
9
9
|
SCANDISENT_FI_CONFIG = DatasetConfig(
|
|
10
10
|
name="scandisent-fi",
|
|
11
|
-
pretty_name="
|
|
12
|
-
"
|
|
13
|
-
huggingface_id="EuroEval/scandisent-fi-mini",
|
|
11
|
+
pretty_name="ScandiSent-fi",
|
|
12
|
+
source="EuroEval/scandisent-fi-mini",
|
|
14
13
|
task=SENT,
|
|
15
|
-
languages=[
|
|
14
|
+
languages=[FINNISH],
|
|
16
15
|
_labels=["negative", "positive"],
|
|
17
16
|
)
|
|
18
17
|
|
|
19
18
|
TURKU_NER_FI_CONFIG = DatasetConfig(
|
|
20
19
|
name="turku-ner-fi",
|
|
21
|
-
pretty_name="
|
|
22
|
-
|
|
20
|
+
pretty_name="Turku NER-fi",
|
|
21
|
+
source="EuroEval/turku-ner-fi-mini",
|
|
23
22
|
task=NER,
|
|
24
|
-
languages=[
|
|
23
|
+
languages=[FINNISH],
|
|
25
24
|
)
|
|
26
25
|
|
|
27
26
|
TYDIQA_FI_CONFIG = DatasetConfig(
|
|
28
27
|
name="tydiqa-fi",
|
|
29
|
-
pretty_name="
|
|
30
|
-
|
|
28
|
+
pretty_name="TyDiQA-fi",
|
|
29
|
+
source="EuroEval/tydiqa-fi-mini",
|
|
31
30
|
task=RC,
|
|
32
|
-
languages=[
|
|
31
|
+
languages=[FINNISH],
|
|
33
32
|
)
|
|
34
33
|
|
|
35
34
|
XLSUM_FI_CONFIG = DatasetConfig(
|
|
36
35
|
name="xlsum-fi",
|
|
37
|
-
pretty_name="
|
|
38
|
-
|
|
36
|
+
pretty_name="XLSum-fi",
|
|
37
|
+
source="EuroEval/xlsum-fi-mini",
|
|
39
38
|
task=SUMM,
|
|
40
|
-
languages=[
|
|
39
|
+
languages=[FINNISH],
|
|
41
40
|
)
|
|
42
41
|
|
|
43
42
|
HELLASWAG_FI_CONFIG = DatasetConfig(
|
|
44
43
|
name="hellaswag-fi",
|
|
45
|
-
pretty_name="
|
|
46
|
-
"
|
|
47
|
-
huggingface_id="EuroEval/hellaswag-fi-mini",
|
|
44
|
+
pretty_name="HellaSwag-fi",
|
|
45
|
+
source="EuroEval/hellaswag-fi-mini",
|
|
48
46
|
task=COMMON_SENSE,
|
|
49
|
-
languages=[
|
|
47
|
+
languages=[FINNISH],
|
|
50
48
|
)
|
|
51
49
|
|
|
52
50
|
SCALA_FI_CONFIG = DatasetConfig(
|
|
53
51
|
name="scala-fi",
|
|
54
|
-
pretty_name="
|
|
55
|
-
|
|
52
|
+
pretty_name="ScaLA-fi",
|
|
53
|
+
source="EuroEval/scala-fi",
|
|
56
54
|
task=LA,
|
|
57
|
-
languages=[
|
|
55
|
+
languages=[FINNISH],
|
|
58
56
|
)
|
|
59
57
|
|
|
58
|
+
VALEU_FI_CONFIG = DatasetConfig(
|
|
59
|
+
name="valeu-fi",
|
|
60
|
+
pretty_name="VaLEU-fi",
|
|
61
|
+
source="EuroEval/european-values-fi",
|
|
62
|
+
task=EUROPEAN_VALUES,
|
|
63
|
+
languages=[FINNISH],
|
|
64
|
+
splits=["test"],
|
|
65
|
+
bootstrap_samples=False,
|
|
66
|
+
_instruction_prompt="{text}",
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
|
|
60
70
|
### Unofficial datasets ###
|
|
61
71
|
|
|
62
72
|
BELEBELE_FI_CONFIG = DatasetConfig(
|
|
63
73
|
name="belebele-fi",
|
|
64
|
-
pretty_name="
|
|
65
|
-
"
|
|
66
|
-
huggingface_id="EuroEval/belebele-fi-mini",
|
|
74
|
+
pretty_name="Belebele-fi",
|
|
75
|
+
source="EuroEval/belebele-fi-mini",
|
|
67
76
|
task=MCRC,
|
|
68
|
-
languages=[
|
|
77
|
+
languages=[FINNISH],
|
|
78
|
+
unofficial=True,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
|
|
82
|
+
name="multi-wiki-qa-fi",
|
|
83
|
+
pretty_name="MultiWikiQA-fi",
|
|
84
|
+
source="EuroEval/multi-wiki-qa-fi-mini",
|
|
85
|
+
task=RC,
|
|
86
|
+
languages=[FINNISH],
|
|
87
|
+
unofficial=True,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
GOLDENSWAG_FI_CONFIG = DatasetConfig(
|
|
91
|
+
name="goldenswag-fi",
|
|
92
|
+
pretty_name="GoldenSwag-fi",
|
|
93
|
+
source="EuroEval/goldenswag-fi-mini",
|
|
94
|
+
task=COMMON_SENSE,
|
|
95
|
+
languages=[FINNISH],
|
|
96
|
+
unofficial=True,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
WINOGRANDE_FI_CONFIG = DatasetConfig(
|
|
100
|
+
name="winogrande-fi",
|
|
101
|
+
pretty_name="Winogrande-fi",
|
|
102
|
+
source="EuroEval/winogrande-fi",
|
|
103
|
+
task=COMMON_SENSE,
|
|
104
|
+
languages=[FINNISH],
|
|
105
|
+
_labels=["a", "b"],
|
|
69
106
|
unofficial=True,
|
|
70
107
|
)
|
|
@@ -1,72 +1,78 @@
|
|
|
1
1
|
"""All French dataset configurations used in EuroEval."""
|
|
2
2
|
|
|
3
3
|
from ..data_models import DatasetConfig
|
|
4
|
-
from ..languages import
|
|
5
|
-
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
|
|
4
|
+
from ..languages import FRENCH
|
|
5
|
+
from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
|
|
6
6
|
|
|
7
7
|
### Official datasets ###
|
|
8
8
|
|
|
9
9
|
ALLOCINE_CONFIG = DatasetConfig(
|
|
10
10
|
name="allocine",
|
|
11
|
-
pretty_name="
|
|
12
|
-
"
|
|
13
|
-
huggingface_id="EuroEval/allocine-mini",
|
|
11
|
+
pretty_name="AlloCiné",
|
|
12
|
+
source="EuroEval/allocine-mini",
|
|
14
13
|
task=SENT,
|
|
15
|
-
languages=[
|
|
14
|
+
languages=[FRENCH],
|
|
16
15
|
_labels=["negative", "positive"],
|
|
17
16
|
_prompt_label_mapping=dict(positive="positif", negative="négatif"),
|
|
18
17
|
)
|
|
19
18
|
|
|
20
19
|
SCALA_FR_CONFIG = DatasetConfig(
|
|
21
20
|
name="scala-fr",
|
|
22
|
-
pretty_name="
|
|
23
|
-
|
|
21
|
+
pretty_name="ScaLA-fr",
|
|
22
|
+
source="EuroEval/scala-fr",
|
|
24
23
|
task=LA,
|
|
25
|
-
languages=[
|
|
24
|
+
languages=[FRENCH],
|
|
26
25
|
)
|
|
27
26
|
|
|
28
27
|
ELTEC_CONFIG = DatasetConfig(
|
|
29
28
|
name="eltec",
|
|
30
|
-
pretty_name="
|
|
31
|
-
"
|
|
32
|
-
huggingface_id="EuroEval/eltec-mini",
|
|
29
|
+
pretty_name="ELTeC",
|
|
30
|
+
source="EuroEval/eltec-mini",
|
|
33
31
|
task=NER,
|
|
34
|
-
languages=[
|
|
32
|
+
languages=[FRENCH],
|
|
35
33
|
)
|
|
36
34
|
|
|
37
35
|
FQUAD_CONFIG = DatasetConfig(
|
|
38
36
|
name="fquad",
|
|
39
|
-
pretty_name="
|
|
40
|
-
"
|
|
41
|
-
huggingface_id="EuroEval/fquad-mini",
|
|
37
|
+
pretty_name="FQuAD",
|
|
38
|
+
source="EuroEval/fquad-mini",
|
|
42
39
|
task=RC,
|
|
43
|
-
languages=[
|
|
40
|
+
languages=[FRENCH],
|
|
44
41
|
)
|
|
45
42
|
|
|
46
43
|
ORANGE_SUM_CONFIG = DatasetConfig(
|
|
47
44
|
name="orange-sum",
|
|
48
|
-
pretty_name="
|
|
49
|
-
|
|
45
|
+
pretty_name="OrangeSum",
|
|
46
|
+
source="EuroEval/orange-sum-mini",
|
|
50
47
|
task=SUMM,
|
|
51
|
-
languages=[
|
|
48
|
+
languages=[FRENCH],
|
|
52
49
|
)
|
|
53
50
|
|
|
54
51
|
MMLU_FR_CONFIG = DatasetConfig(
|
|
55
52
|
name="mmlu-fr",
|
|
56
|
-
pretty_name="
|
|
57
|
-
"
|
|
58
|
-
huggingface_id="EuroEval/mmlu-fr-mini",
|
|
53
|
+
pretty_name="MMLU-fr",
|
|
54
|
+
source="EuroEval/mmlu-fr-mini",
|
|
59
55
|
task=KNOW,
|
|
60
|
-
languages=[
|
|
56
|
+
languages=[FRENCH],
|
|
61
57
|
)
|
|
62
58
|
|
|
63
59
|
HELLASWAG_FR_CONFIG = DatasetConfig(
|
|
64
60
|
name="hellaswag-fr",
|
|
65
|
-
pretty_name="
|
|
66
|
-
"
|
|
67
|
-
huggingface_id="EuroEval/hellaswag-fr-mini",
|
|
61
|
+
pretty_name="HellaSwag-fr",
|
|
62
|
+
source="EuroEval/hellaswag-fr-mini",
|
|
68
63
|
task=COMMON_SENSE,
|
|
69
|
-
languages=[
|
|
64
|
+
languages=[FRENCH],
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
VALEU_FR_CONFIG = DatasetConfig(
|
|
68
|
+
name="valeu-fr",
|
|
69
|
+
pretty_name="VaLEU-fr",
|
|
70
|
+
source="EuroEval/european-values-fr",
|
|
71
|
+
task=EUROPEAN_VALUES,
|
|
72
|
+
languages=[FRENCH],
|
|
73
|
+
splits=["test"],
|
|
74
|
+
bootstrap_samples=False,
|
|
75
|
+
_instruction_prompt="{text}",
|
|
70
76
|
)
|
|
71
77
|
|
|
72
78
|
|
|
@@ -74,10 +80,37 @@ HELLASWAG_FR_CONFIG = DatasetConfig(
|
|
|
74
80
|
|
|
75
81
|
BELEBELE_FR_CONFIG = DatasetConfig(
|
|
76
82
|
name="belebele-fr",
|
|
77
|
-
pretty_name="
|
|
78
|
-
"
|
|
79
|
-
huggingface_id="EuroEval/belebele-fr-mini",
|
|
83
|
+
pretty_name="Belebele-fr",
|
|
84
|
+
source="EuroEval/belebele-fr-mini",
|
|
80
85
|
task=MCRC,
|
|
81
|
-
languages=[
|
|
86
|
+
languages=[FRENCH],
|
|
87
|
+
unofficial=True,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
|
|
91
|
+
name="multi-wiki-qa-fr",
|
|
92
|
+
pretty_name="MultiWikiQA-fr",
|
|
93
|
+
source="EuroEval/multi-wiki-qa-fr-mini",
|
|
94
|
+
task=RC,
|
|
95
|
+
languages=[FRENCH],
|
|
96
|
+
unofficial=True,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
GOLDENSWAG_FR_CONFIG = DatasetConfig(
|
|
100
|
+
name="goldenswag-fr",
|
|
101
|
+
pretty_name="GoldenSwag-fr",
|
|
102
|
+
source="EuroEval/goldenswag-fr-mini",
|
|
103
|
+
task=COMMON_SENSE,
|
|
104
|
+
languages=[FRENCH],
|
|
105
|
+
unofficial=True,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
WINOGRANDE_FR_CONFIG = DatasetConfig(
|
|
109
|
+
name="winogrande-fr",
|
|
110
|
+
pretty_name="Winogrande-fr",
|
|
111
|
+
source="EuroEval/winogrande-fr",
|
|
112
|
+
task=COMMON_SENSE,
|
|
113
|
+
languages=[FRENCH],
|
|
114
|
+
_labels=["a", "b"],
|
|
82
115
|
unofficial=True,
|
|
83
116
|
)
|