EuroEval 15.5.0-py3-none-any.whl → 15.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/benchmark_modules/base.py +3 -2
- euroeval/benchmark_modules/fresh.py +8 -6
- euroeval/benchmark_modules/hf.py +33 -31
- euroeval/benchmark_modules/litellm.py +120 -56
- euroeval/benchmark_modules/vllm.py +41 -26
- euroeval/benchmarker.py +23 -21
- euroeval/callbacks.py +2 -2
- euroeval/constants.py +1 -1
- euroeval/data_models.py +257 -42
- euroeval/dataset_configs/__init__.py +61 -0
- euroeval/dataset_configs/danish.py +120 -0
- euroeval/dataset_configs/dutch.py +123 -0
- euroeval/dataset_configs/english.py +88 -0
- euroeval/dataset_configs/faroese.py +53 -0
- euroeval/dataset_configs/french.py +83 -0
- euroeval/dataset_configs/german.py +91 -0
- euroeval/dataset_configs/icelandic.py +148 -0
- euroeval/dataset_configs/italian.py +81 -0
- euroeval/dataset_configs/norwegian.py +178 -0
- euroeval/dataset_configs/spanish.py +78 -0
- euroeval/dataset_configs/swedish.py +100 -0
- euroeval/exceptions.py +10 -10
- euroeval/finetuning.py +6 -10
- euroeval/generation.py +1 -0
- euroeval/human_evaluation.py +2 -2
- euroeval/languages.py +20 -13
- euroeval/model_cache.py +1 -1
- euroeval/model_loading.py +1 -12
- euroeval/prompt_templates/__init__.py +8 -0
- euroeval/prompt_templates/linguistic_acceptability.py +112 -0
- euroeval/prompt_templates/multiple_choice.py +97 -0
- euroeval/prompt_templates/named_entity_recognition.py +257 -0
- euroeval/prompt_templates/reading_comprehension.py +118 -0
- euroeval/prompt_templates/sentiment_classification.py +137 -0
- euroeval/prompt_templates/summarization.py +97 -0
- euroeval/speed_benchmark.py +1 -1
- euroeval/{task_utils → task_group_utils}/multiple_choice_classification.py +19 -11
- euroeval/{task_utils → task_group_utils}/question_answering.py +31 -30
- euroeval/{task_utils → task_group_utils}/sequence_classification.py +1 -1
- euroeval/{task_utils → task_group_utils}/text_to_text.py +1 -1
- euroeval/{task_utils → task_group_utils}/token_classification.py +3 -2
- euroeval/tasks.py +54 -0
- euroeval/tokenization_utils.py +343 -0
- euroeval/types.py +3 -1
- euroeval/utils.py +2 -347
- {euroeval-15.5.0.dist-info → euroeval-15.6.0.dist-info}/METADATA +30 -9
- euroeval-15.6.0.dist-info/RECORD +59 -0
- euroeval/dataset_configs.py +0 -2408
- euroeval-15.5.0.dist-info/RECORD +0 -40
- /euroeval/{task_utils → task_group_utils}/__init__.py +0 -0
- {euroeval-15.5.0.dist-info → euroeval-15.6.0.dist-info}/WHEEL +0 -0
- {euroeval-15.5.0.dist-info → euroeval-15.6.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.5.0.dist-info → euroeval-15.6.0.dist-info}/licenses/LICENSE +0 -0
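The headline change in this release is structural: the monolithic euroeval/dataset_configs.py (removed, −2408 lines) is split into per-language modules under euroeval/dataset_configs/, gathered by a new package __init__.py, the prompt templates move into a dedicated euroeval/prompt_templates/ package, and the task_utils package is renamed to task_group_utils. The new __init__.py itself is not shown in this excerpt; the snippet below is only a minimal sketch of how such per-language modules could be aggregated into a single name-to-config registry — the function name get_all_dataset_configs and the pkgutil-based discovery are illustrative assumptions, not EuroEval's confirmed API.

    # Illustrative sketch only -- not the actual contents of
    # euroeval/dataset_configs/__init__.py, which this diff excerpt does not show.
    import importlib
    import pkgutil

    from euroeval.data_models import DatasetConfig


    def get_all_dataset_configs() -> dict[str, DatasetConfig]:
        """Collect every DatasetConfig defined in the per-language modules (assumed helper)."""
        package = importlib.import_module("euroeval.dataset_configs")
        configs: dict[str, DatasetConfig] = {}
        for module_info in pkgutil.iter_modules(package.__path__):
            module = importlib.import_module(
                f"euroeval.dataset_configs.{module_info.name}"
            )
            for obj in vars(module).values():
                if isinstance(obj, DatasetConfig):
                    configs[obj.name] = obj
        return configs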
euroeval/dataset_configs/dutch.py (new file, +123 lines)
@@ -0,0 +1,123 @@
+"""All Dutch dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import NL
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+DUTCH_SOCIAL_CONFIG = DatasetConfig(
+    name="dutch-social",
+    pretty_name="the truncated version of the Dutch sentiment classification "
+    "dataset Dutch Social",
+    huggingface_id="EuroEval/dutch-social-mini",
+    task=SENT,
+    languages=[NL],
+)
+
+SCALA_NL_CONFIG = DatasetConfig(
+    name="scala-nl",
+    pretty_name="the Dutch part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-nl",
+    task=LA,
+    languages=[NL],
+)
+
+CONLL_NL_CONFIG = DatasetConfig(
+    name="conll-nl",
+    pretty_name="the Dutch part of the truncated version of the named entity "
+    "recognition dataset CoNLL 2002",
+    huggingface_id="EuroEval/conll-nl-mini",
+    task=NER,
+    languages=[NL],
+)
+
+SQUAD_NL_CONFIG = DatasetConfig(
+    name="squad-nl",
+    pretty_name="the truncated version of the Dutch reading comprehension dataset "
+    "SQuAD-nl, translated from the English SQuAD dataset",
+    huggingface_id="EuroEval/squad-nl-v2-mini",
+    task=RC,
+    languages=[NL],
+)
+
+WIKI_LINGUA_NL_CONFIG = DatasetConfig(
+    name="wiki-lingua-nl",
+    pretty_name="the Dutch part of the truncated version of the summarisation dataset "
+    "WikiLingua",
+    huggingface_id="EuroEval/wiki-lingua-nl-mini",
+    task=SUMM,
+    languages=[NL],
+)
+
+MMLU_NL_CONFIG = DatasetConfig(
+    name="mmlu-nl",
+    pretty_name="the truncated version of the Dutch knowledge dataset MMLU-nl, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-nl-mini",
+    task=KNOW,
+    languages=[NL],
+)
+
+HELLASWAG_NL_CONFIG = DatasetConfig(
+    name="hellaswag-nl",
+    pretty_name="the truncated version of the Dutch common-sense reasoning dataset "
+    "HellaSwag-nl, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-nl-mini",
+    task=COMMON_SENSE,
+    languages=[NL],
+)
+
+
+### Unofficial datasets ###
+
+DBRD_CONFIG = DatasetConfig(
+    name="dbrd",
+    pretty_name="the truncated version of the Dutch sentiment classification "
+    "dataset DBRD",
+    huggingface_id="EuroEval/dbrd-mini",
+    task=SENT,
+    languages=[NL],
+    _labels=["negative", "positive"],
+    _prompt_label_mapping=dict(positive="positief", negative="negatief"),
+    unofficial=True,
+)
+
+DUTCH_COLA_CONFIG = DatasetConfig(
+    name="dutch-cola",
+    pretty_name="the truncated version of the Dutch linguistic acceptability dataset "
+    "Dutch CoLA",
+    huggingface_id="EuroEval/dutch-cola",
+    task=LA,
+    languages=[NL],
+    unofficial=True,
+)
+
+DUTCH_COLA_FULL_CONFIG = DatasetConfig(
+    name="dutch-cola-full",
+    pretty_name="the Dutch linguistic acceptability dataset Dutch CoLA",
+    huggingface_id="EuroEval/dutch-cola-full",
+    task=LA,
+    languages=[NL],
+    unofficial=True,
+)
+
+ARC_NL_CONFIG = DatasetConfig(
+    name="arc-nl",
+    pretty_name="the truncated version of the Dutch knowledge dataset ARC-nl, "
+    "translated from the English ARC dataset",
+    huggingface_id="EuroEval/arc-nl-mini",
+    task=KNOW,
+    languages=[NL],
+    unofficial=True,
+)
+
+BELEBELE_NL_CONFIG = DatasetConfig(
+    name="belebele-nl",
+    pretty_name="the Dutch multiple choice reading comprehension dataset BeleBele-nl, "
+    "translated from the English BeleBele dataset",
+    huggingface_id="EuroEval/belebele-nl-mini",
+    task=MCRC,
+    languages=[NL],
+    unofficial=True,
+)
euroeval/dataset_configs/english.py (new file, +88 lines)
@@ -0,0 +1,88 @@
+"""All English dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import EN
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+SST5_CONFIG = DatasetConfig(
+    name="sst5",
+    pretty_name="the truncated version of the English sentiment classification "
+    "dataset SST5",
+    huggingface_id="EuroEval/sst5-mini",
+    task=SENT,
+    languages=[EN],
+)
+
+SCALA_EN_CONFIG = DatasetConfig(
+    name="scala-en",
+    pretty_name="the English part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-en",
+    task=LA,
+    languages=[EN],
+)
+
+CONLL_EN_CONFIG = DatasetConfig(
+    name="conll-en",
+    pretty_name="the truncated version of the English named entity recognition "
+    "dataset CoNLL 2003",
+    huggingface_id="EuroEval/conll-en-mini",
+    task=NER,
+    languages=[EN],
+)
+
+SQUAD_CONFIG = DatasetConfig(
+    name="squad",
+    pretty_name="the truncated version of the English question answering dataset SQuAD",
+    huggingface_id="EuroEval/squad-mini",
+    task=RC,
+    languages=[EN],
+)
+
+CNN_DAILYMAIL_CONFIG = DatasetConfig(
+    name="cnn-dailymail",
+    pretty_name="the truncated version of the English summarisation dataset "
+    "CNN-DailyMail",
+    huggingface_id="EuroEval/cnn-dailymail-mini",
+    task=SUMM,
+    languages=[EN],
+)
+
+MMLU_CONFIG = DatasetConfig(
+    name="mmlu",
+    pretty_name="the truncated version of the English knowledge dataset MMLU",
+    huggingface_id="EuroEval/mmlu-mini",
+    task=KNOW,
+    languages=[EN],
+)
+
+HELLASWAG_CONFIG = DatasetConfig(
+    name="hellaswag",
+    pretty_name="the truncated version of the English common-sense reasoning "
+    "dataset HellaSwag",
+    huggingface_id="EuroEval/hellaswag-mini",
+    task=COMMON_SENSE,
+    languages=[EN],
+)
+
+
+### Unofficial datasets ###
+
+ARC_CONFIG = DatasetConfig(
+    name="arc",
+    pretty_name="the truncated version of the English knowledge dataset ARC",
+    huggingface_id="EuroEval/arc-mini",
+    task=KNOW,
+    languages=[EN],
+    unofficial=True,
+)
+
+BELEBELE_CONFIG = DatasetConfig(
+    name="belebele",
+    pretty_name="the English multiple choice reading comprehension dataset BeleBele",
+    huggingface_id="EuroEval/belebele-mini",
+    task=MCRC,
+    languages=[EN],
+    unofficial=True,
+)
euroeval/dataset_configs/faroese.py (new file, +53 lines)
@@ -0,0 +1,53 @@
+"""All Faroese dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import FO
+from ..tasks import LA, NER, RC, SENT
+
+### Official datasets ###
+
+FOSENT_CONFIG = DatasetConfig(
+    name="fosent",
+    pretty_name="the Faroese sentiment classification dataset FoSent",
+    huggingface_id="EuroEval/fosent",
+    task=SENT,
+    languages=[FO],
+)
+
+SCALA_FO_CONFIG = DatasetConfig(
+    name="scala-fo",
+    pretty_name="the Faroese part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-fo",
+    task=LA,
+    languages=[FO],
+)
+
+FONE_CONFIG = DatasetConfig(
+    name="fone",
+    pretty_name="the truncated version of the Faroese named entity recognition "
+    "dataset FoNE",
+    huggingface_id="EuroEval/fone-mini",
+    task=NER,
+    languages=[FO],
+)
+
+FOQA_CONFIG = DatasetConfig(
+    name="foqa",
+    pretty_name="the Faroese reading comprehension dataset FoQA",
+    huggingface_id="EuroEval/foqa",
+    task=RC,
+    languages=[FO],
+)
+
+
+### Unofficial datasets ###
+
+WIKIANN_FO_CONFIG = DatasetConfig(
+    name="wikiann-fo",
+    pretty_name="the truncated version of the Faroese part of the named entity "
+    "recognition dataset WikiANN",
+    huggingface_id="EuroEval/wikiann-fo-mini",
+    task=NER,
+    languages=[FO],
+    unofficial=True,
+)
euroeval/dataset_configs/french.py (new file, +83 lines)
@@ -0,0 +1,83 @@
+"""All French dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import FR
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+ALLOCINE_CONFIG = DatasetConfig(
+    name="allocine",
+    pretty_name="the truncated version of the French sentiment classification "
+    "dataset AlloCiné",
+    huggingface_id="EuroEval/allocine-mini",
+    task=SENT,
+    languages=[FR],
+    _labels=["negative", "positive"],
+    _prompt_label_mapping=dict(positive="positif", negative="négatif"),
+)
+
+SCALA_FR_CONFIG = DatasetConfig(
+    name="scala-fr",
+    pretty_name="the French part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-fr",
+    task=LA,
+    languages=[FR],
+)
+
+ELTEC_CONFIG = DatasetConfig(
+    name="eltec",
+    pretty_name="the truncated version of the French named entity recognition "
+    "dataset ELTeC",
+    huggingface_id="EuroEval/eltec-mini",
+    task=NER,
+    languages=[FR],
+)
+
+FQUAD_CONFIG = DatasetConfig(
+    name="fquad",
+    pretty_name="the truncated version of the French reading comprehension dataset "
+    "FQuAD",
+    huggingface_id="EuroEval/fquad-mini",
+    task=RC,
+    languages=[FR],
+)
+
+ORANGE_SUM_CONFIG = DatasetConfig(
+    name="orange-sum",
+    pretty_name="the truncated version of the French summarisation dataset OrangeSum",
+    huggingface_id="EuroEval/orange-sum-mini",
+    task=SUMM,
+    languages=[FR],
+)
+
+MMLU_FR_CONFIG = DatasetConfig(
+    name="mmlu-fr",
+    pretty_name="the truncated version of the French knowledge dataset MMLU-fr, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-fr-mini",
+    task=KNOW,
+    languages=[FR],
+)
+
+HELLASWAG_FR_CONFIG = DatasetConfig(
+    name="hellaswag-fr",
+    pretty_name="the truncated version of the French common-sense reasoning dataset "
+    "HellaSwag-fr, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-fr-mini",
+    task=COMMON_SENSE,
+    languages=[FR],
+)
+
+
+### Unofficial datasets ###
+
+BELEBELE_FR_CONFIG = DatasetConfig(
+    name="belebele-fr",
+    pretty_name="the French multiple choice reading comprehension dataset BeleBele-fr, "
+    "translated from the English BeleBele dataset",
+    huggingface_id="EuroEval/belebele-fr-mini",
+    task=MCRC,
+    languages=[FR],
+    unofficial=True,
+)
euroeval/dataset_configs/german.py (new file, +91 lines)
@@ -0,0 +1,91 @@
+"""All German dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import DE
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+SB10K_CONFIG = DatasetConfig(
+    name="sb10k",
+    pretty_name="the truncated version of the German sentiment classification "
+    "dataset SB10k",
+    huggingface_id="EuroEval/sb10k-mini",
+    task=SENT,
+    languages=[DE],
+)
+
+SCALA_DE_CONFIG = DatasetConfig(
+    name="scala-de",
+    pretty_name="the German part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-de",
+    task=LA,
+    languages=[DE],
+)
+
+GERMEVAL_CONFIG = DatasetConfig(
+    name="germeval",
+    pretty_name="the truncated version of the German named entity recognition "
+    "dataset GermEval",
+    huggingface_id="EuroEval/germeval-mini",
+    task=NER,
+    languages=[DE],
+)
+
+GERMANQUAD_CONFIG = DatasetConfig(
+    name="germanquad",
+    pretty_name="the truncated version of the German reading comprehension dataset "
+    "GermanQuAD",
+    huggingface_id="EuroEval/germanquad-mini",
+    task=RC,
+    languages=[DE],
+)
+
+MLSUM_DE_CONFIG = DatasetConfig(
+    name="mlsum-de",
+    pretty_name="the truncated version of the German summarisation dataset MLSum-de",
+    huggingface_id="EuroEval/mlsum-mini",
+    task=SUMM,
+    languages=[DE],
+)
+
+MMLU_DE_CONFIG = DatasetConfig(
+    name="mmlu-de",
+    pretty_name="the truncated version of the German knowledge dataset MMLU-de, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-de-mini",
+    task=KNOW,
+    languages=[DE],
+)
+
+HELLASWAG_DE_CONFIG = DatasetConfig(
+    name="hellaswag-de",
+    pretty_name="the truncated version of the German common-sense reasoning dataset "
+    "HellaSwag-de, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-de-mini",
+    task=COMMON_SENSE,
+    languages=[DE],
+)
+
+
+### Unofficial datasets ###
+
+ARC_DE_CONFIG = DatasetConfig(
+    name="arc-de",
+    pretty_name="the truncated version of the German knowledge dataset ARC-de, "
+    "translated from the English ARC dataset",
+    huggingface_id="EuroEval/arc-de-mini",
+    task=KNOW,
+    languages=[DE],
+    unofficial=True,
+)
+
+BELEBELE_DE_CONFIG = DatasetConfig(
+    name="belebele-de",
+    pretty_name="the German multiple choice reading comprehension dataset BeleBele-de, "
+    "translated from the English BeleBele dataset",
+    huggingface_id="EuroEval/belebele-de-mini",
+    task=MCRC,
+    languages=[DE],
+    unofficial=True,
+)
euroeval/dataset_configs/icelandic.py (new file, +148 lines)
@@ -0,0 +1,148 @@
+"""All Icelandic dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import IS
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+HOTTER_AND_COLDER_SENTIMENT_CONFIG = DatasetConfig(
+    name="hotter-and-colder-sentiment",
+    pretty_name="the sentiment classification part of the Icelandic dataset Hotter "
+    "and Colder",
+    huggingface_id="EuroEval/hotter-and-colder-sentiment",
+    task=SENT,
+    languages=[IS],
+)
+
+SCALA_IS_CONFIG = DatasetConfig(
+    name="scala-is",
+    pretty_name="the Icelandic part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-is",
+    task=LA,
+    languages=[IS],
+)
+
+MIM_GOLD_NER_CONFIG = DatasetConfig(
+    name="mim-gold-ner",
+    pretty_name="the truncated version of the Icelandic named entity recognition "
+    "dataset MIM-GOLD-NER",
+    huggingface_id="EuroEval/mim-gold-ner-mini",
+    task=NER,
+    languages=[IS],
+)
+
+NQII_CONFIG = DatasetConfig(
+    name="nqii",
+    pretty_name="the truncated version of the Icelandic reading comprehension dataset "
+    "Natural Questions in Icelandic",
+    huggingface_id="EuroEval/nqii-mini",
+    task=RC,
+    languages=[IS],
+)
+
+RRN_CONFIG = DatasetConfig(
+    name="rrn",
+    pretty_name="the truncated version of the Icelandic summarisation dataset "
+    "RÚV Radio News",
+    huggingface_id="EuroEval/rrn-mini",
+    task=SUMM,
+    languages=[IS],
+)
+
+ICELANDIC_KNOWLEDGE_CONFIG = DatasetConfig(
+    name="icelandic-knowledge",
+    pretty_name="the Icelandic knowledge dataset IcelandicKnowledge, derived from the "
+    "IcelandicQA dataset",
+    huggingface_id="EuroEval/icelandic-knowledge",
+    task=KNOW,
+    languages=[IS],
+)
+
+WINOGRANDE_IS_CONFIG = DatasetConfig(
+    name="winogrande-is",
+    pretty_name="the Icelandic common-sense reasoning dataset "
+    "Winogrande-is, manually translated from the English Winogrande dataset",
+    huggingface_id="EuroEval/winogrande-is",
+    task=COMMON_SENSE,
+    languages=[IS],
+)
+
+
+### Unofficial datasets ###
+
+ICE_EC_CONFIG = DatasetConfig(
+    name="ice-ec",
+    pretty_name="the truncated version of the Icelandic Error Corpus",
+    huggingface_id="EuroEval/ice-ec",
+    task=LA,
+    languages=[IS],
+    unofficial=True,
+)
+
+ICE_EC_FULL_CONFIG = DatasetConfig(
+    name="ice-ec-full",
+    pretty_name="the Icelandic Error Corpus",
+    huggingface_id="EuroEval/ice-ec-full",
+    task=LA,
+    languages=[IS],
+    unofficial=True,
+)
+
+ICE_LINGUISTIC_CONFIG = DatasetConfig(
+    name="ice-linguistic",
+    pretty_name="the Icelandic linguistic acceptability dataset IceLinguistic",
+    huggingface_id="EuroEval/ice-linguistic",
+    task=LA,
+    languages=[IS],
+    unofficial=True,
+)
+
+ICELANDIC_QA_CONFIG = DatasetConfig(
+    name="icelandic-qa",
+    pretty_name="the Icelandic reading comprehension dataset IcelandicQA",
+    huggingface_id="EuroEval/icelandic-qa",
+    task=RC,
+    languages=[IS],
+    unofficial=True,
+)
+
+MMLU_IS_CONFIG = DatasetConfig(
+    name="mmlu-is",
+    pretty_name="the truncated version of the Icelandic knowledge dataset MMLU-is, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-is-mini",
+    task=KNOW,
+    languages=[IS],
+    unofficial=True,
+)
+
+ARC_IS_CONFIG = DatasetConfig(
+    name="arc-is",
+    pretty_name="the truncated version of the Icelandic knowledge dataset ARC-is, "
+    "translated from the English ARC dataset",
+    huggingface_id="EuroEval/arc-is-mini",
+    task=KNOW,
+    languages=[IS],
+    unofficial=True,
+)
+
+HELLASWAG_IS_CONFIG = DatasetConfig(
+    name="hellaswag-is",
+    pretty_name="the truncated version of the Icelandic common-sense reasoning dataset "
+    "HellaSwag-is, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-is-mini",
+    task=COMMON_SENSE,
+    languages=[IS],
+    unofficial=True,
+)
+
+BELEBELE_IS_CONFIG = DatasetConfig(
+    name="belebele-is",
+    pretty_name="the Icelandic multiple choice reading comprehension dataset "
+    "BeleBele-is, translated from the English BeleBele dataset",
+    huggingface_id="EuroEval/belebele-is-mini",
+    task=MCRC,
+    languages=[IS],
+    unofficial=True,
+)
euroeval/dataset_configs/italian.py (new file, +81 lines)
@@ -0,0 +1,81 @@
+"""All Italian dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import IT
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
+
+### Official datasets ###
+
+SENTIPOLC_CONFIG = DatasetConfig(
+    name="sentipolc16",
+    pretty_name="the truncated version of the Italian sentiment classification "
+    "dataset Sentipolc-16",
+    huggingface_id="EuroEval/sentipolc16-mini",
+    task=SENT,
+    languages=[IT],
+)
+
+SCALA_IT_CONFIG = DatasetConfig(
+    name="scala-it",
+    pretty_name="the Italian part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-it",
+    task=LA,
+    languages=[IT],
+)
+
+MULTINERD_IT_CONFIG = DatasetConfig(
+    name="multinerd-it",
+    pretty_name="the truncated version of the Italian part of the named "
+    "entity recognition dataset MultiNERD",
+    huggingface_id="EuroEval/multinerd-mini-it",
+    task=NER,
+    languages=[IT],
+)
+
+SQUAD_IT_CONFIG = DatasetConfig(
+    name="squad-it",
+    pretty_name="the truncated version of the Italian reading comprehension dataset "
+    "SQuAD-it, translated from the English SQuAD dataset",
+    huggingface_id="EuroEval/squad-it-mini",
+    task=RC,
+    languages=[IT],
+)
+
+ILPOST_SUM_CONFIG = DatasetConfig(
+    name="ilpost-sum",
+    pretty_name="the truncated version of the Italian summarisation dataset IlPost-Sum",
+    huggingface_id="EuroEval/ilpost-sum",
+    task=SUMM,
+    languages=[IT],
+)
+
+MMLU_IT_CONFIG = DatasetConfig(
+    name="mmlu-it",
+    pretty_name="the truncated version of the Italian knowledge dataset MMLU-it, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-it-mini",
+    task=KNOW,
+    languages=[IT],
+)
+
+HELLASWAG_IT_CONFIG = DatasetConfig(
+    name="hellaswag-it",
+    pretty_name="the truncated version of the Italian common-sense reasoning dataset "
+    "HellaSwag-it, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-it-mini",
+    task=COMMON_SENSE,
+    languages=[IT],
+)
+
+
+### Unofficial datasets ###
+
+WIKINEURAL_IT_CONFIG = DatasetConfig(
+    name="wikineural-it",
+    pretty_name="the truncated version of the Italian named "
+    "entity recognition dataset WikiNEuRal IT",
+    huggingface_id="EuroEval/wikineural-mini-it",
+    task=NER,
+    languages=[IT],
+    unofficial=True,
+)
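Each config's name field ("dutch-social", "belebele-is", and so on) is the identifier the dataset is selected by, while pretty_name, huggingface_id, task and languages describe it. As a small usage sketch reusing the hypothetical get_all_dataset_configs helper from above (not a confirmed EuroEval API):

    # Hypothetical lookup; "dutch-social" is defined in dutch.py above.
    configs = get_all_dataset_configs()
    dutch_social = configs["dutch-social"]
    print(dutch_social.pretty_name)     # human-readable description
    print(dutch_social.huggingface_id)  # EuroEval/dutch-social-mini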