ScandEval 16.12.0__py3-none-any.whl → 16.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scandeval/async_utils.py +46 -0
- scandeval/benchmark_config_factory.py +26 -2
- scandeval/benchmark_modules/fresh.py +2 -1
- scandeval/benchmark_modules/hf.py +50 -12
- scandeval/benchmark_modules/litellm.py +25 -15
- scandeval/benchmark_modules/vllm.py +3 -3
- scandeval/benchmarker.py +15 -33
- scandeval/cli.py +2 -4
- scandeval/constants.py +5 -0
- scandeval/custom_dataset_configs.py +152 -0
- scandeval/data_loading.py +87 -31
- scandeval/data_models.py +396 -225
- scandeval/dataset_configs/__init__.py +51 -25
- scandeval/dataset_configs/albanian.py +1 -1
- scandeval/dataset_configs/belarusian.py +47 -0
- scandeval/dataset_configs/bulgarian.py +1 -1
- scandeval/dataset_configs/catalan.py +1 -1
- scandeval/dataset_configs/croatian.py +1 -1
- scandeval/dataset_configs/danish.py +3 -2
- scandeval/dataset_configs/dutch.py +7 -6
- scandeval/dataset_configs/english.py +4 -3
- scandeval/dataset_configs/estonian.py +8 -7
- scandeval/dataset_configs/faroese.py +1 -1
- scandeval/dataset_configs/finnish.py +5 -4
- scandeval/dataset_configs/french.py +6 -5
- scandeval/dataset_configs/german.py +4 -3
- scandeval/dataset_configs/greek.py +1 -1
- scandeval/dataset_configs/hungarian.py +1 -1
- scandeval/dataset_configs/icelandic.py +4 -3
- scandeval/dataset_configs/italian.py +4 -3
- scandeval/dataset_configs/latvian.py +2 -2
- scandeval/dataset_configs/lithuanian.py +1 -1
- scandeval/dataset_configs/norwegian.py +6 -5
- scandeval/dataset_configs/polish.py +4 -3
- scandeval/dataset_configs/portuguese.py +5 -4
- scandeval/dataset_configs/romanian.py +2 -2
- scandeval/dataset_configs/serbian.py +1 -1
- scandeval/dataset_configs/slovene.py +1 -1
- scandeval/dataset_configs/spanish.py +4 -3
- scandeval/dataset_configs/swedish.py +4 -3
- scandeval/dataset_configs/ukrainian.py +1 -1
- scandeval/generation_utils.py +6 -6
- scandeval/metrics/llm_as_a_judge.py +1 -1
- scandeval/metrics/pipeline.py +1 -1
- scandeval/model_cache.py +34 -4
- scandeval/prompt_templates/linguistic_acceptability.py +9 -0
- scandeval/prompt_templates/multiple_choice.py +9 -0
- scandeval/prompt_templates/named_entity_recognition.py +21 -0
- scandeval/prompt_templates/reading_comprehension.py +10 -0
- scandeval/prompt_templates/sentiment_classification.py +11 -0
- scandeval/string_utils.py +157 -0
- scandeval/task_group_utils/sequence_classification.py +2 -5
- scandeval/task_group_utils/token_classification.py +2 -4
- scandeval/utils.py +6 -323
- scandeval-16.13.0.dist-info/METADATA +334 -0
- scandeval-16.13.0.dist-info/RECORD +94 -0
- scandeval-16.12.0.dist-info/METADATA +0 -667
- scandeval-16.12.0.dist-info/RECORD +0 -90
- {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/WHEEL +0 -0
- {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/entry_points.txt +0 -0
- {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
"""All dataset configurations used in EuroEval."""
|
|
2
2
|
|
|
3
|
+
import collections.abc as c
|
|
4
|
+
import logging
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
|
|
7
|
+
from ..custom_dataset_configs import (
|
|
8
|
+
load_custom_datasets_module,
|
|
9
|
+
try_get_dataset_config_from_repo,
|
|
10
|
+
)
|
|
5
11
|
from ..data_models import DatasetConfig
|
|
6
12
|
from ..languages import get_all_languages
|
|
13
|
+
from ..logging_utils import log_once
|
|
7
14
|
from ..tasks import SPEED
|
|
8
|
-
from ..utils import load_custom_datasets_module
|
|
9
15
|
from .albanian import * # noqa: F403
|
|
10
16
|
from .bosnian import * # noqa: F403
|
|
11
17
|
from .bulgarian import * # noqa: F403
|
|
@@ -38,20 +44,62 @@ from .swedish import * # noqa: F403
|
|
|
38
44
|
from .ukrainian import * # noqa: F403
|
|
39
45
|
|
|
40
46
|
|
|
41
|
-
def get_all_dataset_configs(
|
|
47
|
+
def get_all_dataset_configs(
|
|
48
|
+
custom_datasets_file: Path,
|
|
49
|
+
dataset_ids: c.Sequence[str],
|
|
50
|
+
api_key: str | None,
|
|
51
|
+
cache_dir: Path,
|
|
52
|
+
) -> dict[str, DatasetConfig]:
|
|
42
53
|
"""Get a mapping of all the dataset configurations.
|
|
43
54
|
|
|
44
55
|
Args:
|
|
45
56
|
custom_datasets_file:
|
|
46
57
|
A path to a Python file containing custom dataset configurations.
|
|
58
|
+
dataset_ids:
|
|
59
|
+
The IDs of the datasets to include in the mapping.
|
|
60
|
+
api_key:
|
|
61
|
+
The Hugging Face API key to use to check if the repositories have custom
|
|
62
|
+
dataset configs.
|
|
63
|
+
cache_dir:
|
|
64
|
+
The directory to store the cache in.
|
|
47
65
|
|
|
48
66
|
Returns:
|
|
49
67
|
A mapping between names of datasets and their configurations.
|
|
50
68
|
"""
|
|
51
69
|
globals_dict = globals()
|
|
70
|
+
|
|
71
|
+
# If any of the dataset IDs are referring to Hugging Face dataset IDs, then we check
|
|
72
|
+
# if the repositories have custom dataset configs and if they do, we add them to the
|
|
73
|
+
# globals dict.
|
|
74
|
+
for dataset_id in dataset_ids:
|
|
75
|
+
dataset_config_or_none = try_get_dataset_config_from_repo(
|
|
76
|
+
dataset_id=dataset_id, api_key=api_key, cache_dir=cache_dir
|
|
77
|
+
)
|
|
78
|
+
if dataset_config_or_none is not None:
|
|
79
|
+
globals_dict[dataset_id] = dataset_config_or_none
|
|
80
|
+
msg = f"Loaded external dataset {dataset_id}"
|
|
81
|
+
split_strings = []
|
|
82
|
+
if dataset_config_or_none.train_split is not None:
|
|
83
|
+
split_strings.append(
|
|
84
|
+
f"train split '{dataset_config_or_none.train_split}'"
|
|
85
|
+
)
|
|
86
|
+
if dataset_config_or_none.val_split is not None:
|
|
87
|
+
split_strings.append(f"val split '{dataset_config_or_none.val_split}'")
|
|
88
|
+
if dataset_config_or_none.test_split is not None:
|
|
89
|
+
split_strings.append(
|
|
90
|
+
f"test split '{dataset_config_or_none.test_split}'"
|
|
91
|
+
)
|
|
92
|
+
if split_strings:
|
|
93
|
+
msg += f" with {', '.join(split_strings[:-1])} and {split_strings[-1]}"
|
|
94
|
+
msg += "."
|
|
95
|
+
log_once(msg, level=logging.INFO)
|
|
96
|
+
|
|
97
|
+
# Add the custom datasets from the custom datasets file to the globals dict
|
|
52
98
|
module = load_custom_datasets_module(custom_datasets_file=custom_datasets_file)
|
|
53
99
|
if module is not None:
|
|
54
100
|
globals_dict |= vars(module)
|
|
101
|
+
|
|
102
|
+
# Extract the dataset configs from the globals dict
|
|
55
103
|
dataset_configs = [
|
|
56
104
|
cfg
|
|
57
105
|
for cfg in globals_dict.values()
|
|
@@ -61,37 +109,15 @@ def get_all_dataset_configs(custom_datasets_file: Path) -> dict[str, DatasetConf
|
|
|
61
109
|
"There are duplicate dataset configurations. Please ensure that each dataset "
|
|
62
110
|
"has a unique name."
|
|
63
111
|
)
|
|
112
|
+
|
|
64
113
|
mapping = {cfg.name: cfg for cfg in dataset_configs}
|
|
65
114
|
return mapping
|
|
66
115
|
|
|
67
116
|
|
|
68
|
-
def get_dataset_config(dataset_name: str, custom_datasets_file: Path) -> DatasetConfig:
|
|
69
|
-
"""Get the dataset configuration for a dataset.
|
|
70
|
-
|
|
71
|
-
Args:
|
|
72
|
-
dataset_name:
|
|
73
|
-
The name of the dataset.
|
|
74
|
-
custom_datasets_file:
|
|
75
|
-
A path to a Python file containing custom dataset configurations.
|
|
76
|
-
|
|
77
|
-
Returns:
|
|
78
|
-
The dataset configuration.
|
|
79
|
-
|
|
80
|
-
Raises:
|
|
81
|
-
ValueError:
|
|
82
|
-
If the dataset is not found.
|
|
83
|
-
"""
|
|
84
|
-
dataset_configs = get_all_dataset_configs(custom_datasets_file=custom_datasets_file)
|
|
85
|
-
if dataset_name not in dataset_configs:
|
|
86
|
-
raise ValueError(f"No dataset config found for dataset {dataset_name}.")
|
|
87
|
-
return dataset_configs[dataset_name]
|
|
88
|
-
|
|
89
|
-
|
|
90
117
|
SPEED_CONFIG = DatasetConfig(
|
|
91
118
|
name="speed",
|
|
92
119
|
pretty_name="",
|
|
93
120
|
source="",
|
|
94
121
|
task=SPEED,
|
|
95
122
|
languages=list(get_all_languages().values()),
|
|
96
|
-
_logging_string="the speed estimation benchmark",
|
|
97
123
|
)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""All Belarusian dataset configurations used in EuroEval."""
|
|
2
|
+
|
|
3
|
+
from ..data_models import DatasetConfig
|
|
4
|
+
from ..languages import BELARUSIAN
|
|
5
|
+
from ..tasks import COMMON_SENSE, LA, NER, RC, SENT
|
|
6
|
+
|
|
7
|
+
### Official datasets ###
|
|
8
|
+
|
|
9
|
+
BESLS_CONFIG = DatasetConfig(
|
|
10
|
+
name="besls",
|
|
11
|
+
pretty_name="BeSLS",
|
|
12
|
+
source="EuroEval/besls",
|
|
13
|
+
task=SENT,
|
|
14
|
+
languages=[BELARUSIAN],
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
SCALA_BE_CONFIG = DatasetConfig(
|
|
18
|
+
name="scala-be",
|
|
19
|
+
pretty_name="ScaLA-be",
|
|
20
|
+
source="EuroEval/scala-be",
|
|
21
|
+
task=LA,
|
|
22
|
+
languages=[BELARUSIAN],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
WIKIANN_BE_CONFIG = DatasetConfig(
|
|
26
|
+
name="wikiann-be",
|
|
27
|
+
pretty_name="WikiANN-be",
|
|
28
|
+
source="EuroEval/wikiann-be-mini",
|
|
29
|
+
task=NER,
|
|
30
|
+
languages=[BELARUSIAN],
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
MULTI_WIKI_QA_BE_CONFIG = DatasetConfig(
|
|
34
|
+
name="multi-wiki-qa-be",
|
|
35
|
+
pretty_name="MultiWikiQA-be",
|
|
36
|
+
source="EuroEval/multi-wiki-qa-be-mini",
|
|
37
|
+
task=RC,
|
|
38
|
+
languages=[BELARUSIAN],
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
BE_WSC_CONFIG = DatasetConfig(
|
|
42
|
+
name="be-wsc",
|
|
43
|
+
pretty_name="BE-WSC",
|
|
44
|
+
source="EuroEval/be-wsc",
|
|
45
|
+
task=COMMON_SENSE,
|
|
46
|
+
languages=[BELARUSIAN],
|
|
47
|
+
)
|
|
@@ -76,7 +76,8 @@ VALEU_DA_CONFIG = DatasetConfig(
|
|
|
76
76
|
source="EuroEval/european-values-da",
|
|
77
77
|
task=EUROPEAN_VALUES,
|
|
78
78
|
languages=[DANISH],
|
|
79
|
-
|
|
79
|
+
train_split=None,
|
|
80
|
+
val_split=None,
|
|
80
81
|
bootstrap_samples=False,
|
|
81
82
|
)
|
|
82
83
|
|
|
@@ -143,6 +144,6 @@ WINOGRANDE_DA_CONFIG = DatasetConfig(
|
|
|
143
144
|
source="EuroEval/winogrande-da",
|
|
144
145
|
task=COMMON_SENSE,
|
|
145
146
|
languages=[DANISH],
|
|
146
|
-
|
|
147
|
+
labels=["a", "b"],
|
|
147
148
|
unofficial=True,
|
|
148
149
|
)
|
|
@@ -24,7 +24,7 @@ DBRD_CONFIG = DatasetConfig(
|
|
|
24
24
|
source="EuroEval/dbrd-mini",
|
|
25
25
|
task=SENT,
|
|
26
26
|
languages=[DUTCH],
|
|
27
|
-
|
|
27
|
+
labels=["negative", "positive"],
|
|
28
28
|
)
|
|
29
29
|
|
|
30
30
|
SCALA_NL_CONFIG = DatasetConfig(
|
|
@@ -89,9 +89,10 @@ VALEU_NL_CONFIG = DatasetConfig(
|
|
|
89
89
|
source="EuroEval/european-values-nl",
|
|
90
90
|
task=EUROPEAN_VALUES,
|
|
91
91
|
languages=[DUTCH],
|
|
92
|
-
|
|
92
|
+
train_split=None,
|
|
93
|
+
val_split=None,
|
|
93
94
|
bootstrap_samples=False,
|
|
94
|
-
|
|
95
|
+
instruction_prompt="{text}",
|
|
95
96
|
)
|
|
96
97
|
|
|
97
98
|
MBBQ_NL_CONFIG = DatasetConfig(
|
|
@@ -100,7 +101,7 @@ MBBQ_NL_CONFIG = DatasetConfig(
|
|
|
100
101
|
source="EuroEval/mbbq-nl",
|
|
101
102
|
task=MCSTEREO,
|
|
102
103
|
languages=[DUTCH],
|
|
103
|
-
|
|
104
|
+
train_split=None,
|
|
104
105
|
)
|
|
105
106
|
|
|
106
107
|
|
|
@@ -158,7 +159,7 @@ COPA_NL_CONFIG = DatasetConfig(
|
|
|
158
159
|
task=COMMON_SENSE,
|
|
159
160
|
languages=[DUTCH],
|
|
160
161
|
unofficial=True,
|
|
161
|
-
|
|
162
|
+
labels=["a", "b"],
|
|
162
163
|
)
|
|
163
164
|
|
|
164
165
|
GOLDENSWAG_NL_CONFIG = DatasetConfig(
|
|
@@ -176,6 +177,6 @@ WINOGRANDE_NL_CONFIG = DatasetConfig(
|
|
|
176
177
|
source="EuroEval/winogrande-nl",
|
|
177
178
|
task=COMMON_SENSE,
|
|
178
179
|
languages=[DUTCH],
|
|
179
|
-
|
|
180
|
+
labels=["a", "b"],
|
|
180
181
|
unofficial=True,
|
|
181
182
|
)
|
|
@@ -68,9 +68,10 @@ VALEU_EN_CONFIG = DatasetConfig(
|
|
|
68
68
|
source="EuroEval/european-values-en",
|
|
69
69
|
task=EUROPEAN_VALUES,
|
|
70
70
|
languages=[ENGLISH],
|
|
71
|
-
|
|
71
|
+
train_split=None,
|
|
72
|
+
val_split=None,
|
|
72
73
|
bootstrap_samples=False,
|
|
73
|
-
|
|
74
|
+
instruction_prompt="{text}",
|
|
74
75
|
)
|
|
75
76
|
|
|
76
77
|
|
|
@@ -127,6 +128,6 @@ WINOGRANDE_CONFIG = DatasetConfig(
|
|
|
127
128
|
source="EuroEval/winogrande-en",
|
|
128
129
|
task=COMMON_SENSE,
|
|
129
130
|
languages=[ENGLISH],
|
|
130
|
-
|
|
131
|
+
labels=["a", "b"],
|
|
131
132
|
unofficial=True,
|
|
132
133
|
)
|
|
@@ -60,13 +60,13 @@ WINOGRANDE_ET_CONFIG = DatasetConfig(
|
|
|
60
60
|
source="EuroEval/winogrande-et",
|
|
61
61
|
task=COMMON_SENSE,
|
|
62
62
|
languages=[ESTONIAN],
|
|
63
|
-
|
|
63
|
+
prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
|
|
64
64
|
"igal ülesandel on kaks vastusevarianti (a ja b).",
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
prompt_template="Tekstülesanne: {text}\nVastus: {label}",
|
|
66
|
+
instruction_prompt="Tekstülesanne: {text}\n\n"
|
|
67
67
|
"Sinu ülesanne on valida lünka sobiv vastusevariant. "
|
|
68
68
|
"Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
|
|
69
|
-
|
|
69
|
+
labels=["a", "b"],
|
|
70
70
|
)
|
|
71
71
|
|
|
72
72
|
VALEU_ET_CONFIG = DatasetConfig(
|
|
@@ -75,9 +75,10 @@ VALEU_ET_CONFIG = DatasetConfig(
|
|
|
75
75
|
source="EuroEval/european-values-et",
|
|
76
76
|
task=EUROPEAN_VALUES,
|
|
77
77
|
languages=[ESTONIAN],
|
|
78
|
-
|
|
78
|
+
train_split=None,
|
|
79
|
+
val_split=None,
|
|
79
80
|
bootstrap_samples=False,
|
|
80
|
-
|
|
81
|
+
instruction_prompt="{text}",
|
|
81
82
|
)
|
|
82
83
|
|
|
83
84
|
### Unofficial datasets ###
|
|
@@ -97,7 +98,7 @@ EXAM_ET_CONFIG = DatasetConfig(
|
|
|
97
98
|
source="EuroEval/exam-et",
|
|
98
99
|
task=KNOW,
|
|
99
100
|
languages=[ESTONIAN],
|
|
100
|
-
|
|
101
|
+
labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
|
|
101
102
|
unofficial=True,
|
|
102
103
|
)
|
|
103
104
|
|
|
@@ -12,7 +12,7 @@ SCANDISENT_FI_CONFIG = DatasetConfig(
|
|
|
12
12
|
source="EuroEval/scandisent-fi-mini",
|
|
13
13
|
task=SENT,
|
|
14
14
|
languages=[FINNISH],
|
|
15
|
-
|
|
15
|
+
labels=["negative", "positive"],
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
TURKU_NER_FI_CONFIG = DatasetConfig(
|
|
@@ -61,9 +61,10 @@ VALEU_FI_CONFIG = DatasetConfig(
|
|
|
61
61
|
source="EuroEval/european-values-fi",
|
|
62
62
|
task=EUROPEAN_VALUES,
|
|
63
63
|
languages=[FINNISH],
|
|
64
|
-
|
|
64
|
+
train_split=None,
|
|
65
|
+
val_split=None,
|
|
65
66
|
bootstrap_samples=False,
|
|
66
|
-
|
|
67
|
+
instruction_prompt="{text}",
|
|
67
68
|
)
|
|
68
69
|
|
|
69
70
|
|
|
@@ -102,6 +103,6 @@ WINOGRANDE_FI_CONFIG = DatasetConfig(
|
|
|
102
103
|
source="EuroEval/winogrande-fi",
|
|
103
104
|
task=COMMON_SENSE,
|
|
104
105
|
languages=[FINNISH],
|
|
105
|
-
|
|
106
|
+
labels=["a", "b"],
|
|
106
107
|
unofficial=True,
|
|
107
108
|
)
|
|
@@ -12,8 +12,8 @@ ALLOCINE_CONFIG = DatasetConfig(
|
|
|
12
12
|
source="EuroEval/allocine-mini",
|
|
13
13
|
task=SENT,
|
|
14
14
|
languages=[FRENCH],
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
labels=["negative", "positive"],
|
|
16
|
+
prompt_label_mapping=dict(positive="positif", negative="négatif"),
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
SCALA_FR_CONFIG = DatasetConfig(
|
|
@@ -70,9 +70,10 @@ VALEU_FR_CONFIG = DatasetConfig(
|
|
|
70
70
|
source="EuroEval/european-values-fr",
|
|
71
71
|
task=EUROPEAN_VALUES,
|
|
72
72
|
languages=[FRENCH],
|
|
73
|
-
|
|
73
|
+
train_split=None,
|
|
74
|
+
val_split=None,
|
|
74
75
|
bootstrap_samples=False,
|
|
75
|
-
|
|
76
|
+
instruction_prompt="{text}",
|
|
76
77
|
)
|
|
77
78
|
|
|
78
79
|
|
|
@@ -111,6 +112,6 @@ WINOGRANDE_FR_CONFIG = DatasetConfig(
|
|
|
111
112
|
source="EuroEval/winogrande-fr",
|
|
112
113
|
task=COMMON_SENSE,
|
|
113
114
|
languages=[FRENCH],
|
|
114
|
-
|
|
115
|
+
labels=["a", "b"],
|
|
115
116
|
unofficial=True,
|
|
116
117
|
)
|
|
@@ -68,9 +68,10 @@ VALEU_DE_CONFIG = DatasetConfig(
|
|
|
68
68
|
source="EuroEval/european-values-de",
|
|
69
69
|
task=EUROPEAN_VALUES,
|
|
70
70
|
languages=[GERMAN],
|
|
71
|
-
|
|
71
|
+
train_split=None,
|
|
72
|
+
val_split=None,
|
|
72
73
|
bootstrap_samples=False,
|
|
73
|
-
|
|
74
|
+
instruction_prompt="{text}",
|
|
74
75
|
)
|
|
75
76
|
|
|
76
77
|
|
|
@@ -127,6 +128,6 @@ WINOGRANDE_DE_CONFIG = DatasetConfig(
|
|
|
127
128
|
source="EuroEval/winogrande-de",
|
|
128
129
|
task=COMMON_SENSE,
|
|
129
130
|
languages=[GERMAN],
|
|
130
|
-
|
|
131
|
+
labels=["a", "b"],
|
|
131
132
|
unofficial=True,
|
|
132
133
|
)
|
|
@@ -60,7 +60,7 @@ WINOGRANDE_IS_CONFIG = DatasetConfig(
|
|
|
60
60
|
source="EuroEval/winogrande-is",
|
|
61
61
|
task=COMMON_SENSE,
|
|
62
62
|
languages=[ICELANDIC],
|
|
63
|
-
|
|
63
|
+
labels=["a", "b"],
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
VALEU_IS_CONFIG = DatasetConfig(
|
|
@@ -69,9 +69,10 @@ VALEU_IS_CONFIG = DatasetConfig(
|
|
|
69
69
|
source="EuroEval/european-values-is",
|
|
70
70
|
task=EUROPEAN_VALUES,
|
|
71
71
|
languages=[ICELANDIC],
|
|
72
|
-
|
|
72
|
+
train_split=None,
|
|
73
|
+
val_split=None,
|
|
73
74
|
bootstrap_samples=False,
|
|
74
|
-
|
|
75
|
+
instruction_prompt="{text}",
|
|
75
76
|
)
|
|
76
77
|
|
|
77
78
|
|
|
@@ -68,9 +68,10 @@ VALEU_IT_CONFIG = DatasetConfig(
|
|
|
68
68
|
source="EuroEval/european-values-it",
|
|
69
69
|
task=EUROPEAN_VALUES,
|
|
70
70
|
languages=[ITALIAN],
|
|
71
|
-
|
|
71
|
+
train_split=None,
|
|
72
|
+
val_split=None,
|
|
72
73
|
bootstrap_samples=False,
|
|
73
|
-
|
|
74
|
+
instruction_prompt="{text}",
|
|
74
75
|
)
|
|
75
76
|
|
|
76
77
|
|
|
@@ -118,6 +119,6 @@ WINOGRANDE_IT_CONFIG = DatasetConfig(
|
|
|
118
119
|
source="EuroEval/winogrande-it",
|
|
119
120
|
task=COMMON_SENSE,
|
|
120
121
|
languages=[ITALIAN],
|
|
121
|
-
|
|
122
|
+
labels=["a", "b"],
|
|
122
123
|
unofficial=True,
|
|
123
124
|
)
|
|
@@ -61,7 +61,7 @@ COPA_LV_CONFIG = DatasetConfig(
|
|
|
61
61
|
source="EuroEval/copa-lv",
|
|
62
62
|
task=COMMON_SENSE,
|
|
63
63
|
languages=[LATVIAN],
|
|
64
|
-
|
|
64
|
+
labels=["a", "b"],
|
|
65
65
|
)
|
|
66
66
|
|
|
67
67
|
|
|
@@ -82,6 +82,6 @@ WINOGRANDE_LV_CONFIG = DatasetConfig(
|
|
|
82
82
|
source="EuroEval/winogrande-lv",
|
|
83
83
|
task=COMMON_SENSE,
|
|
84
84
|
languages=[LATVIAN],
|
|
85
|
-
|
|
85
|
+
labels=["a", "b"],
|
|
86
86
|
unofficial=True,
|
|
87
87
|
)
|
|
@@ -52,7 +52,7 @@ NORQUAD_CONFIG = DatasetConfig(
|
|
|
52
52
|
source="EuroEval/norquad-mini",
|
|
53
53
|
task=RC,
|
|
54
54
|
languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
|
|
55
|
-
|
|
55
|
+
num_few_shot_examples=2,
|
|
56
56
|
)
|
|
57
57
|
|
|
58
58
|
NO_SAMMENDRAG_CONFIG = DatasetConfig(
|
|
@@ -85,7 +85,7 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
|
|
|
85
85
|
source="EuroEval/nor-common-sense-qa",
|
|
86
86
|
task=COMMON_SENSE,
|
|
87
87
|
languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
|
|
88
|
-
|
|
88
|
+
labels=["a", "b", "c", "d", "e"],
|
|
89
89
|
)
|
|
90
90
|
|
|
91
91
|
VALEU_NO_CONFIG = DatasetConfig(
|
|
@@ -94,9 +94,10 @@ VALEU_NO_CONFIG = DatasetConfig(
|
|
|
94
94
|
source="EuroEval/european-values-no",
|
|
95
95
|
task=EUROPEAN_VALUES,
|
|
96
96
|
languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
|
|
97
|
-
|
|
97
|
+
train_split=None,
|
|
98
|
+
val_split=None,
|
|
98
99
|
bootstrap_samples=False,
|
|
99
|
-
|
|
100
|
+
instruction_prompt="{text}",
|
|
100
101
|
)
|
|
101
102
|
|
|
102
103
|
|
|
@@ -207,6 +208,6 @@ WINOGRANDE_NO_CONFIG = DatasetConfig(
|
|
|
207
208
|
source="EuroEval/winogrande-no",
|
|
208
209
|
task=COMMON_SENSE,
|
|
209
210
|
languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
|
|
210
|
-
|
|
211
|
+
labels=["a", "b"],
|
|
211
212
|
unofficial=True,
|
|
212
213
|
)
|
|
@@ -60,7 +60,7 @@ WINOGRANDE_PL_CONFIG = DatasetConfig(
|
|
|
60
60
|
source="EuroEval/winogrande-pl",
|
|
61
61
|
task=COMMON_SENSE,
|
|
62
62
|
languages=[POLISH],
|
|
63
|
-
|
|
63
|
+
labels=["a", "b"],
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
VALEU_PL_CONFIG = DatasetConfig(
|
|
@@ -69,9 +69,10 @@ VALEU_PL_CONFIG = DatasetConfig(
|
|
|
69
69
|
source="EuroEval/european-values-pl",
|
|
70
70
|
task=EUROPEAN_VALUES,
|
|
71
71
|
languages=[POLISH],
|
|
72
|
-
|
|
72
|
+
train_split=None,
|
|
73
|
+
val_split=None,
|
|
73
74
|
bootstrap_samples=False,
|
|
74
|
-
|
|
75
|
+
instruction_prompt="{text}",
|
|
75
76
|
)
|
|
76
77
|
|
|
77
78
|
|
|
@@ -12,7 +12,7 @@ SST2_PT_CONFIG = DatasetConfig(
|
|
|
12
12
|
source="EuroEval/sst2-pt-mini",
|
|
13
13
|
task=SENT,
|
|
14
14
|
languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
|
|
15
|
-
|
|
15
|
+
labels=["positive", "negative"],
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
SCALA_PT = DatasetConfig(
|
|
@@ -69,9 +69,10 @@ VALEU_PT_CONFIG = DatasetConfig(
|
|
|
69
69
|
source="EuroEval/european-values-pt",
|
|
70
70
|
task=EUROPEAN_VALUES,
|
|
71
71
|
languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
|
|
72
|
-
|
|
72
|
+
train_split=None,
|
|
73
|
+
val_split=None,
|
|
73
74
|
bootstrap_samples=False,
|
|
74
|
-
|
|
75
|
+
instruction_prompt="{text}",
|
|
75
76
|
)
|
|
76
77
|
|
|
77
78
|
|
|
@@ -92,6 +93,6 @@ WINOGRANDE_PT_CONFIG = DatasetConfig(
|
|
|
92
93
|
source="EuroEval/winogrande-pt",
|
|
93
94
|
task=COMMON_SENSE,
|
|
94
95
|
languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
|
|
95
|
-
|
|
96
|
+
labels=["a", "b"],
|
|
96
97
|
unofficial=True,
|
|
97
98
|
)
|
|
@@ -12,7 +12,7 @@ ROSENT_CONFIG = DatasetConfig(
|
|
|
12
12
|
source="EuroEval/ro-sent-mini",
|
|
13
13
|
task=SENT,
|
|
14
14
|
languages=[ROMANIAN],
|
|
15
|
-
|
|
15
|
+
labels=["positive", "negative"],
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
SCALA_RO_CONFIG = DatasetConfig(
|
|
@@ -61,5 +61,5 @@ WINOGRANDE_RO_CONFIG = DatasetConfig(
|
|
|
61
61
|
source="EuroEval/winogrande-ro",
|
|
62
62
|
task=COMMON_SENSE,
|
|
63
63
|
languages=[ROMANIAN],
|
|
64
|
-
|
|
64
|
+
labels=["a", "b"],
|
|
65
65
|
)
|
|
@@ -68,9 +68,10 @@ VALEU_ES_CONFIG = DatasetConfig(
|
|
|
68
68
|
source="EuroEval/european-values-es",
|
|
69
69
|
task=EUROPEAN_VALUES,
|
|
70
70
|
languages=[SPANISH],
|
|
71
|
-
|
|
71
|
+
train_split=None,
|
|
72
|
+
val_split=None,
|
|
72
73
|
bootstrap_samples=False,
|
|
73
|
-
|
|
74
|
+
instruction_prompt="{text}",
|
|
74
75
|
)
|
|
75
76
|
|
|
76
77
|
|
|
@@ -127,6 +128,6 @@ WINOGRANDE_ES_CONFIG = DatasetConfig(
|
|
|
127
128
|
source="EuroEval/winogrande-es",
|
|
128
129
|
task=COMMON_SENSE,
|
|
129
130
|
languages=[SPANISH],
|
|
130
|
-
|
|
131
|
+
labels=["a", "b"],
|
|
131
132
|
unofficial=True,
|
|
132
133
|
)
|