ScandEval 16.12.0__py3-none-any.whl → 16.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. scandeval/async_utils.py +46 -0
  2. scandeval/benchmark_config_factory.py +26 -2
  3. scandeval/benchmark_modules/fresh.py +2 -1
  4. scandeval/benchmark_modules/hf.py +50 -12
  5. scandeval/benchmark_modules/litellm.py +25 -15
  6. scandeval/benchmark_modules/vllm.py +3 -3
  7. scandeval/benchmarker.py +15 -33
  8. scandeval/cli.py +2 -4
  9. scandeval/constants.py +5 -0
  10. scandeval/custom_dataset_configs.py +152 -0
  11. scandeval/data_loading.py +87 -31
  12. scandeval/data_models.py +396 -225
  13. scandeval/dataset_configs/__init__.py +51 -25
  14. scandeval/dataset_configs/albanian.py +1 -1
  15. scandeval/dataset_configs/belarusian.py +47 -0
  16. scandeval/dataset_configs/bulgarian.py +1 -1
  17. scandeval/dataset_configs/catalan.py +1 -1
  18. scandeval/dataset_configs/croatian.py +1 -1
  19. scandeval/dataset_configs/danish.py +3 -2
  20. scandeval/dataset_configs/dutch.py +7 -6
  21. scandeval/dataset_configs/english.py +4 -3
  22. scandeval/dataset_configs/estonian.py +8 -7
  23. scandeval/dataset_configs/faroese.py +1 -1
  24. scandeval/dataset_configs/finnish.py +5 -4
  25. scandeval/dataset_configs/french.py +6 -5
  26. scandeval/dataset_configs/german.py +4 -3
  27. scandeval/dataset_configs/greek.py +1 -1
  28. scandeval/dataset_configs/hungarian.py +1 -1
  29. scandeval/dataset_configs/icelandic.py +4 -3
  30. scandeval/dataset_configs/italian.py +4 -3
  31. scandeval/dataset_configs/latvian.py +2 -2
  32. scandeval/dataset_configs/lithuanian.py +1 -1
  33. scandeval/dataset_configs/norwegian.py +6 -5
  34. scandeval/dataset_configs/polish.py +4 -3
  35. scandeval/dataset_configs/portuguese.py +5 -4
  36. scandeval/dataset_configs/romanian.py +2 -2
  37. scandeval/dataset_configs/serbian.py +1 -1
  38. scandeval/dataset_configs/slovene.py +1 -1
  39. scandeval/dataset_configs/spanish.py +4 -3
  40. scandeval/dataset_configs/swedish.py +4 -3
  41. scandeval/dataset_configs/ukrainian.py +1 -1
  42. scandeval/generation_utils.py +6 -6
  43. scandeval/metrics/llm_as_a_judge.py +1 -1
  44. scandeval/metrics/pipeline.py +1 -1
  45. scandeval/model_cache.py +34 -4
  46. scandeval/prompt_templates/linguistic_acceptability.py +9 -0
  47. scandeval/prompt_templates/multiple_choice.py +9 -0
  48. scandeval/prompt_templates/named_entity_recognition.py +21 -0
  49. scandeval/prompt_templates/reading_comprehension.py +10 -0
  50. scandeval/prompt_templates/sentiment_classification.py +11 -0
  51. scandeval/string_utils.py +157 -0
  52. scandeval/task_group_utils/sequence_classification.py +2 -5
  53. scandeval/task_group_utils/token_classification.py +2 -4
  54. scandeval/utils.py +6 -323
  55. scandeval-16.13.0.dist-info/METADATA +334 -0
  56. scandeval-16.13.0.dist-info/RECORD +94 -0
  57. scandeval-16.12.0.dist-info/METADATA +0 -667
  58. scandeval-16.12.0.dist-info/RECORD +0 -90
  59. {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/WHEEL +0 -0
  60. {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/entry_points.txt +0 -0
  61. {scandeval-16.12.0.dist-info → scandeval-16.13.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,11 +1,17 @@
1
1
  """All dataset configurations used in EuroEval."""
2
2
 
3
+ import collections.abc as c
4
+ import logging
3
5
  from pathlib import Path
4
6
 
7
+ from ..custom_dataset_configs import (
8
+ load_custom_datasets_module,
9
+ try_get_dataset_config_from_repo,
10
+ )
5
11
  from ..data_models import DatasetConfig
6
12
  from ..languages import get_all_languages
13
+ from ..logging_utils import log_once
7
14
  from ..tasks import SPEED
8
- from ..utils import load_custom_datasets_module
9
15
  from .albanian import * # noqa: F403
10
16
  from .bosnian import * # noqa: F403
11
17
  from .bulgarian import * # noqa: F403
@@ -38,20 +44,62 @@ from .swedish import * # noqa: F403
38
44
  from .ukrainian import * # noqa: F403
39
45
 
40
46
 
41
- def get_all_dataset_configs(custom_datasets_file: Path) -> dict[str, DatasetConfig]:
47
+ def get_all_dataset_configs(
48
+ custom_datasets_file: Path,
49
+ dataset_ids: c.Sequence[str],
50
+ api_key: str | None,
51
+ cache_dir: Path,
52
+ ) -> dict[str, DatasetConfig]:
42
53
  """Get a mapping of all the dataset configurations.
43
54
 
44
55
  Args:
45
56
  custom_datasets_file:
46
57
  A path to a Python file containing custom dataset configurations.
58
+ dataset_ids:
59
+ The IDs of the datasets to include in the mapping.
60
+ api_key:
61
+ The Hugging Face API key to use to check if the repositories have custom
62
+ dataset configs.
63
+ cache_dir:
64
+ The directory to store the cache in.
47
65
 
48
66
  Returns:
49
67
  A mapping between names of datasets and their configurations.
50
68
  """
51
69
  globals_dict = globals()
70
+
71
+ # If any of the dataset IDs are referring to Hugging Face dataset IDs, then we check
72
+ # if the repositories have custom dataset configs and if they do, we add them to the
73
+ # globals dict.
74
+ for dataset_id in dataset_ids:
75
+ dataset_config_or_none = try_get_dataset_config_from_repo(
76
+ dataset_id=dataset_id, api_key=api_key, cache_dir=cache_dir
77
+ )
78
+ if dataset_config_or_none is not None:
79
+ globals_dict[dataset_id] = dataset_config_or_none
80
+ msg = f"Loaded external dataset {dataset_id}"
81
+ split_strings = []
82
+ if dataset_config_or_none.train_split is not None:
83
+ split_strings.append(
84
+ f"train split '{dataset_config_or_none.train_split}'"
85
+ )
86
+ if dataset_config_or_none.val_split is not None:
87
+ split_strings.append(f"val split '{dataset_config_or_none.val_split}'")
88
+ if dataset_config_or_none.test_split is not None:
89
+ split_strings.append(
90
+ f"test split '{dataset_config_or_none.test_split}'"
91
+ )
92
+ if split_strings:
93
+ msg += f" with {', '.join(split_strings[:-1])} and {split_strings[-1]}"
94
+ msg += "."
95
+ log_once(msg, level=logging.INFO)
96
+
97
+ # Add the custom datasets from the custom datasets file to the globals dict
52
98
  module = load_custom_datasets_module(custom_datasets_file=custom_datasets_file)
53
99
  if module is not None:
54
100
  globals_dict |= vars(module)
101
+
102
+ # Extract the dataset configs from the globals dict
55
103
  dataset_configs = [
56
104
  cfg
57
105
  for cfg in globals_dict.values()
@@ -61,37 +109,15 @@ def get_all_dataset_configs(custom_datasets_file: Path) -> dict[str, DatasetConf
61
109
  "There are duplicate dataset configurations. Please ensure that each dataset "
62
110
  "has a unique name."
63
111
  )
112
+
64
113
  mapping = {cfg.name: cfg for cfg in dataset_configs}
65
114
  return mapping
66
115
 
67
116
 
68
- def get_dataset_config(dataset_name: str, custom_datasets_file: Path) -> DatasetConfig:
69
- """Get the dataset configuration for a dataset.
70
-
71
- Args:
72
- dataset_name:
73
- The name of the dataset.
74
- custom_datasets_file:
75
- A path to a Python file containing custom dataset configurations.
76
-
77
- Returns:
78
- The dataset configuration.
79
-
80
- Raises:
81
- ValueError:
82
- If the dataset is not found.
83
- """
84
- dataset_configs = get_all_dataset_configs(custom_datasets_file=custom_datasets_file)
85
- if dataset_name not in dataset_configs:
86
- raise ValueError(f"No dataset config found for dataset {dataset_name}.")
87
- return dataset_configs[dataset_name]
88
-
89
-
90
117
  SPEED_CONFIG = DatasetConfig(
91
118
  name="speed",
92
119
  pretty_name="",
93
120
  source="",
94
121
  task=SPEED,
95
122
  languages=list(get_all_languages().values()),
96
- _logging_string="the speed estimation benchmark",
97
123
  )
@@ -60,5 +60,5 @@ WINOGRANDE_SQ_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-sq",
61
61
  task=COMMON_SENSE,
62
62
  languages=[ALBANIAN],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
@@ -0,0 +1,47 @@
1
+ """All Belarusian dataset configurations used in EuroEval."""
2
+
3
+ from ..data_models import DatasetConfig
4
+ from ..languages import BELARUSIAN
5
+ from ..tasks import COMMON_SENSE, LA, NER, RC, SENT
6
+
7
+ ### Official datasets ###
8
+
9
+ BESLS_CONFIG = DatasetConfig(
10
+ name="besls",
11
+ pretty_name="BeSLS",
12
+ source="EuroEval/besls",
13
+ task=SENT,
14
+ languages=[BELARUSIAN],
15
+ )
16
+
17
+ SCALA_BE_CONFIG = DatasetConfig(
18
+ name="scala-be",
19
+ pretty_name="ScaLA-be",
20
+ source="EuroEval/scala-be",
21
+ task=LA,
22
+ languages=[BELARUSIAN],
23
+ )
24
+
25
+ WIKIANN_BE_CONFIG = DatasetConfig(
26
+ name="wikiann-be",
27
+ pretty_name="WikiANN-be",
28
+ source="EuroEval/wikiann-be-mini",
29
+ task=NER,
30
+ languages=[BELARUSIAN],
31
+ )
32
+
33
+ MULTI_WIKI_QA_BE_CONFIG = DatasetConfig(
34
+ name="multi-wiki-qa-be",
35
+ pretty_name="MultiWikiQA-be",
36
+ source="EuroEval/multi-wiki-qa-be-mini",
37
+ task=RC,
38
+ languages=[BELARUSIAN],
39
+ )
40
+
41
+ BE_WSC_CONFIG = DatasetConfig(
42
+ name="be-wsc",
43
+ pretty_name="BE-WSC",
44
+ source="EuroEval/be-wsc",
45
+ task=COMMON_SENSE,
46
+ languages=[BELARUSIAN],
47
+ )
@@ -52,5 +52,5 @@ WINOGRANDE_BG_CONFIG = DatasetConfig(
52
52
  source="EuroEval/winogrande-bg",
53
53
  task=COMMON_SENSE,
54
54
  languages=[BULGARIAN],
55
- _labels=["a", "b"],
55
+ labels=["a", "b"],
56
56
  )
@@ -60,5 +60,5 @@ WINOGRANDE_CA_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-ca",
61
61
  task=COMMON_SENSE,
62
62
  languages=[CATALAN],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
@@ -52,5 +52,5 @@ WINOGRANDE_HR_CONFIG = DatasetConfig(
52
52
  source="EuroEval/winogrande-hr",
53
53
  task=COMMON_SENSE,
54
54
  languages=[CROATIAN],
55
- _labels=["a", "b"],
55
+ labels=["a", "b"],
56
56
  )
@@ -76,7 +76,8 @@ VALEU_DA_CONFIG = DatasetConfig(
76
76
  source="EuroEval/european-values-da",
77
77
  task=EUROPEAN_VALUES,
78
78
  languages=[DANISH],
79
- splits=["test"],
79
+ train_split=None,
80
+ val_split=None,
80
81
  bootstrap_samples=False,
81
82
  )
82
83
 
@@ -143,6 +144,6 @@ WINOGRANDE_DA_CONFIG = DatasetConfig(
143
144
  source="EuroEval/winogrande-da",
144
145
  task=COMMON_SENSE,
145
146
  languages=[DANISH],
146
- _labels=["a", "b"],
147
+ labels=["a", "b"],
147
148
  unofficial=True,
148
149
  )
@@ -24,7 +24,7 @@ DBRD_CONFIG = DatasetConfig(
24
24
  source="EuroEval/dbrd-mini",
25
25
  task=SENT,
26
26
  languages=[DUTCH],
27
- _labels=["negative", "positive"],
27
+ labels=["negative", "positive"],
28
28
  )
29
29
 
30
30
  SCALA_NL_CONFIG = DatasetConfig(
@@ -89,9 +89,10 @@ VALEU_NL_CONFIG = DatasetConfig(
89
89
  source="EuroEval/european-values-nl",
90
90
  task=EUROPEAN_VALUES,
91
91
  languages=[DUTCH],
92
- splits=["test"],
92
+ train_split=None,
93
+ val_split=None,
93
94
  bootstrap_samples=False,
94
- _instruction_prompt="{text}",
95
+ instruction_prompt="{text}",
95
96
  )
96
97
 
97
98
  MBBQ_NL_CONFIG = DatasetConfig(
@@ -100,7 +101,7 @@ MBBQ_NL_CONFIG = DatasetConfig(
100
101
  source="EuroEval/mbbq-nl",
101
102
  task=MCSTEREO,
102
103
  languages=[DUTCH],
103
- splits=["val", "test"],
104
+ train_split=None,
104
105
  )
105
106
 
106
107
 
@@ -158,7 +159,7 @@ COPA_NL_CONFIG = DatasetConfig(
158
159
  task=COMMON_SENSE,
159
160
  languages=[DUTCH],
160
161
  unofficial=True,
161
- _labels=["a", "b"],
162
+ labels=["a", "b"],
162
163
  )
163
164
 
164
165
  GOLDENSWAG_NL_CONFIG = DatasetConfig(
@@ -176,6 +177,6 @@ WINOGRANDE_NL_CONFIG = DatasetConfig(
176
177
  source="EuroEval/winogrande-nl",
177
178
  task=COMMON_SENSE,
178
179
  languages=[DUTCH],
179
- _labels=["a", "b"],
180
+ labels=["a", "b"],
180
181
  unofficial=True,
181
182
  )
@@ -68,9 +68,10 @@ VALEU_EN_CONFIG = DatasetConfig(
68
68
  source="EuroEval/european-values-en",
69
69
  task=EUROPEAN_VALUES,
70
70
  languages=[ENGLISH],
71
- splits=["test"],
71
+ train_split=None,
72
+ val_split=None,
72
73
  bootstrap_samples=False,
73
- _instruction_prompt="{text}",
74
+ instruction_prompt="{text}",
74
75
  )
75
76
 
76
77
 
@@ -127,6 +128,6 @@ WINOGRANDE_CONFIG = DatasetConfig(
127
128
  source="EuroEval/winogrande-en",
128
129
  task=COMMON_SENSE,
129
130
  languages=[ENGLISH],
130
- _labels=["a", "b"],
131
+ labels=["a", "b"],
131
132
  unofficial=True,
132
133
  )
@@ -60,13 +60,13 @@ WINOGRANDE_ET_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-et",
61
61
  task=COMMON_SENSE,
62
62
  languages=[ESTONIAN],
63
- _prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
63
+ prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
64
64
  "igal ülesandel on kaks vastusevarianti (a ja b).",
65
- _prompt_template="Tekstülesanne: {text}\nVastus: {label}",
66
- _instruction_prompt="Tekstülesanne: {text}\n\n"
65
+ prompt_template="Tekstülesanne: {text}\nVastus: {label}",
66
+ instruction_prompt="Tekstülesanne: {text}\n\n"
67
67
  "Sinu ülesanne on valida lünka sobiv vastusevariant. "
68
68
  "Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
69
- _labels=["a", "b"],
69
+ labels=["a", "b"],
70
70
  )
71
71
 
72
72
  VALEU_ET_CONFIG = DatasetConfig(
@@ -75,9 +75,10 @@ VALEU_ET_CONFIG = DatasetConfig(
75
75
  source="EuroEval/european-values-et",
76
76
  task=EUROPEAN_VALUES,
77
77
  languages=[ESTONIAN],
78
- splits=["test"],
78
+ train_split=None,
79
+ val_split=None,
79
80
  bootstrap_samples=False,
80
- _instruction_prompt="{text}",
81
+ instruction_prompt="{text}",
81
82
  )
82
83
 
83
84
  ### Unofficial datasets ###
@@ -97,7 +98,7 @@ EXAM_ET_CONFIG = DatasetConfig(
97
98
  source="EuroEval/exam-et",
98
99
  task=KNOW,
99
100
  languages=[ESTONIAN],
100
- _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
101
+ labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
101
102
  unofficial=True,
102
103
  )
103
104
 
@@ -12,7 +12,7 @@ FOSENT_CONFIG = DatasetConfig(
12
12
  source="EuroEval/fosent",
13
13
  task=SENT,
14
14
  languages=[FAROESE],
15
- _num_few_shot_examples=5,
15
+ num_few_shot_examples=5,
16
16
  )
17
17
 
18
18
  SCALA_FO_CONFIG = DatasetConfig(
@@ -12,7 +12,7 @@ SCANDISENT_FI_CONFIG = DatasetConfig(
12
12
  source="EuroEval/scandisent-fi-mini",
13
13
  task=SENT,
14
14
  languages=[FINNISH],
15
- _labels=["negative", "positive"],
15
+ labels=["negative", "positive"],
16
16
  )
17
17
 
18
18
  TURKU_NER_FI_CONFIG = DatasetConfig(
@@ -61,9 +61,10 @@ VALEU_FI_CONFIG = DatasetConfig(
61
61
  source="EuroEval/european-values-fi",
62
62
  task=EUROPEAN_VALUES,
63
63
  languages=[FINNISH],
64
- splits=["test"],
64
+ train_split=None,
65
+ val_split=None,
65
66
  bootstrap_samples=False,
66
- _instruction_prompt="{text}",
67
+ instruction_prompt="{text}",
67
68
  )
68
69
 
69
70
 
@@ -102,6 +103,6 @@ WINOGRANDE_FI_CONFIG = DatasetConfig(
102
103
  source="EuroEval/winogrande-fi",
103
104
  task=COMMON_SENSE,
104
105
  languages=[FINNISH],
105
- _labels=["a", "b"],
106
+ labels=["a", "b"],
106
107
  unofficial=True,
107
108
  )
@@ -12,8 +12,8 @@ ALLOCINE_CONFIG = DatasetConfig(
12
12
  source="EuroEval/allocine-mini",
13
13
  task=SENT,
14
14
  languages=[FRENCH],
15
- _labels=["negative", "positive"],
16
- _prompt_label_mapping=dict(positive="positif", negative="négatif"),
15
+ labels=["negative", "positive"],
16
+ prompt_label_mapping=dict(positive="positif", negative="négatif"),
17
17
  )
18
18
 
19
19
  SCALA_FR_CONFIG = DatasetConfig(
@@ -70,9 +70,10 @@ VALEU_FR_CONFIG = DatasetConfig(
70
70
  source="EuroEval/european-values-fr",
71
71
  task=EUROPEAN_VALUES,
72
72
  languages=[FRENCH],
73
- splits=["test"],
73
+ train_split=None,
74
+ val_split=None,
74
75
  bootstrap_samples=False,
75
- _instruction_prompt="{text}",
76
+ instruction_prompt="{text}",
76
77
  )
77
78
 
78
79
 
@@ -111,6 +112,6 @@ WINOGRANDE_FR_CONFIG = DatasetConfig(
111
112
  source="EuroEval/winogrande-fr",
112
113
  task=COMMON_SENSE,
113
114
  languages=[FRENCH],
114
- _labels=["a", "b"],
115
+ labels=["a", "b"],
115
116
  unofficial=True,
116
117
  )
@@ -68,9 +68,10 @@ VALEU_DE_CONFIG = DatasetConfig(
68
68
  source="EuroEval/european-values-de",
69
69
  task=EUROPEAN_VALUES,
70
70
  languages=[GERMAN],
71
- splits=["test"],
71
+ train_split=None,
72
+ val_split=None,
72
73
  bootstrap_samples=False,
73
- _instruction_prompt="{text}",
74
+ instruction_prompt="{text}",
74
75
  )
75
76
 
76
77
 
@@ -127,6 +128,6 @@ WINOGRANDE_DE_CONFIG = DatasetConfig(
127
128
  source="EuroEval/winogrande-de",
128
129
  task=COMMON_SENSE,
129
130
  languages=[GERMAN],
130
- _labels=["a", "b"],
131
+ labels=["a", "b"],
131
132
  unofficial=True,
132
133
  )
@@ -12,7 +12,7 @@ GREEK_SA_CONFIG = DatasetConfig(
12
12
  source="EuroEval/greek-sa-mini",
13
13
  task=SENT,
14
14
  languages=[GREEK],
15
- _labels=["negative", "positive"],
15
+ labels=["negative", "positive"],
16
16
  )
17
17
 
18
18
  SCALA_EL_CONFIG = DatasetConfig(
@@ -60,5 +60,5 @@ WINOGRANDE_HU_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-hu",
61
61
  task=COMMON_SENSE,
62
62
  languages=[HUNGARIAN],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
@@ -60,7 +60,7 @@ WINOGRANDE_IS_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-is",
61
61
  task=COMMON_SENSE,
62
62
  languages=[ICELANDIC],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
65
65
 
66
66
  VALEU_IS_CONFIG = DatasetConfig(
@@ -69,9 +69,10 @@ VALEU_IS_CONFIG = DatasetConfig(
69
69
  source="EuroEval/european-values-is",
70
70
  task=EUROPEAN_VALUES,
71
71
  languages=[ICELANDIC],
72
- splits=["test"],
72
+ train_split=None,
73
+ val_split=None,
73
74
  bootstrap_samples=False,
74
- _instruction_prompt="{text}",
75
+ instruction_prompt="{text}",
75
76
  )
76
77
 
77
78
 
@@ -68,9 +68,10 @@ VALEU_IT_CONFIG = DatasetConfig(
68
68
  source="EuroEval/european-values-it",
69
69
  task=EUROPEAN_VALUES,
70
70
  languages=[ITALIAN],
71
- splits=["test"],
71
+ train_split=None,
72
+ val_split=None,
72
73
  bootstrap_samples=False,
73
- _instruction_prompt="{text}",
74
+ instruction_prompt="{text}",
74
75
  )
75
76
 
76
77
 
@@ -118,6 +119,6 @@ WINOGRANDE_IT_CONFIG = DatasetConfig(
118
119
  source="EuroEval/winogrande-it",
119
120
  task=COMMON_SENSE,
120
121
  languages=[ITALIAN],
121
- _labels=["a", "b"],
122
+ labels=["a", "b"],
122
123
  unofficial=True,
123
124
  )
@@ -61,7 +61,7 @@ COPA_LV_CONFIG = DatasetConfig(
61
61
  source="EuroEval/copa-lv",
62
62
  task=COMMON_SENSE,
63
63
  languages=[LATVIAN],
64
- _labels=["a", "b"],
64
+ labels=["a", "b"],
65
65
  )
66
66
 
67
67
 
@@ -82,6 +82,6 @@ WINOGRANDE_LV_CONFIG = DatasetConfig(
82
82
  source="EuroEval/winogrande-lv",
83
83
  task=COMMON_SENSE,
84
84
  languages=[LATVIAN],
85
- _labels=["a", "b"],
85
+ labels=["a", "b"],
86
86
  unofficial=True,
87
87
  )
@@ -60,7 +60,7 @@ WINOGRANDE_LT_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-lt",
61
61
  task=COMMON_SENSE,
62
62
  languages=[LITHUANIAN],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
65
65
 
66
66
  ### Unofficial datasets ###
@@ -52,7 +52,7 @@ NORQUAD_CONFIG = DatasetConfig(
52
52
  source="EuroEval/norquad-mini",
53
53
  task=RC,
54
54
  languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
55
- _num_few_shot_examples=2,
55
+ num_few_shot_examples=2,
56
56
  )
57
57
 
58
58
  NO_SAMMENDRAG_CONFIG = DatasetConfig(
@@ -85,7 +85,7 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
85
85
  source="EuroEval/nor-common-sense-qa",
86
86
  task=COMMON_SENSE,
87
87
  languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
88
- _labels=["a", "b", "c", "d", "e"],
88
+ labels=["a", "b", "c", "d", "e"],
89
89
  )
90
90
 
91
91
  VALEU_NO_CONFIG = DatasetConfig(
@@ -94,9 +94,10 @@ VALEU_NO_CONFIG = DatasetConfig(
94
94
  source="EuroEval/european-values-no",
95
95
  task=EUROPEAN_VALUES,
96
96
  languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
97
- splits=["test"],
97
+ train_split=None,
98
+ val_split=None,
98
99
  bootstrap_samples=False,
99
- _instruction_prompt="{text}",
100
+ instruction_prompt="{text}",
100
101
  )
101
102
 
102
103
 
@@ -207,6 +208,6 @@ WINOGRANDE_NO_CONFIG = DatasetConfig(
207
208
  source="EuroEval/winogrande-no",
208
209
  task=COMMON_SENSE,
209
210
  languages=[NORWEGIAN_BOKMÅL, NORWEGIAN_NYNORSK, NORWEGIAN],
210
- _labels=["a", "b"],
211
+ labels=["a", "b"],
211
212
  unofficial=True,
212
213
  )
@@ -60,7 +60,7 @@ WINOGRANDE_PL_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-pl",
61
61
  task=COMMON_SENSE,
62
62
  languages=[POLISH],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
65
65
 
66
66
  VALEU_PL_CONFIG = DatasetConfig(
@@ -69,9 +69,10 @@ VALEU_PL_CONFIG = DatasetConfig(
69
69
  source="EuroEval/european-values-pl",
70
70
  task=EUROPEAN_VALUES,
71
71
  languages=[POLISH],
72
- splits=["test"],
72
+ train_split=None,
73
+ val_split=None,
73
74
  bootstrap_samples=False,
74
- _instruction_prompt="{text}",
75
+ instruction_prompt="{text}",
75
76
  )
76
77
 
77
78
 
@@ -12,7 +12,7 @@ SST2_PT_CONFIG = DatasetConfig(
12
12
  source="EuroEval/sst2-pt-mini",
13
13
  task=SENT,
14
14
  languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
15
- _labels=["positive", "negative"],
15
+ labels=["positive", "negative"],
16
16
  )
17
17
 
18
18
  SCALA_PT = DatasetConfig(
@@ -69,9 +69,10 @@ VALEU_PT_CONFIG = DatasetConfig(
69
69
  source="EuroEval/european-values-pt",
70
70
  task=EUROPEAN_VALUES,
71
71
  languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
72
- splits=["test"],
72
+ train_split=None,
73
+ val_split=None,
73
74
  bootstrap_samples=False,
74
- _instruction_prompt="{text}",
75
+ instruction_prompt="{text}",
75
76
  )
76
77
 
77
78
 
@@ -92,6 +93,6 @@ WINOGRANDE_PT_CONFIG = DatasetConfig(
92
93
  source="EuroEval/winogrande-pt",
93
94
  task=COMMON_SENSE,
94
95
  languages=[PORTUGUESE, EUROPEAN_PORTUGUESE],
95
- _labels=["a", "b"],
96
+ labels=["a", "b"],
96
97
  unofficial=True,
97
98
  )
@@ -12,7 +12,7 @@ ROSENT_CONFIG = DatasetConfig(
12
12
  source="EuroEval/ro-sent-mini",
13
13
  task=SENT,
14
14
  languages=[ROMANIAN],
15
- _labels=["positive", "negative"],
15
+ labels=["positive", "negative"],
16
16
  )
17
17
 
18
18
  SCALA_RO_CONFIG = DatasetConfig(
@@ -61,5 +61,5 @@ WINOGRANDE_RO_CONFIG = DatasetConfig(
61
61
  source="EuroEval/winogrande-ro",
62
62
  task=COMMON_SENSE,
63
63
  languages=[ROMANIAN],
64
- _labels=["a", "b"],
64
+ labels=["a", "b"],
65
65
  )
@@ -60,5 +60,5 @@ WINOGRANDE_SR_CONFIG = DatasetConfig(
60
60
  source="EuroEval/winogrande-sr",
61
61
  task=COMMON_SENSE,
62
62
  languages=[SERBIAN],
63
- _labels=["a", "b"],
63
+ labels=["a", "b"],
64
64
  )
@@ -52,5 +52,5 @@ WINOGRANDE_SL_CONFIG = DatasetConfig(
52
52
  source="EuroEval/winogrande-sl",
53
53
  task=COMMON_SENSE,
54
54
  languages=[SLOVENE],
55
- _labels=["a", "b"],
55
+ labels=["a", "b"],
56
56
  )
@@ -68,9 +68,10 @@ VALEU_ES_CONFIG = DatasetConfig(
68
68
  source="EuroEval/european-values-es",
69
69
  task=EUROPEAN_VALUES,
70
70
  languages=[SPANISH],
71
- splits=["test"],
71
+ train_split=None,
72
+ val_split=None,
72
73
  bootstrap_samples=False,
73
- _instruction_prompt="{text}",
74
+ instruction_prompt="{text}",
74
75
  )
75
76
 
76
77
 
@@ -127,6 +128,6 @@ WINOGRANDE_ES_CONFIG = DatasetConfig(
127
128
  source="EuroEval/winogrande-es",
128
129
  task=COMMON_SENSE,
129
130
  languages=[SPANISH],
130
- _labels=["a", "b"],
131
+ labels=["a", "b"],
131
132
  unofficial=True,
132
133
  )