EuroEval 15.11.0-py3-none-any.whl → 15.13.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (35)
  1. euroeval/benchmark_config_factory.py +7 -0
  2. euroeval/benchmark_modules/vllm.py +1 -1
  3. euroeval/benchmarker.py +7 -0
  4. euroeval/cli.py +10 -0
  5. euroeval/data_models.py +7 -0
  6. euroeval/dataset_configs/__init__.py +1 -0
  7. euroeval/dataset_configs/danish.py +10 -0
  8. euroeval/dataset_configs/dutch.py +10 -0
  9. euroeval/dataset_configs/english.py +10 -0
  10. euroeval/dataset_configs/faroese.py +10 -0
  11. euroeval/dataset_configs/finnish.py +10 -0
  12. euroeval/dataset_configs/french.py +10 -0
  13. euroeval/dataset_configs/german.py +10 -0
  14. euroeval/dataset_configs/icelandic.py +10 -0
  15. euroeval/dataset_configs/italian.py +10 -0
  16. euroeval/dataset_configs/norwegian.py +20 -0
  17. euroeval/dataset_configs/portuguese.py +81 -0
  18. euroeval/dataset_configs/spanish.py +14 -3
  19. euroeval/dataset_configs/swedish.py +10 -0
  20. euroeval/generation.py +22 -4
  21. euroeval/generation_utils.py +0 -1
  22. euroeval/human_evaluation.py +1 -0
  23. euroeval/languages.py +1 -2
  24. euroeval/prompt_templates/linguistic_acceptability.py +9 -1
  25. euroeval/prompt_templates/multiple_choice.py +9 -1
  26. euroeval/prompt_templates/named_entity_recognition.py +20 -1
  27. euroeval/prompt_templates/reading_comprehension.py +10 -1
  28. euroeval/prompt_templates/sentiment_classification.py +11 -1
  29. euroeval/prompt_templates/summarization.py +8 -1
  30. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/METADATA +2 -2
  31. euroeval-15.13.0.dist-info/RECORD +63 -0
  32. euroeval-15.11.0.dist-info/RECORD +0 -62
  33. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/WHEEL +0 -0
  34. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/entry_points.txt +0 -0
  35. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_config_factory.py CHANGED
@@ -42,6 +42,7 @@ def build_benchmark_config(
  num_iterations: int,
  api_base: str | None,
  api_version: str | None,
+ gpu_memory_utilization: float,
  debug: bool,
  run_with_cli: bool,
  only_allow_safetensors: bool,
@@ -102,6 +103,11 @@ def build_benchmark_config(
  model on an inference API.
  api_version:
  The version of the API to use for a given inference API.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. A larger value will result in
+ faster evaluation, but at the risk of running out of GPU memory. Only reduce
+ this if you are running out of GPU memory. Only relevant if the model is
+ generative.
  debug:
  Whether to run the benchmark in debug mode.
  run_with_cli:
@@ -154,6 +160,7 @@ def build_benchmark_config(
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=run_with_cli,
  only_allow_safetensors=only_allow_safetensors,
euroeval/benchmark_modules/vllm.py CHANGED
@@ -757,7 +757,7 @@ def load_model_and_tokenizer(
  model = LLM(
  model=model_id,
  tokenizer=model_id,
- gpu_memory_utilization=0.9,
+ gpu_memory_utilization=benchmark_config.gpu_memory_utilization,
  max_model_len=min(true_max_model_len, MAX_CONTEXT_LENGTH),
  download_dir=download_dir,
  trust_remote_code=benchmark_config.trust_remote_code,
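The previously hard-coded 0.9 now flows in from the benchmark config. As an illustrative sketch (the model id is hypothetical; gpu_memory_utilization is vLLM's own LLM constructor argument), the resulting call behaves like:

    from vllm import LLM  # requires a CUDA-capable environment

    # Lower the fraction only if vLLM runs out of GPU memory; higher values
    # give faster evaluation at the risk of CUDA out-of-memory errors.
    llm = LLM(
        model="some-org/some-model",  # hypothetical model id
        gpu_memory_utilization=0.8,
    )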
euroeval/benchmarker.py CHANGED
@@ -78,6 +78,7 @@ class Benchmarker:
  num_iterations: int = 10,
  api_base: str | None = None,
  api_version: str | None = None,
+ gpu_memory_utilization: float = 0.9,
  debug: bool = False,
  run_with_cli: bool = False,
  only_allow_safetensors: bool = False,
@@ -145,6 +146,11 @@ class Benchmarker:
  to a model on an inference API. Defaults to None.
  api_version:
  The version of the API to use. Defaults to None.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. Only relevant if the model
+ is generative. A larger value will result in faster evaluation, but at
+ the risk of running out of GPU memory. Only reduce this if you are
+ running out of GPU memory. Defaults to 0.9.
  debug:
  Whether to output debug information. Defaults to False.
  run_with_cli:
@@ -192,6 +198,7 @@ class Benchmarker:
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=run_with_cli,
  only_allow_safetensors=only_allow_safetensors,
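A usage sketch of the new argument through the Python API (the 0.8 is illustrative; 0.9 remains the default):

    from euroeval import Benchmarker

    # Lower gpu_memory_utilization only if vLLM runs out of GPU memory.
    benchmarker = Benchmarker(gpu_memory_utilization=0.8)

The matching CLI flag, --gpu-memory-utilization, is wired up in euroeval/cli.py below.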
euroeval/cli.py CHANGED
@@ -186,6 +186,14 @@ from .tasks import get_all_tasks
  help="The version of the API to use. Only relevant if `model` refers to a model on "
  "an inference API.",
  )
+ @click.option(
+ "--gpu-memory-utilization",
+ default=0.9,
+ show_default=True,
+ help="The GPU memory utilization to use for vLLM. A larger value will result in "
+ "faster evaluation, but at the risk of running out of GPU memory. Only reduce this "
+ "if you are running out of GPU memory. Only relevant if the model is generative.",
+ )
  @click.option(
  "--debug/--no-debug",
  default=False,
@@ -223,6 +231,7 @@ def benchmark(
  num_iterations: int,
  api_base: str | None,
  api_version: str | None,
+ gpu_memory_utilization: float,
  debug: bool,
  only_allow_safetensors: bool,
  ) -> None:
@@ -258,6 +267,7 @@ def benchmark(
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=True,
  only_allow_safetensors=only_allow_safetensors,
euroeval/data_models.py CHANGED
@@ -168,6 +168,11 @@ class BenchmarkConfig:
  api_version:
  The version of the API to use. Only relevant if `model` refers to a model on
  an inference API.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. A larger value will result in
+ faster evaluation, but at the risk of running out of GPU memory. Only reduce
+ this if you are running out of GPU memory. Only relevant if the model is
+ generative.
  debug:
  Whether to run the benchmark in debug mode.
  run_with_cli:
@@ -196,6 +201,7 @@ class BenchmarkConfig:
  num_iterations: int
  api_base: str | None
  api_version: str | None
+ gpu_memory_utilization: float
  debug: bool
  run_with_cli: bool
  only_allow_safetensors: bool
@@ -227,6 +233,7 @@ class BenchmarkConfigParams(pydantic.BaseModel):
  num_iterations: int
  api_base: str | None
  api_version: str | None
+ gpu_memory_utilization: float
  debug: bool
  run_with_cli: bool
  only_allow_safetensors: bool
euroeval/dataset_configs/__init__.py CHANGED
@@ -13,6 +13,7 @@ from .german import * # noqa: F403
  from .icelandic import * # noqa: F403
  from .italian import * # noqa: F403
  from .norwegian import * # noqa: F403
+ from .portuguese import * # noqa: F403
  from .spanish import * # noqa: F403
  from .swedish import * # noqa: F403

euroeval/dataset_configs/danish.py CHANGED
@@ -118,3 +118,13 @@ BELEBELE_DA_CONFIG = DatasetConfig(
  languages=[DA],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-da",
+ pretty_name="the truncated version of the Danish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-da-mini",
+ task=RC,
+ languages=[DA],
+ unofficial=True,
+ )
euroeval/dataset_configs/dutch.py CHANGED
@@ -110,3 +110,13 @@ BELEBELE_NL_CONFIG = DatasetConfig(
  languages=[NL],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_NL_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nl",
+ pretty_name="the truncated version of the Dutch part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-nl-mini",
+ task=RC,
+ languages=[NL],
+ unofficial=True,
+ )
euroeval/dataset_configs/english.py CHANGED
@@ -95,3 +95,13 @@ MMLU_CONFIG = DatasetConfig(
  languages=[EN],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-en",
+ pretty_name="the truncated version of the English part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-en-mini",
+ task=RC,
+ languages=[EN],
+ unofficial=True,
+ )
euroeval/dataset_configs/faroese.py CHANGED
@@ -52,3 +52,13 @@ WIKIANN_FO_CONFIG = DatasetConfig(
  languages=[FO],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fo",
+ pretty_name="the truncated version of the Faroese part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fo-mini",
+ task=RC,
+ languages=[FO],
+ unofficial=True,
+ )
euroeval/dataset_configs/finnish.py CHANGED
@@ -68,3 +68,13 @@ BELEBELE_FI_CONFIG = DatasetConfig(
  languages=[FI],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fi",
+ pretty_name="the truncated version of the Finnish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fi-mini",
+ task=RC,
+ languages=[FI],
+ unofficial=True,
+ )
euroeval/dataset_configs/french.py CHANGED
@@ -81,3 +81,13 @@ BELEBELE_FR_CONFIG = DatasetConfig(
  languages=[FR],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fr",
+ pretty_name="the truncated version of the French part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fr-mini",
+ task=RC,
+ languages=[FR],
+ unofficial=True,
+ )
euroeval/dataset_configs/german.py CHANGED
@@ -89,3 +89,13 @@ BELEBELE_DE_CONFIG = DatasetConfig(
  languages=[DE],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_DE_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-de",
+ pretty_name="the truncated version of the German part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-de-mini",
+ task=RC,
+ languages=[DE],
+ unofficial=True,
+ )
euroeval/dataset_configs/icelandic.py CHANGED
@@ -146,3 +146,13 @@ BELEBELE_IS_CONFIG = DatasetConfig(
  languages=[IS],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-is",
+ pretty_name="the truncated version of the Icelandic part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-is-mini",
+ task=RC,
+ languages=[IS],
+ unofficial=True,
+ )
euroeval/dataset_configs/italian.py CHANGED
@@ -89,3 +89,13 @@ BELEBELE_IT_CONFIG = DatasetConfig(
  languages=[IT],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_IT_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-it",
+ pretty_name="the truncated version of the Italian part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-it-mini",
+ task=RC,
+ languages=[IT],
+ unofficial=True,
+ )
euroeval/dataset_configs/norwegian.py CHANGED
@@ -184,3 +184,23 @@ BELEBELE_NO_CONFIG = DatasetConfig(
  languages=[NB, NN, NO],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_NB_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nb",
+ pretty_name="the truncated version of the Norwegian Bokmål part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-no-mini",
+ task=RC,
+ languages=[NB, NO],
+ unofficial=True,
+ )
+
+ MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nn",
+ pretty_name="the truncated version of the Norwegian Nynorsk part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-nn-mini",
+ task=RC,
+ languages=[NN],
+ unofficial=True,
+ )
euroeval/dataset_configs/portuguese.py ADDED
@@ -0,0 +1,81 @@
+ """All Portuguese dataset configurations used in EuroEval."""
+
+ from ..data_models import DatasetConfig
+ from ..languages import PT
+ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+ ### Official datasets ###
+
+ SST2_PT_CONFIG = DatasetConfig(
+ name="sst2-pt",
+ pretty_name="the truncated version of the Portuguese sentiment classification "
+ "dataset SST2-pt, translated from the English SST2 dataset",
+ huggingface_id="EuroEval/sst2-pt-mini",
+ task=SENT,
+ languages=[PT],
+ _labels=["positive", "negative"],
+ )
+
+ SCALA_PT = DatasetConfig(
+ name="scala-pt",
+ pretty_name="the Portuguese part of the linguistic acceptability dataset ScaLA",
+ huggingface_id="EuroEval/scala-pt",
+ task=LA,
+ languages=[PT],
+ )
+
+ HAREM_CONFIG = DatasetConfig(
+ name="harem",
+ pretty_name="the Portuguese named entity recognition dataset HAREM",
+ huggingface_id="EuroEval/harem",
+ task=NER,
+ languages=[PT],
+ )
+
+ MULTI_WIKI_QA_PT_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-pt",
+ pretty_name="the truncated version of the Portuguese part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-pt-pt-mini",
+ task=RC,
+ languages=[PT],
+ )
+
+ PUBLICO_CONFIG = DatasetConfig(
+ name="publico",
+ pretty_name="the truncated version of the Portuguese summarisation dataset Público",
+ huggingface_id="EuroEval/publico-mini",
+ task=SUMM,
+ languages=[PT],
+ )
+
+ MMLU_PT_CONFIG = DatasetConfig(
+ name="mmlu-pt",
+ pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
+ "translated from the English MMLU dataset",
+ huggingface_id="EuroEval/mmlu-pt-mini",
+ task=KNOW,
+ languages=[PT],
+ )
+
+ GOLDENSWAG_PT_CONFIG = DatasetConfig(
+ name="goldenswag-pt",
+ pretty_name="the truncated version of the Portuguese common-sense reasoning "
+ "dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
+ huggingface_id="EuroEval/goldenswag-pt-mini",
+ task=COMMON_SENSE,
+ languages=[PT],
+ )
+
+
+ ### Unofficial datasets ###
+
+ BOOLQ_PT_CONFIG = DatasetConfig(
+ name="boolq-pt",
+ pretty_name="the Portuguese multiple choice reading comprehension dataset "
+ "BoolQ-pt, translated from the English BoolQ dataset",
+ huggingface_id="EuroEval/boolq-pt",
+ task=MCRC,
+ languages=[PT],
+ unofficial=True,
+ )
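The new Portuguese datasets can then be evaluated like any other; a minimal sketch, assuming the usual Benchmarker call signature and a hypothetical model id:

    from euroeval import Benchmarker

    # Dataset names come from the configs above, e.g. "multi-wiki-qa-pt".
    benchmark = Benchmarker()
    benchmark(model="some-org/some-model", dataset="multi-wiki-qa-pt")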
euroeval/dataset_configs/spanish.py CHANGED
@@ -8,7 +8,8 @@ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM

  SENTIMENT_HEADLINES_CONFIG = DatasetConfig(
  name="sentiment-headlines-es",
- pretty_name="the truncated version of the Spanish sentiment headlines dataset",
+ pretty_name="the truncated version of the Spanish sentiment classification dataset "
+ "SentimentHeadlines",
  huggingface_id="EuroEval/sentiment-headlines-es",
  task=SENT,
  languages=[ES],
@@ -33,7 +34,7 @@ CONLL_ES_CONFIG = DatasetConfig(

  MLQA_ES_CONFIG = DatasetConfig(
  name="mlqa-es",
- pretty_name="the Spanish version of the MLQA reading comprehension dataset",
+ pretty_name="the Spanish version of the reading comprehension dataset MLQA",
  huggingface_id="EuroEval/mlqa-es",
  task=RC,
  languages=[ES],
@@ -70,7 +71,7 @@ HELLASWAG_ES_CONFIG = DatasetConfig(

  XQUAD_ES_CONFIG = DatasetConfig(
  name="xquad-es",
- pretty_name="the Spanish version of the XQuAD reading comprehension dataset",
+ pretty_name="the Spanish version of the reading comprehension dataset XQuAD",
  huggingface_id="EuroEval/xquad-es",
  task=RC,
  languages=[ES],
@@ -86,3 +87,13 @@ BELEBELE_ES_CONFIG = DatasetConfig(
  languages=[ES],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_ES_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-es",
+ pretty_name="the truncated version of the Spanish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-es-mini",
+ task=RC,
+ languages=[ES],
+ unofficial=True,
+ )
euroeval/dataset_configs/swedish.py CHANGED
@@ -98,3 +98,13 @@ BELEBELE_SV_CONFIG = DatasetConfig(
  languages=[SV],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_SV_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-sv",
+ pretty_name="the truncated version of the Swedish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-sv-mini",
+ task=RC,
+ languages=[SV],
+ unofficial=True,
+ )
euroeval/generation.py CHANGED
@@ -200,17 +200,35 @@ def generate_single_iteration(
  all_preds.extend(extracted_labels)

  if "label" in non_cached_dataset.column_names:
+ non_cached_labels = non_cached_dataset["label"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["label"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
  ground_truth = [
  label.lower() if isinstance(label, str) else label
- for label in non_cached_dataset["label"] + cached_dataset["label"]
+ for label in non_cached_labels + cached_labels
  ]
  elif "labels" in non_cached_dataset.column_names:
+ non_cached_labels = non_cached_dataset["labels"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["labels"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
  ground_truth = [
  [label.lower() if isinstance(label, str) else label for label in label_list]
- for label_list in non_cached_dataset["labels"] + cached_dataset["labels"]
+ for label_list in non_cached_labels + cached_labels
  ]
  elif "target_text" in non_cached_dataset.column_names:
- ground_truth = non_cached_dataset["target_text"] + cached_dataset["target_text"]
+ non_cached_labels = non_cached_dataset["target_text"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["target_text"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
+ ground_truth = non_cached_labels + cached_labels
  else:
  raise ValueError(
  "The dataset must have either a 'label', 'labels', or 'target_text' column"
@@ -306,7 +324,7 @@ def debug_log(
  ):
  logger.info(
  f"Input: '{input_text}'\n"
- f"Raw outout: '{raw_output}'\n"
+ f"Raw output: '{raw_output}'\n"
  f"Prediction: '{prediction}'\n"
  f"Label: '{label}'"
  )
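The repeated isinstance guards above all apply the same normalisation: coerce each dataset column to a plain list before concatenating the non-cached and cached values, since indexing a dataset column may return a lazy sequence for which + is not defined. A condensed, self-contained sketch of the pattern, using toy data and a hypothetical as_list helper:

    def as_list(column) -> list:
        # Coerce a dataset column to a plain list so that `+` concatenation
        # is well-defined regardless of the column's concrete type.
        return column if isinstance(column, list) else list(column)

    non_cached = ("Positive", "Negative")  # stand-in for a lazy column
    cached = ["Neutral"]
    ground_truth = [
        label.lower() if isinstance(label, str) else label
        for label in as_list(non_cached) + as_list(cached)
    ]
    print(ground_truth)  # ['positive', 'negative', 'neutral']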
euroeval/generation_utils.py CHANGED
@@ -323,7 +323,6 @@ def apply_prompt(
  tokenize=False,
  add_generation_prompt=True,
  chat_template=chat_template,
- enable_thinking=True,
  )
  for messages in messages_list
  ]
euroeval/human_evaluation.py CHANGED
@@ -272,6 +272,7 @@ class HumanEvaluator:
  num_iterations=iteration + 1,
  api_base=None,
  api_version=None,
+ gpu_memory_utilization=0.9,
  debug=False,
  run_with_cli=True,
  only_allow_safetensors=False,
euroeval/languages.py CHANGED
@@ -36,7 +36,7 @@ NN = Language(
  )
  ES = Language(code="es", name="Spanish", _and_separator="y", _or_separator="o")
  SV = Language(code="sv", name="Swedish", _and_separator="och", _or_separator="eller")
-
+ PT = Language(code="pt", name="Portuguese", _and_separator="e", _or_separator="ou")

  AB = Language(code="ab", name="Abkhazian")
  AA = Language(code="aa", name="Afar")
@@ -152,7 +152,6 @@ PI = Language(code="pi", name="Pali")
  PS = Language(code="ps", name="Pashto")
  FA = Language(code="fa", name="Persian")
  PL = Language(code="pl", name="Polish")
- PT = Language(code="pt", name="Portuguese")
  PA = Language(code="pa", name="Punjabi")
  QU = Language(code="qu", name="Quechua")
  RO = Language(code="ro", name="Romanian")
euroeval/prompt_templates/linguistic_acceptability.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Linguistic Acceptability task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  LA_TEMPLATES = {
  DA: PromptConfig(
@@ -36,6 +36,14 @@ LA_TEMPLATES = {
  default_instruction_prompt="Texto: {text}\n\nDetermina si el texto es "
  "gramaticalmente correcto o no. Responde con {labels_str}, y nada más.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
+ default_prompt_prefix="Seguem-se abaixo textos e se são "
+ "gramaticalmente correctos",
+ default_prompt_template="Texto: {text}\nGramaticalmente correcto: {label}",
+ default_instruction_prompt="Texto: {text}\n\nDetermina se o texto é "
+ "gramaticalmente correcto ou não. Responde com {labels_str}, e nada mais.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping=dict(correct="kyllä", incorrect="ei"),
  default_prompt_prefix="Seuraavat ovat lauseita ja ovatko ne "
euroeval/prompt_templates/multiple_choice.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for all multiple choice tasks."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV

  # TODO: Missing Faroese
  MULTIPLE_CHOICE_TEMPLATES = {
@@ -36,6 +36,14 @@ MULTIPLE_CHOICE_TEMPLATES = {
  "usando solo {labels_str}, y nada más.",
  default_prompt_label_mapping="auto",
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="As seguintes são perguntas de escolha múltipla "
+ "(com respostas).",
+ default_prompt_template="Pergunta: {text}\nResposta: {label}",
+ default_instruction_prompt="Pergunta: {text}\n\nResponde à pergunta "
+ "acima usando só {labels_str}, e nada mais.",
+ default_prompt_label_mapping="auto",
+ ),
  FI: PromptConfig(
  default_prompt_prefix="Seuraavat ovat monivalintakysymyksiä (vastauksineen).",
  default_prompt_template="Kysymys: {text}\nVastaus: {label}",
euroeval/prompt_templates/named_entity_recognition.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Named Entity Recognition task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  NER_TEMPLATES = {
  DA: PromptConfig(
@@ -80,6 +80,25 @@ NER_TEMPLATES = {
  "claves {labels_str}. Los valores deben ser listas de las "
  "entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping={
+ "b-per": "pessoa",
+ "i-per": "pessoa",
+ "b-loc": "local",
+ "i-loc": "local",
+ "b-org": "organização",
+ "i-org": "organização",
+ "b-misc": "diverso",
+ "i-misc": "diverso",
+ },
+ default_prompt_prefix="Seguem-se frases e dicionários JSON com as entidades "
+ "mencionadas presentes na frase indicada.",
+ default_prompt_template="Frase: {text}\nEntidades mencionadas: {label}",
+ default_instruction_prompt="Frase: {text}\n\nIdentifica as entidades "
+ "mencionadas na frase. Deves devolver um dicionário JSON com as chaves "
+ "{labels_str}. Os valores devem ser listas contendo as entidades "
+ "mencionadas desse tipo, tal como ocorrem na frase.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping={
  "b-per": "henkilö",
euroeval/prompt_templates/reading_comprehension.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Reading Comprehension task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  RC_TEMPLATES = {
  DA: PromptConfig(
@@ -117,6 +117,15 @@ RC_TEMPLATES = {
  "teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
  default_prompt_label_mapping=dict(),
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="Os textos que se seguem são acompanhados de perguntas "
+ "e respostas.",
+ default_prompt_template="Texto: {text}\nPergunta: {question}\nResposta com "
+ "um máximo de 3 palavras: {label}",
+ default_instruction_prompt="Texto: {text}\n\nResponde à seguinte pergunta "
+ "sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
+ default_prompt_label_mapping=dict(),
+ ),
  SV: PromptConfig(
  default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
  default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
euroeval/prompt_templates/sentiment_classification.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Sentiment Analysis task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  SENT_TEMPLATES = {
  DA: PromptConfig(
@@ -44,6 +44,16 @@ SENT_TEMPLATES = {
  default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
  "documento. Responde con {labels_str}, y nada más.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping=dict(
+ positive="positivo", neutral="neutro", negative="negativo"
+ ),
+ default_prompt_prefix="Abaixo encontras documentos e os seus "
+ "sentimentos correspondentes, que podem ser {labels_str}.",
+ default_prompt_template="Documento: {text}\nSentimento: {label}",
+ default_instruction_prompt="Documento: {text}\n\nClassifica o "
+ "sentimento do documento. Responde apenas com {labels_str}.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping=dict(
  positive="positiivinen", neutral="neutrali", negative="negatiivinen"
euroeval/prompt_templates/summarization.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Summarization task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV

  # TODO: Missing Faroese
  SUMM_TEMPLATES = {
@@ -36,6 +36,13 @@ SUMM_TEMPLATES = {
  "documento anterior.",
  default_prompt_label_mapping=dict(),
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="Abaixo encontras documentos com resumos associados.",
+ default_prompt_template="Documento: {text}\nResumo: {target_text}",
+ default_instruction_prompt="Documento: {text}\n\nEscreve um resumo do "
+ "documento anterior.",
+ default_prompt_label_mapping=dict(),
+ ),
  FI: PromptConfig(
  default_prompt_prefix="Seuraavassa on artikkeleita ja niihin liittyviä "
  "tiivistelmiä.",
{euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: EuroEval
- Version: 15.11.0
+ Version: 15.13.0
  Summary: The robust European language model benchmark.
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -29,7 +29,7 @@ License: MIT License
  SOFTWARE.
  License-File: LICENSE
  Requires-Python: <4.0,>=3.10
- Requires-Dist: accelerate>=0.34.2
+ Requires-Dist: accelerate>=1.9.0
  Requires-Dist: bert-score>=0.3.13
  Requires-Dist: click>=8.1.3
  Requires-Dist: datasets>=3.5.0
euroeval-15.13.0.dist-info/RECORD ADDED
@@ -0,0 +1,63 @@
+ euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
+ euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
+ euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+ euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
+ euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
+ euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
+ euroeval/data_models.py,sha256=gPHyIoN2A5_O-cJgyb6jhn6enH8zsiIBI09W_wdHMQs,22031
+ euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+ euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
+ euroeval/generation.py,sha256=1fqFEWwM2RzI3uPZem95VFWbN8EfrKZQTrHEP34ihHs,11622
+ euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
+ euroeval/human_evaluation.py,sha256=Jtz3K5Lqne48wPZWf4EAd3d-n_wX27nGJHigjhV1D7s,27537
+ euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
+ euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
+ euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+ euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
+ euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
+ euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
+ euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
+ euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
+ euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+ euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
+ euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
+ euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
+ euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
+ euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
+ euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
+ euroeval/dataset_configs/danish.py,sha256=-y-n08hTApwTdSVdjRlZYa3gOX92cTGhg8xsuG-Lhww,3691
+ euroeval/dataset_configs/dutch.py,sha256=siyFeEKYx2gBpyqQPtOZ0cD8FTsIMUqzRX5xrQfrNXI,3480
+ euroeval/dataset_configs/english.py,sha256=uQAaGWpHk8xqFCeIhmmPXYTb1cZomeEdRaRe9qIZQrg,2858
+ euroeval/dataset_configs/faroese.py,sha256=gkgxQTWGFbfg9Eo1z-NSLROgKDcaij9tAN2mfgtrt0M,1647
+ euroeval/dataset_configs/finnish.py,sha256=OyveLgyii0hOlo6HZsqAq4rwDrj8tl2qstRfQKugURo,2342
+ euroeval/dataset_configs/french.py,sha256=DKKZEtohWkw_ouBaxWcPzp-K6NhQNtvCKxj8NLbIpUc,2678
+ euroeval/dataset_configs/german.py,sha256=3bfRgkqIGkAhcw4kwcJN9PKuJSmi1r6AFTJY-IWKgWM,2856
+ euroeval/dataset_configs/icelandic.py,sha256=g21IHjcwEZvf_yJ9PobeuBOqRiLOk0oCdEjY34g-UMk,4497
+ euroeval/dataset_configs/italian.py,sha256=rHLMkSXT0kFoQlkwHODxO50WBRIfGtkAnW_C-sfIu74,2957
+ euroeval/dataset_configs/norwegian.py,sha256=-WvQM44xCwjrqBzlAy4rjf6v87fGera2JmZV_069TeQ,6003
+ euroeval/dataset_configs/portuguese.py,sha256=3SqbwD0PNTILGALzh50pVoEwC-spRD75ZeE2NEj151E,2367
+ euroeval/dataset_configs/spanish.py,sha256=VKfBIpBRR38ckuULw7Ftmc-0smsm6GshUAik2-Y1Npw,2855
+ euroeval/dataset_configs/swedish.py,sha256=WpExi4TJqy_Ruwy4Kvde94jM605vT_88el_KKUzLV4E,3108
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
+ euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
+ euroeval/prompt_templates/reading_comprehension.py,sha256=3Nch-9zHfUDIwy-k5mP-TRhHQRQ9nad8HdhpJ1S8nGc,7072
+ euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
+ euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
+ euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
+ euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
+ euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
+ euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
+ euroeval-15.13.0.dist-info/METADATA,sha256=HnDtAE2-sYFmSl4yM9PQhgUrfklR_OB5C5aXPOgz5U8,13478
+ euroeval-15.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ euroeval-15.13.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+ euroeval-15.13.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+ euroeval-15.13.0.dist-info/RECORD,,
euroeval-15.11.0.dist-info/RECORD DELETED
@@ -1,62 +0,0 @@
- euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
- euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu7JM-2xI,11158
- euroeval/benchmarker.py,sha256=RlD8z2TYT4dqKvFtfmbU2pS7ZZ8l_3ErYttIcSxjPMg,48040
- euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
- euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
- euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
- euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
- euroeval/data_models.py,sha256=lrF8XAVVZFqof3O0Bq2nMSTuqhkDaoMixIoUMqgsAo8,21647
- euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
- euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
- euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
- euroeval/generation.py,sha256=pXs2VwfLvUpwXRN8LcHvzE_HTXMkGSYc4wGv9vsz1BA,10758
- euroeval/generation_utils.py,sha256=8HOFE2xdnCPRMe3TiHh--n7Oy3rMV7MAnERpW9vplUA,13352
- euroeval/human_evaluation.py,sha256=9CMXrkzM7Q-vltFL1fD9hYwahQtWT12aHMU8PgGO5_c,27497
- euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
- euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
- euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
- euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
- euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
- euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
- euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
- euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
- euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
- euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
- euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
- euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
- euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
- euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
- euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
- euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
- euroeval/benchmark_modules/vllm.py,sha256=LXWkCUaIpP3cboj1bAGM6N8pR02mX6-XZFJheZDbfAQ,38798
- euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
- euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
- euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
- euroeval/dataset_configs/english.py,sha256=1q8XJqIVWBBNkldL7t-cVnU2O9EUb9_xoVRSN8arN90,2561
- euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
- euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
- euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
- euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
- euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
- euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
- euroeval/dataset_configs/norwegian.py,sha256=30YGdDPtDszG10BNDVHb-XXTGgGIIgDUNGoeM9q0K_E,5385
- euroeval/dataset_configs/spanish.py,sha256=NviL-FzJ5jq1bLTRvbtZBiGrAmZjxyijZNpKZFrnT-M,2527
- euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
- euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
- euroeval/prompt_templates/linguistic_acceptability.py,sha256=FAIJKS26EVRxlLHk1C3lN0GDtd5AM0MwvaMf-NNIxfU,6677
- euroeval/prompt_templates/multiple_choice.py,sha256=6iEqiPpT-3WJN_gsyhyapnwsrcsYGdVkSkzwn-VKKxw,5101
- euroeval/prompt_templates/named_entity_recognition.py,sha256=Xd6gBJD2e1l8-We2Ujor7crRUBcbgnNeeVknBIrTMJo,12737
- euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
- euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
- euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
- euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
- euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
- euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
- euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
- euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
- euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
- euroeval-15.11.0.dist-info/METADATA,sha256=NiRBsSAD6L_q4-y0AVkfoUoZA-9oD27uSK80cWpO_co,13479
- euroeval-15.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- euroeval-15.11.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
- euroeval-15.11.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
- euroeval-15.11.0.dist-info/RECORD,,