EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

euroeval/task_group_utils/multiple_choice_classification.py CHANGED
@@ -12,6 +12,8 @@ from transformers.tokenization_utils import PreTrainedTokenizer
 from transformers.tokenization_utils_base import BatchEncoding
 from transformers.trainer import Trainer
 
+from ..exceptions import InvalidBenchmark
+
 if t.TYPE_CHECKING:
     from ..types import Labels, Predictions
 
@@ -19,7 +21,7 @@ logger = logging.getLogger("euroeval")
 
 
 class MultipleChoiceClassificationTrainer(Trainer):
-    """Trainer subclass for question answering tasks."""
+    """Trainer subclass for multiple-choice classification tasks."""
 
     def evaluate(  # type: ignore[override]
         self,
@@ -57,6 +59,8 @@ class MultipleChoiceClassificationTrainer(Trainer):
         )
 
         predictions = output.predictions
+        if isinstance(predictions, tuple):
+            predictions = predictions[0]
         assert isinstance(predictions, np.ndarray)
 
         metrics = output.metrics
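For context: Trainer.predict in transformers returns output.predictions either as a single array or as a tuple of arrays when the model has more than one output head, which is what the new guard normalises before the assertion. A minimal sketch of the same normalisation in isolation (the function name is illustrative):

    import numpy as np

    def first_array(predictions):
        # Trainer.predict may hand back a tuple (logits first, auxiliary
        # outputs after); keep only the logits array.
        if isinstance(predictions, tuple):
            predictions = predictions[0]
        assert isinstance(predictions, np.ndarray)
        return predictions

    print(first_array((np.zeros((2, 3)), np.ones((2, 1)))).shape)  # (2, 3)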
@@ -150,6 +154,12 @@ def postprocess_predictions_and_labels(
     Returns:
         The postprocessed predictions and labels.
     """
+    if predictions.ndim != 2 or predictions.shape[1] != 2:
+        raise InvalidBenchmark(
+            "Predictions must be a 2D array with shape (num_examples, 2). Found "
+            f"shape {predictions.shape}."
+        )
+
     mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}
 
     all_predictions: list[str] = list()
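The new guard makes the expected prediction shape explicit, two scores per example, and turns a silent shape mismatch into an InvalidBenchmark error. A quick illustration with dummy arrays (the values are made up):

    import numpy as np

    good = np.random.rand(10, 2)  # ten examples, two scores each
    bad = np.random.rand(10, 5)   # would trip the InvalidBenchmark check

    for preds in (good, bad):
        ok = preds.ndim == 2 and preds.shape[1] == 2
        print(preds.shape, "accepted" if ok else "rejected")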
euroeval/task_group_utils/question_answering.py CHANGED
@@ -8,11 +8,11 @@ from collections import defaultdict
 import evaluate
 import numpy as np
 from evaluate import EvaluationModule
-from transformers.tokenization_utils import PreTrainedTokenizer
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 from transformers.trainer import Trainer
 
 from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
+from ..exceptions import InvalidBenchmark
 from ..tokenization_utils import get_special_token_metadata
 from ..utils import raise_if_model_output_contains_nan_values
 
@@ -20,6 +20,7 @@ if t.TYPE_CHECKING:
     import torch.nn as nn
     from datasets.arrow_dataset import Dataset
     from transformers.modeling_utils import PreTrainedModel
+    from transformers.tokenization_utils import PreTrainedTokenizer
     from transformers.tokenization_utils_base import BatchEncoding
     from transformers.trainer_callback import TrainerCallback
     from transformers.trainer_utils import EvalPrediction
@@ -43,6 +44,7 @@ class QuestionAnsweringTrainer(Trainer):
         compute_metrics: "c.Callable[[EvalPrediction], dict[str, float]]",
         callbacks: "list[TrainerCallback]",
         data_collator: "c.Callable",
+        **kwargs,
     ) -> None:
         """Initialise the trainer."""
         super().__init__(
@@ -54,6 +56,7 @@ class QuestionAnsweringTrainer(Trainer):
             compute_metrics=compute_metrics,
             callbacks=callbacks,
             data_collator=data_collator,
+            **kwargs,
         )
 
         # Get the CLS token id for the tokenizer
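Together, the two **kwargs hunks make QuestionAnsweringTrainer transparent to any extra keyword arguments accepted by transformers.Trainer, so future Trainer options need no signature change here. A minimal sketch of the pattern (the subclass is hypothetical, not EuroEval code):

    from transformers import Trainer

    class PassthroughTrainer(Trainer):
        def __init__(self, *args, **kwargs) -> None:
            # Anything not handled here (e.g. optimizers) is forwarded
            # verbatim to the base Trainer.
            super().__init__(*args, **kwargs)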
@@ -475,7 +478,7 @@ def prepare_test_examples(
 
 
 def postprocess_predictions_and_labels(
-    predictions: tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, ...],
    dataset: "Dataset",
    prepared_dataset: "Dataset",
    cls_token_index: int,
@@ -484,7 +487,7 @@ def postprocess_predictions_and_labels(
 
     Args:
         predictions:
-            A pair of (start_logits, end_logits) predictions.
+            A tuple whose first two elements are (start_logits, end_logits).
         dataset:
             The dataset containing the examples.
         prepared_dataset:
@@ -495,7 +498,14 @@ def postprocess_predictions_and_labels(
     Returns:
         The postprocessed predictions and labels.
     """
-    all_start_logits, all_end_logits = predictions
+    if len(predictions) < 2:
+        raise InvalidBenchmark(
+            "The predictions should be a tuple with the first two elements being "
+            f"(start_logits, end_logits), but got {len(predictions)} elements instead: "
+            f"{predictions}."
+        )
+
+    all_start_logits, all_end_logits = predictions[:2]
 
     # Build a map from an example to its corresponding features, being the blocks of
     # text from the context that we're feeding into the model. An example can have
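Widening the annotation to tuple[np.ndarray, ...] accommodates models whose prediction output carries extra arrays after the start and end logits; only the first two elements are consumed. A toy illustration with dummy arrays:

    import numpy as np

    start_logits = np.zeros((4, 128))
    end_logits = np.zeros((4, 128))
    extra = np.zeros((4, 1))  # e.g. an auxiliary output some models append

    predictions = (start_logits, end_logits, extra)
    all_start_logits, all_end_logits = predictions[:2]  # the extra array is ignored
    print(all_start_logits.shape, all_end_logits.shape)  # (4, 128) (4, 128)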
euroeval/task_group_utils/sequence_classification.py CHANGED
@@ -135,7 +135,7 @@ def extract_labels_from_generation(
     if first_label_token_mapping is False:
         raise InvalidBenchmark(
             "The model outputted logprobs, but the first label token mapping is "
-            "not provided. This means that the model should not output logprobs."
+            "not provided, which is not supported."
         )
     labels = get_closest_logprobs_labels(
         generation_logprobs=model_output.scores,
euroeval/tokenization_utils.py CHANGED
@@ -8,7 +8,6 @@ import torch
 
 from .constants import TASK_GROUPS_USING_LOGPROBS
 from .enums import GenerativeType
-from .exceptions import InvalidModel
 from .utils import log_once
 
 if t.TYPE_CHECKING:
@@ -153,7 +152,9 @@ def should_prefix_space_be_added_to_labels(
     return add_prefix_space
 
 
-def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_bos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the beginning-of-sequence token from a tokenizer.
 
     Args:
@@ -162,7 +163,7 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the beginning-of-sequence token and its
-        token ID.
+        token ID, or (None, None) if no BOS token is found.
     """
     if isinstance(tokenizer.bos_token, str) and isinstance(tokenizer.bos_token_id, int):
         return tokenizer.bos_token, tokenizer.bos_token_id
@@ -176,15 +177,25 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             bos_token_id = vocab[bos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have a beginning-of-sequence token. Please ensure that "
-            "this has been set in the tokenizer's configuration."
+            "this has been set in the tokenizer's configuration. Using no BOS token."
+            " This may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
+    log_once(
+        f"Beginning-of-sequence token was not set, but detected it as {bos_token!r} "
+        f"with ID {bos_token_id}.",
+        level=logging.DEBUG,
+    )
     return bos_token, bos_token_id
 
 
-def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_eos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the end-of-sequence token from a tokenizer.
 
     Args:
@@ -193,7 +204,7 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the end-of-sequence token and its token
-        ID.
+        ID, or (None, None) if no EOS token is found.
     """
     if isinstance(tokenizer.eos_token, str) and isinstance(tokenizer.eos_token_id, int):
         return tokenizer.eos_token, tokenizer.eos_token_id
@@ -207,14 +218,105 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             eos_token_id = vocab[eos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have an end-of-sequence token. Please ensure that this "
-            "has been set in the tokenizer's configuration."
+            "has been set in the tokenizer's configuration. Using no EOS token. This "
+            "may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
+    log_once(
+        f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
+        f"ID {eos_token_id}.",
+        level=logging.DEBUG,
+    )
     return eos_token, eos_token_id
 
 
+def get_pad_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
+    """Get the padding token from a tokenizer.
+
+    Args:
+        tokenizer:
+            The tokenizer.
+
+    Returns:
+        A pair (token, token_id) representing the padding token and its token ID, or
+        (None, None) if no padding token is found.
+    """
+    # If the tokenizer already has a padding token, return it
+    if tokenizer.pad_token is not None and tokenizer.pad_token_id is not None:
+        assert isinstance(tokenizer.pad_token, str), (
+            "Expected tokenizer.pad_token to be a string, but got "
+            f"{type(tokenizer.pad_token)}."
+        )
+        assert isinstance(tokenizer.pad_token_id, int), (
+            "Expected tokenizer.pad_token_id to be an integer, but got "
+            f"{type(tokenizer.pad_token_id)}."
+        )
+        return (tokenizer.pad_token, tokenizer.pad_token_id)
+
+    # If the tokenizer has a BOS token, use it as the padding token
+    if tokenizer.bos_token is not None and tokenizer.bos_token_id is not None:
+        assert isinstance(tokenizer.bos_token, str), (
+            "Expected tokenizer.bos_token to be a string, but got "
+            f"{type(tokenizer.bos_token)}."
+        )
+        assert isinstance(tokenizer.bos_token_id, int), (
+            "Expected tokenizer.bos_token_id to be an integer, but got "
+            f"{type(tokenizer.bos_token_id)}."
+        )
+        pad_token = tokenizer.bos_token
+        pad_token_id = tokenizer.bos_token_id
+
+    # If the tokenizer has an EOS token, use it as the padding token
+    elif tokenizer.eos_token is not None and tokenizer.eos_token_id is not None:
+        assert isinstance(tokenizer.eos_token, str), (
+            "Expected tokenizer.eos_token to be a string, but got "
+            f"{type(tokenizer.eos_token)}."
+        )
+        assert isinstance(tokenizer.eos_token_id, int), (
+            "Expected tokenizer.eos_token_id to be an integer, but got "
+            f"{type(tokenizer.eos_token_id)}."
+        )
+        pad_token = tokenizer.eos_token
+        pad_token_id = tokenizer.eos_token_id
+
+    # Otherwise, try to find a candidate padding token in the vocabulary
+    else:
+        pad_token_candidates = [
+            "<pad>",
+            "[pad]",
+            "<|endoftext|>",
+            "<|end▁of▁sentence|>",
+            "<|im_end|>",
+        ]
+        pad_token_candidates.extend([c.upper() for c in pad_token_candidates])
+        for candidate in pad_token_candidates:
+            if candidate in tokenizer.get_vocab():
+                pad_token = candidate
+                pad_token_id = tokenizer.get_vocab()[candidate]
+                break
+        else:
+            log_once(
+                "Could not identify a padding token for the model. Please ensure that "
+                "this has been set in the tokenizer's configuration. Using no padding "
+                "token. This may lead to unexpected behavior in the model.",
+                level=logging.INFO,
+            )
+            return None, None
+
+    log_once(
+        f"Padding token was not set, but detected it as {pad_token!r} with ID "
+        f"{pad_token_id}.",
+        level=logging.DEBUG,
+    )
+    return pad_token, pad_token_id
+
+
 def get_end_of_chat_token_ids(tokenizer: "PreTrainedTokenizer") -> list[int] | None:
     """Get the end token ID for chat models.
 
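With all three getters now returning (None, None) instead of raising, callers must branch on the missing-token case. A minimal usage sketch, assuming get_pad_token is imported from this module and using an arbitrary Hugging Face tokenizer (the model ID is only an example):

    from transformers import AutoTokenizer

    from euroeval.tokenization_utils import get_pad_token

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model

    pad_token, pad_token_id = get_pad_token(tokenizer)
    if pad_token is None:
        # No padding token was set or could be inferred; continue without
        # padding instead of aborting the benchmark run.
        print("No padding token available.")
    else:
        tokenizer.pad_token = pad_token
        print(f"Padding with {pad_token!r} (ID {pad_token_id}).")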
@@ -291,14 +393,14 @@ def get_first_label_token_mapping(
     if tokenizer is None:
         if output_scores:
             log_once(
-                f"The model {model_config.model_id!r} will output scores, since the "
-                "dataset supports it and no tokenizer is available.",
+                f"We will use logprobs with the model {model_config.model_id!r} "
+                "since the dataset supports it and no tokenizer is available.",
                 level=logging.DEBUG,
             )
         else:
             log_once(
-                f"The model {model_config.model_id!r} will not output scores, since "
-                "the dataset does not support it and no tokenizer is available.",
+                f"We will not use logprobs with the model {model_config.model_id!r} "
+                "since the dataset does not support it and no tokenizer is available.",
                 level=logging.DEBUG,
             )
         return output_scores
@@ -359,7 +461,7 @@ def get_first_label_token_mapping(
         if not matching_tokens:
             log_once(
                 f"No matching token found in token_list for label '{label}', so "
-                "we will not output scores.",
+                "we will not use logprobs with the model.",
                 level=logging.DEBUG,
             )
             return False
@@ -369,8 +471,8 @@ def get_first_label_token_mapping(
     # tokens are distinct
     if len(first_tokens) == len(set(first_tokens)):
         log_once(
-            "The model will output scores, since the first tokens of the labels "
-            "are distinct.",
+            "We will use logprobs with the model since the first tokens of the "
+            "labels are distinct.",
             level=logging.DEBUG,
         )
         return {
@@ -379,7 +481,7 @@ def get_first_label_token_mapping(
         }
     else:
         log_once(
-            "The model will not output scores, since the first tokens of the "
+            "We will not use logprobs with the model since the first tokens of the "
             "labels are not distinct. The first tokens for the labels "
             f"{local_labels} are {first_tokens}"
         )
@@ -389,7 +491,8 @@ def get_first_label_token_mapping(
     # evaluation errors. This will force the label extraction to rely on word edit
     # distance instead of logprobs.
     log_once(
-        "The model will not output scores, since the dataset does not have labels.",
+        "We will not use logprobs with the model, since the dataset does not have "
+        "labels.",
         level=logging.DEBUG,
     )
     return False
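All of these reworded messages describe the same check: logprob-based label extraction is only reliable when every label is identifiable from its first token. A toy version of the distinctness test (the tokenisations are made up):

    # Hypothetical first tokens obtained by tokenising each label.
    first_tokens = ["pos", "neg", "neu"]

    # Logprobs are usable only if the first tokens are pairwise distinct;
    # otherwise label extraction falls back to word edit distance.
    use_logprobs = len(first_tokens) == len(set(first_tokens))
    print(use_logprobs)  # True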
euroeval/utils.py CHANGED
@@ -121,6 +121,8 @@ def block_terminal_output() -> None:
     logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
     logging.getLogger("accelerate").setLevel(logging.CRITICAL)
     logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
     logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
 
     # This suppresses vLLM logging
@@ -352,19 +354,22 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
         asyncio.set_event_loop(None)
 
 
-async def catch_coroutine_exception(
-    coroutine: t.Coroutine[t.Any, t.Any, T],
+async def add_semaphore_and_catch_exception(
+    coroutine: t.Coroutine[t.Any, t.Any, T], semaphore: asyncio.Semaphore
 ) -> T | Exception:
-    """Run a coroutine, catching any exceptions and returning them.
+    """Run a coroutine with a semaphore.
 
     Args:
         coroutine:
             The coroutine to run.
+        semaphore:
+            The semaphore to use.
 
     Returns:
-        The result of the coroutine, or the exception if it was raised.
+        The result of the coroutine.
     """
-    try:
-        return await coroutine
-    except Exception as exc:
-        return exc
+    async with semaphore:
+        try:
+            return await coroutine
+        except Exception as exc:
+            return exc
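The renamed helper now also bounds concurrency: the semaphore caps how many coroutines run at once, while exceptions are still returned as values so a single failed call cannot abort an asyncio.gather over a whole batch. A self-contained sketch of the pattern (the request coroutine is made up):

    import asyncio
    import typing as t

    T = t.TypeVar("T")

    async def add_semaphore_and_catch_exception(
        coroutine: t.Coroutine[t.Any, t.Any, T], semaphore: asyncio.Semaphore
    ) -> T | Exception:
        # Mirrors the helper above: bound concurrency, return exceptions.
        async with semaphore:
            try:
                return await coroutine
            except Exception as exc:
                return exc

    async def fake_request(i: int) -> int:
        await asyncio.sleep(0.01)  # stand-in for a real API call
        if i == 3:
            raise ValueError("simulated failure")
        return i * i

    async def main() -> None:
        semaphore = asyncio.Semaphore(2)  # at most two requests in flight
        results = await asyncio.gather(
            *(add_semaphore_and_catch_exception(fake_request(i), semaphore)
              for i in range(5))
        )
        print(results)  # [0, 1, 4, ValueError('simulated failure'), 16]

    asyncio.run(main())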
euroeval-15.10.0.dist-info/METADATA CHANGED
@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.9.1
+Version: 15.10.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
 Author-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
-Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>
+Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
 License: MIT License
 
 Copyright (c) 2022-2024 Dan Saattrup Nielsen
@@ -37,13 +37,12 @@ Requires-Dist: demjson3>=3.0.6
 Requires-Dist: evaluate>=0.4.1
 Requires-Dist: huggingface-hub>=0.30.1
 Requires-Dist: levenshtein>=0.24.0
-Requires-Dist: litellm>=1.63.0
+Requires-Dist: litellm>=1.72.2
 Requires-Dist: more-itertools>=10.5.0
 Requires-Dist: numpy<2.0.0,>=1.23.0
-Requires-Dist: ollama>=0.4.7
+Requires-Dist: ollama>=0.5.1
 Requires-Dist: pandas>=2.2.0
 Requires-Dist: peft>=0.15.0
-Requires-Dist: protobuf~=3.20.0
 Requires-Dist: pydantic>=2.6.0
 Requires-Dist: pyinfer>=0.0.3
 Requires-Dist: python-dotenv>=1.0.1
@@ -62,12 +61,12 @@ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: gradio>=4.26.0; extra == 'all'
 Requires-Dist: outlines>=0.1.11; extra == 'all'
-Requires-Dist: vllm>=0.9.0; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: outlines>=0.1.11; extra == 'generative'
-Requires-Dist: vllm>=0.9.0; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'generative'
 Provides-Extra: human-evaluation
 Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
 Provides-Extra: test
@@ -93,7 +92,7 @@ ______________________________________________________________________
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 
 
-## Maintainers
+## Maintainer
 
 - Dan Saattrup Nielsen ([@saattrupdan](https://github.com/saattrupdan),
   dan.nielsen@alexandra.dk)
euroeval-15.10.0.dist-info/RECORD CHANGED
@@ -3,12 +3,12 @@ euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu
 euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
-euroeval/constants.py,sha256=p6kp_R6-Tq5LBvyXyT6Sa6N3SkjEElGS2LSZRBoQaYs,1985
-euroeval/data_loading.py,sha256=L_REtxefte5Ke4xE_Cz01zkfCyKlOYhSqT5ZXXulHPc,3992
+euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
+euroeval/data_loading.py,sha256=2rMLSy8pbntlwmImizMtkTiUzj93mcv5kzYjZELWWfU,4081
 euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
 euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=uuaUxNQJb7TivPQuI1OYQ_MIKbD-6-7mpkobLKsDefQ,10667
+euroeval/finetuning.py,sha256=cx5SVgEsveMDNfoMxwLfAFsjZeKmYyHftaOZWZ-L9hA,11285
 euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
 euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
 euroeval/human_evaluation.py,sha256=zqbbJkqm2Uymf-88PxM3R9vVRR8SZJlq3QrqWEoiVeE,27643
@@ -19,15 +19,15 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
 euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=kghOIZMM3H0P9YDv0VBSNI7drzgJXlkRtMwt3Cgeev8,13907
+euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
 euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
-euroeval/utils.py,sha256=e83OnWc0GJn0Tn_vP3tbqh1DAbLy2ky-LnIlTEOKzKU,11410
+euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
 euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
-euroeval/benchmark_modules/hf.py,sha256=CoiaNakjhg6gm_5IbUUeevXQZebg2VrRLuhzEi2Hhrk,44617
-euroeval/benchmark_modules/litellm.py,sha256=SxSr_0C6b_jVavR3y9QyhfkCOP5-va4zijGfghFTArY,48362
-euroeval/benchmark_modules/vllm.py,sha256=rz_Xau5TGiFeb2VkdVpW_fYOfRCCvYrH0q9BGzCwZlo,42156
+euroeval/benchmark_modules/hf.py,sha256=Nbtn5eZ4axbmL09M8dGZCBr07pn9-btbqGgQ6q7KbHg,44620
+euroeval/benchmark_modules/litellm.py,sha256=LS4mBXXG6h4uJwySPc6SI6f0y_HuiKE7IprprqWpoCI,50601
+euroeval/benchmark_modules/vllm.py,sha256=sgeltOVfZA9bu0AmXV7PtZvuRst0I8s6VOIp0CI6DO8,38880
 euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
@@ -49,13 +49,13 @@ euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLO
 euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
 euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=nB78TzOgd0HBvTclmjOYJid9ZVAgu8IHZsqB_n1SAZU,6178
-euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iYvfoGt0EUObSaXRCGmk,27700
-euroeval/task_group_utils/sequence_classification.py,sha256=Yqx0pUhuHYmSkv1ZUfOndSLTvpr0lWCk19oYITfSjV4,13555
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=LQ6zD1UGi-jGCKI2xUJiQdAXoqb5QMpIJu41B2U0HPw,6543
+euroeval/task_group_utils/question_answering.py,sha256=D4oJL2vQEjHghyxiiiq_vj1IQC6eryqNoLXuTiQEPmw,28071
+euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.9.1.dist-info/METADATA,sha256=UkGmFcnarstFwD1J1eS6h3gbyxnucnaAVLnB5QhkdSo,13555
-euroeval-15.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-15.9.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
-euroeval-15.9.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
-euroeval-15.9.1.dist-info/RECORD,,
+euroeval-15.10.0.dist-info/METADATA,sha256=WUXtSfS6qvrlA25lazql3DvyS5chyMnBPKyu-l65A_I,13472
+euroeval-15.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.10.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.10.0.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.10.0.dist-info/RECORD,,