EuroEval 16.3.0__py3-none-any.whl → 16.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (64)
  1. euroeval/__init__.py +3 -2
  2. euroeval/benchmark_config_factory.py +0 -4
  3. euroeval/benchmark_modules/base.py +3 -16
  4. euroeval/benchmark_modules/fresh.py +2 -1
  5. euroeval/benchmark_modules/hf.py +99 -62
  6. euroeval/benchmark_modules/litellm.py +101 -41
  7. euroeval/benchmark_modules/vllm.py +91 -83
  8. euroeval/benchmarker.py +84 -78
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/constants.py +6 -0
  12. euroeval/data_loading.py +14 -11
  13. euroeval/data_models.py +12 -4
  14. euroeval/dataset_configs/__init__.py +2 -0
  15. euroeval/dataset_configs/czech.py +79 -0
  16. euroeval/dataset_configs/danish.py +10 -11
  17. euroeval/dataset_configs/dutch.py +0 -1
  18. euroeval/dataset_configs/english.py +0 -1
  19. euroeval/dataset_configs/estonian.py +11 -1
  20. euroeval/dataset_configs/finnish.py +0 -1
  21. euroeval/dataset_configs/french.py +0 -1
  22. euroeval/dataset_configs/german.py +0 -1
  23. euroeval/dataset_configs/italian.py +0 -1
  24. euroeval/dataset_configs/latvian.py +0 -1
  25. euroeval/dataset_configs/lithuanian.py +9 -3
  26. euroeval/dataset_configs/norwegian.py +0 -1
  27. euroeval/dataset_configs/polish.py +0 -1
  28. euroeval/dataset_configs/portuguese.py +0 -1
  29. euroeval/dataset_configs/slovak.py +60 -0
  30. euroeval/dataset_configs/spanish.py +0 -1
  31. euroeval/dataset_configs/swedish.py +10 -12
  32. euroeval/finetuning.py +21 -15
  33. euroeval/generation.py +10 -10
  34. euroeval/generation_utils.py +2 -3
  35. euroeval/logging_utils.py +250 -0
  36. euroeval/metrics/base.py +0 -3
  37. euroeval/metrics/huggingface.py +9 -5
  38. euroeval/metrics/llm_as_a_judge.py +5 -3
  39. euroeval/metrics/pipeline.py +17 -9
  40. euroeval/metrics/speed.py +0 -3
  41. euroeval/model_cache.py +11 -14
  42. euroeval/model_config.py +4 -5
  43. euroeval/model_loading.py +3 -0
  44. euroeval/prompt_templates/linguistic_acceptability.py +21 -3
  45. euroeval/prompt_templates/multiple_choice.py +25 -1
  46. euroeval/prompt_templates/named_entity_recognition.py +51 -11
  47. euroeval/prompt_templates/reading_comprehension.py +31 -3
  48. euroeval/prompt_templates/sentiment_classification.py +23 -1
  49. euroeval/prompt_templates/summarization.py +26 -6
  50. euroeval/scores.py +7 -7
  51. euroeval/speed_benchmark.py +3 -5
  52. euroeval/task_group_utils/multiple_choice_classification.py +0 -3
  53. euroeval/task_group_utils/question_answering.py +0 -3
  54. euroeval/task_group_utils/sequence_classification.py +43 -31
  55. euroeval/task_group_utils/text_to_text.py +17 -8
  56. euroeval/task_group_utils/token_classification.py +10 -9
  57. euroeval/tokenisation_utils.py +14 -12
  58. euroeval/utils.py +29 -146
  59. {euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/METADATA +4 -4
  60. euroeval-16.4.0.dist-info/RECORD +75 -0
  61. euroeval-16.3.0.dist-info/RECORD +0 -71
  62. {euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
  63. {euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
  64. {euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,11 +5,11 @@ import re
5
5
  import typing as t
6
6
 
7
7
  import torch
8
- from transformers import MistralCommonTokenizer
8
+ from transformers.tokenization_mistral_common import MistralCommonTokenizer
9
9
 
10
10
  from .enums import GenerativeType
11
11
  from .exceptions import InvalidModel
12
- from .utils import log_once
12
+ from .logging_utils import log, log_once
13
13
 
14
14
  if t.TYPE_CHECKING:
15
15
  from transformers.tokenization_utils import PreTrainedTokenizer
@@ -18,9 +18,6 @@ if t.TYPE_CHECKING:
18
18
  from .data_models import DatasetConfig, ModelConfig
19
19
 
20
20
 
21
- logger = logging.getLogger("euroeval")
22
-
23
-
24
21
  def get_special_token_metadata(tokeniser: "PreTrainedTokenizerBase") -> dict:
25
22
  """Get the special token metadata for a tokeniser.
26
23
 
@@ -182,7 +179,7 @@ def get_bos_token(
182
179
  "The model does not have a beginning-of-sequence token. Please ensure that "
183
180
  "this has been set in the tokeniser's configuration. Using no BOS token."
184
181
  " This may lead to unexpected behavior in the model.",
185
- level=logging.INFO,
182
+ level=logging.WARNING,
186
183
  )
187
184
  return None, None
188
185
 
@@ -223,14 +220,14 @@ def get_eos_token(
223
220
  "The model does not have an end-of-sequence token. Please ensure that this "
224
221
  "has been set in the tokeniser's configuration. Using no EOS token. This "
225
222
  "may lead to unexpected behavior in the model.",
226
- level=logging.INFO,
223
+ level=logging.WARNING,
227
224
  )
228
225
  return None, None
229
226
 
230
227
  log_once(
231
228
  f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
232
229
  f"ID {eos_token_id}.",
233
- level=logging.DEBUG,
230
+ level=logging.WARNING,
234
231
  )
235
232
  return eos_token, eos_token_id
236
233
 
@@ -306,7 +303,7 @@ def get_pad_token(
306
303
  "Could not identify a padding token for the model. Please ensure that "
307
304
  "this has been set in the tokeniser's configuration. Using no padding "
308
305
  "token. This may lead to unexpected behavior in the model.",
309
- level=logging.INFO,
306
+ level=logging.WARNING,
310
307
  )
311
308
  return None, None
312
309
 
@@ -358,12 +355,16 @@ def get_end_of_chat_token_ids(
358
355
  x_token_index = idx
359
356
  break
360
357
  else:
361
- logger.debug("Could not locate the end-of-chat token for the model.")
358
+ log(
359
+ "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
360
+ )
362
361
  return None
363
362
 
364
363
  end_of_chat_tokens = token_ids[x_token_index + 1 :]
365
364
  if len(end_of_chat_tokens) == 0:
366
- logger.debug("Could not locate the end-of-chat token for the model.")
365
+ log(
366
+ "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
367
+ )
367
368
  return None
368
369
 
369
370
  log_once(
@@ -506,7 +507,8 @@ def get_first_label_token_mapping(
506
507
  log_once(
507
508
  "We will not use logprobs with the model since the first tokens of the "
508
509
  "labels are not distinct. The first tokens for the labels "
509
- f"{local_labels} are {first_tokens}"
510
+ f"{local_labels} are {first_tokens}",
511
+ level=logging.DEBUG,
510
512
  )
511
513
  return False
512
514
 
euroeval/utils.py CHANGED
@@ -11,30 +11,23 @@ import re
11
11
  import socket
12
12
  import sys
13
13
  import typing as t
14
- import warnings
15
- from functools import cache
16
14
  from pathlib import Path
17
15
 
18
16
  import demjson3
19
17
  import huggingface_hub as hf_hub
20
- import litellm
21
18
  import numpy as np
22
19
  import torch
23
- from datasets.utils import disable_progress_bar
24
- from transformers import logging as tf_logging
25
20
 
21
+ from .caching_utils import cache_arguments
22
+ from .constants import T
26
23
  from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
24
+ from .logging_utils import log, log_once
27
25
 
28
26
  if t.TYPE_CHECKING:
29
- from types import TracebackType
30
-
31
27
  from .data_models import ModelIdComponents
32
28
  from .types import Predictions
33
29
 
34
30
 
35
- logger = logging.getLogger("euroeval")
36
-
37
-
38
31
  def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
39
32
  """Create cache directory for a model.
40
33
 
@@ -149,68 +142,6 @@ def enforce_reproducibility(seed: int = 4242) -> np.random.Generator:
149
142
  return rng
150
143
 
151
144
 
152
- def block_terminal_output() -> None:
153
- """Blocks libraries from writing output to the terminal.
154
-
155
- This filters warnings from some libraries, sets the logging level to ERROR for some
156
- libraries, disabled tokeniser progress bars when using Hugging Face tokenisers, and
157
- disables most of the logging from the `transformers` library.
158
- """
159
- if os.getenv("FULL_LOG") == "1":
160
- return
161
-
162
- # Ignore miscellaneous warnings
163
- warnings.filterwarnings("ignore", category=UserWarning)
164
- warnings.filterwarnings("ignore", category=FutureWarning)
165
- logging.getLogger("absl").setLevel(logging.CRITICAL)
166
-
167
- # Disable matplotlib logging
168
- logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
169
-
170
- # Disable PyTorch logging
171
- logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
172
- warnings.filterwarnings(action="ignore", module="torch*")
173
- os.environ["TORCH_LOGS"] = "-all"
174
-
175
- # Disable huggingface_hub logging
176
- logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
177
-
178
- # Disable LiteLLM logging
179
- logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
180
- logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
181
- logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
182
- logging.getLogger("openai").setLevel(logging.CRITICAL)
183
- logging.getLogger("httpx").setLevel(logging.CRITICAL)
184
- litellm.suppress_debug_info = True
185
-
186
- # Disable vLLM logging
187
- logging.getLogger("vllm").setLevel(logging.CRITICAL)
188
- logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
189
- logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
190
- logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
191
- logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
192
- logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
193
- logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
194
- logging.CRITICAL
195
- )
196
- os.environ["LOG_LEVEL"] = "CRITICAL"
197
- os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
198
-
199
- # Disable datasets logging
200
- logging.getLogger("datasets").setLevel(logging.CRITICAL)
201
- logging.getLogger("filelock").setLevel(logging.CRITICAL)
202
- disable_progress_bar()
203
-
204
- # Disable evaluate logging
205
- warnings.filterwarnings("ignore", module="seqeval*")
206
-
207
- # Disable most of the `transformers` logging
208
- tf_logging._default_log_level = logging.CRITICAL
209
- tf_logging.set_verbosity(logging.CRITICAL)
210
- logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
211
- logging.getLogger("accelerate").setLevel(logging.CRITICAL)
212
-
213
-
214
145
  def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
215
146
  """Get a class by its name.
216
147
 
@@ -240,9 +171,10 @@ def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type |
240
171
 
241
172
  if error_messages:
242
173
  errors = "\n- " + "\n- ".join(error_messages)
243
- logger.debug(
174
+ log(
244
175
  f"Could not find the class with the name(s) {', '.join(class_name)}. The "
245
- f"following error messages were raised: {errors}"
176
+ f"following error messages were raised: {errors}",
177
+ level=logging.DEBUG,
246
178
  )
247
179
 
248
180
  # If the class could not be found, return None
@@ -264,49 +196,27 @@ def get_min_cuda_compute_capability() -> float | None:
264
196
  return float(f"{major}.{minor}")
265
197
 
266
198
 
267
- @cache
199
+ @cache_arguments(disable_condition=lambda: hasattr(sys, "_called_from_test"))
268
200
  def internet_connection_available() -> bool:
269
201
  """Checks if internet connection is available by pinging google.com.
270
202
 
271
203
  Returns:
272
204
  Whether or not internet connection is available.
273
205
  """
206
+ internet_available: bool = False
207
+
274
208
  try:
275
209
  s = socket.create_connection(("1.1.1.1", 80))
276
210
  s.close()
277
- return True
278
-
279
- # We want to only catch exceptions related to socket connections, but as we cannot
280
- # import these here as they're developer dependencies, we check the exception name
281
- # instead. If the exception is not related to socket connections, we reraise it.
211
+ internet_available = True
212
+ except OSError:
213
+ pass
282
214
  except Exception as e:
283
215
  pytest_socket_errors = ["SocketConnectBlockedError", "SocketBlockedError"]
284
- if type(e).__name__ in pytest_socket_errors or isinstance(e, OSError):
285
- return False
286
- raise e
287
-
288
-
289
- class HiddenPrints:
290
- """Context manager which removes all terminal output."""
291
-
292
- def __enter__(self) -> None:
293
- """Enter the context manager."""
294
- self._original_stdout = sys.stdout
295
- self._original_stderr = sys.stderr
296
- sys.stdout = open(os.devnull, "w")
297
- sys.stderr = open(os.devnull, "w")
298
-
299
- def __exit__(
300
- self,
301
- exc_type: t.Type[BaseException],
302
- exc_val: BaseException,
303
- exc_tb: "TracebackType",
304
- ) -> None:
305
- """Exit the context manager."""
306
- sys.stdout.close()
307
- sys.stderr.close()
308
- sys.stdout = self._original_stdout
309
- sys.stderr = self._original_stderr
216
+ if type(e).__name__ not in pytest_socket_errors:
217
+ raise e
218
+
219
+ return internet_available
310
220
 
311
221
 
312
222
  def raise_if_model_output_contains_nan_values(model_output: "Predictions") -> None:
@@ -364,34 +274,6 @@ def unscramble(scrambled_text: str) -> str:
364
274
  return unscrambled
365
275
 
366
276
 
367
- @cache
368
- def log_once(message: str, level: int = logging.INFO) -> None:
369
- """Log a message once.
370
-
371
- This is ensured by caching the input/output pairs of this function, using the
372
- `functools.cache` decorator.
373
-
374
- Args:
375
- message:
376
- The message to log.
377
- level:
378
- The logging level. Defaults to logging.INFO.
379
- """
380
- match level:
381
- case logging.DEBUG:
382
- logger.debug(message)
383
- case logging.INFO:
384
- logger.info(message)
385
- case logging.WARNING:
386
- logger.warning(message)
387
- case logging.ERROR:
388
- logger.error(message)
389
- case logging.CRITICAL:
390
- logger.critical(message)
391
- case _:
392
- raise ValueError(f"Invalid logging level: {level}")
393
-
394
-
395
277
  def get_package_version(package_name: str) -> str | None:
396
278
  """Get the version of a package.
397
279
 
@@ -408,9 +290,6 @@ def get_package_version(package_name: str) -> str | None:
408
290
  return None
409
291
 
410
292
 
411
- T = t.TypeVar("T", bound=object)
412
-
413
-
414
293
  def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
415
294
  """Run a coroutine, ensuring that the event loop is always closed when we're done.
416
295
 
@@ -464,37 +343,41 @@ def extract_json_dict_from_string(s: str) -> dict | None:
464
343
  """
465
344
  json_regex = r"\{[^{}]*?\}"
466
345
  if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
467
- logger.debug(
346
+ log(
468
347
  "The model output does not contain any JSON dictionary, so cannot parse "
469
- f"it. Skipping. Here is the output: {s!r}"
348
+ f"it. Skipping. Here is the output: {s!r}",
349
+ level=logging.DEBUG,
470
350
  )
471
351
  return None
472
352
  json_string = json_match.group()
473
353
  try:
474
354
  json_output = demjson3.decode(txt=json_string)
475
355
  except demjson3.JSONDecodeError:
476
- logger.debug(
356
+ log(
477
357
  "The model output is not valid JSON, so cannot parse it. Skipping. "
478
- f"Here is the output: {json_string!r}"
358
+ f"Here is the output: {json_string!r}",
359
+ level=logging.DEBUG,
479
360
  )
480
361
  return None
481
362
  if not isinstance(json_output, dict):
482
- logger.debug(
363
+ log(
483
364
  "The model output is not a JSON dictionary, so cannot parse "
484
- f"it. Skipping. Here is the output: {json_string!r}"
365
+ f"it. Skipping. Here is the output: {json_string!r}",
366
+ level=logging.DEBUG,
485
367
  )
486
368
  return None
487
369
  elif not all(isinstance(key, str) for key in json_output.keys()):
488
- logger.debug(
370
+ log(
489
371
  "The model output is not a JSON dictionary with string keys, "
490
372
  "so cannot parse it. Skipping. Here is the output: "
491
- f"{json_string!r}"
373
+ f"{json_string!r}",
374
+ level=logging.DEBUG,
492
375
  )
493
376
  return None
494
377
  return json_output
495
378
 
496
379
 
497
- @cache
380
+ @cache_arguments()
498
381
  def get_hf_token(api_key: str | None) -> str | bool:
499
382
  """Get the Hugging Face token.
500
383
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 16.3.0
3
+ Version: 16.4.0
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,12 +62,12 @@ Provides-Extra: all
62
62
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
63
63
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
64
64
  Requires-Dist: timm>=1.0.19; extra == 'all'
65
- Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'all'
65
+ Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'all'
66
66
  Provides-Extra: generative
67
67
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
68
68
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
69
69
  Requires-Dist: timm>=1.0.19; extra == 'generative'
70
- Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
70
+ Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'generative'
71
71
  Description-Content-Type: text/markdown
72
72
 
73
73
  <!-- This disables the requirement that the first line is a top-level heading -->
@@ -92,7 +92,7 @@ ______________________________________________________________________
92
92
  [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
93
93
  [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
94
94
  [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
95
- [![Code Coverage](https://img.shields.io/badge/Coverage-67%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
95
+ [![Code Coverage](https://img.shields.io/badge/Coverage-70%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
96
96
  [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
97
97
 
98
98
  ## Maintainer
@@ -0,0 +1,75 @@
1
+ euroeval/__init__.py,sha256=Ci1Sta9hl-v-ZPwJ1qqAVpzvj-vVgZZbQQuP5Qopc4o,3956
2
+ euroeval/benchmark_config_factory.py,sha256=x1HfK8kDVxN14PPHxonsDv0vhkdrexsMJfKaXhO9WQQ,8540
3
+ euroeval/benchmarker.py,sha256=M_2KV0f41RmCiRLcQLEIACt1TcL7QqvH48ds0ebJCG8,49705
4
+ euroeval/caching_utils.py,sha256=AkR0TLY9EHbqv3TrhtCmpEGsm0DWZSLEfR2fRHq1S3E,2587
5
+ euroeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
6
+ euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
7
+ euroeval/constants.py,sha256=XAdsdSE4bAOUeW2o5qmMlfqRmsXZUNIKlEZrbxBPdLk,2845
8
+ euroeval/data_loading.py,sha256=r2GtvH2fAPapE9Idyu8W27n3YXD2Bgw8Qt88vdDn0DQ,4751
9
+ euroeval/data_models.py,sha256=j3gdzLSxgr3FakBIOqvVGZ5K5cXb4RrCMOkJc8J8Zmc,28007
10
+ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
11
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
12
+ euroeval/finetuning.py,sha256=t3VqkuRVqRxcpHhSzU4nF4npvLDnjNzPJqGqG-L6ifk,11764
13
+ euroeval/generation.py,sha256=epv2QPHTxzoBmq5OFQtolvuvJ6ce4FkdD03NTYdKFZk,12579
14
+ euroeval/generation_utils.py,sha256=3mI-T9imk433VsvbwCy71Zzv2XOdm-l1SH-IiFfSd9M,18285
15
+ euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
16
+ euroeval/logging_utils.py,sha256=iq9K2-7URgeHOUJCYCC9k8vRAz4YmBf4f44GpCVEGcc,8802
17
+ euroeval/model_cache.py,sha256=S_8ZtLaliTiUEvQAVw_DJ1qk5PWUO5-eE04hGScCj_o,9246
18
+ euroeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
19
+ euroeval/model_loading.py,sha256=mVh05sPENBBOIUkd_rwXqbBd13YvF_tOVZ8XGtguNzw,2338
20
+ euroeval/scores.py,sha256=tlLfmI6Pgm1d_odubfyFcGLoB6Mxgfw3Yl7POzFv9l8,3235
21
+ euroeval/speed_benchmark.py,sha256=k9xEF7jPAMrEBcZdykilQ6eJMGhFW1eUGuhQco9470M,4034
22
+ euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
23
+ euroeval/tokenisation_utils.py,sha256=rytsJy4mNEqeSdGzXsvVU4OShveeHOOlbaQOJDsX4S0,21275
24
+ euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
25
+ euroeval/utils.py,sha256=VJrbEFXr6ZCJIHiMT7M7Y84ZYl0LHe2uhIz4eePciAw,15235
26
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
27
+ euroeval/benchmark_modules/base.py,sha256=PeOqhfrc9iqyRz1aDHFBiTpWcwU5zDXo5pB_CD8W4VI,11199
28
+ euroeval/benchmark_modules/fresh.py,sha256=h4TPJlJK6xxxyhAXURr0T9gk4Khm3WyujnKBDFc3sCE,10806
29
+ euroeval/benchmark_modules/hf.py,sha256=enj88OY2XELdNgLnqeRPXvX2ATgcm6fjQpSYpBhmgzI,46274
30
+ euroeval/benchmark_modules/litellm.py,sha256=VNFIOJU8TJNrifHtfBILh1MeS7tehqztxH5WoPLr5fc,66581
31
+ euroeval/benchmark_modules/vllm.py,sha256=dm19gYG-MR63V8YpZBM1iOQ1c7xbFRzo9NuDWHG3q-Y,46952
32
+ euroeval/dataset_configs/__init__.py,sha256=zvyH0onXIDtm8lHDVRSzk7J0_mJFU0H6WnLueaxM7WE,2157
33
+ euroeval/dataset_configs/czech.py,sha256=9IDYKg1aoikMXIqQo2tYTQHf2WmQEujkNTyF0a8c9c8,2134
34
+ euroeval/dataset_configs/danish.py,sha256=nkw1poFOJGpQJFB9HYC6bdlNzUR5pXxYacvZs4GrK4Y,5522
35
+ euroeval/dataset_configs/dutch.py,sha256=CDr0oQnmDxeNloZ6iTGYPcNqPM5R9N8Z4aTKEE0C2MU,5408
36
+ euroeval/dataset_configs/english.py,sha256=2sJQPM4rZSYlwE5v4AiDm95Boq-_53AzdOt5cL_drJs,4628
37
+ euroeval/dataset_configs/estonian.py,sha256=fC5TUGpd6u22DUxoETBLA7EThwqsPDU54gXTzWtFCHk,3369
38
+ euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
39
+ euroeval/dataset_configs/finnish.py,sha256=DwrhwluoV4rmW8m2E5gWTfvHZ1XKRQG_3KU7wSOqM40,4281
40
+ euroeval/dataset_configs/french.py,sha256=MIZUAn2rNwasb41DC92q6vMwRBem1Fw4D7Hj1cLFlfs,4611
41
+ euroeval/dataset_configs/german.py,sha256=D-Yuz0pGf8pOEfMmTibXfk0k0QGjA4nEyAmea4TnCh0,5021
42
+ euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
43
+ euroeval/dataset_configs/italian.py,sha256=GaN7u2NgAOrpe7n--CsmpQm_n-PCVsTN-wV78UKtQco,4895
44
+ euroeval/dataset_configs/latvian.py,sha256=8hb32_YD_nQHn4kRVfwiGRDoJHF8M00ZdcwuD5ozJwU,2647
45
+ euroeval/dataset_configs/lithuanian.py,sha256=Gv3ta3Gs7xknZ_h_dVWY7YN43UfQzLkJPnYnQcuBguU,1998
46
+ euroeval/dataset_configs/norwegian.py,sha256=VcNftTvOJMCQEJvDFe3iixKbr8cjE3C6oHG4Jp4HET4,7636
47
+ euroeval/dataset_configs/polish.py,sha256=wiorGf4Z06WLPYAa5blD8F2qDaEWUr4MgVShkkVfVo4,3563
48
+ euroeval/dataset_configs/portuguese.py,sha256=TsjJMGJc_wExE_9TMJiQuxhN9BylXcHTXRFaCmkE4Gg,3980
49
+ euroeval/dataset_configs/slovak.py,sha256=Dc9ai2VW-ckQk7trglL2w1Ki0NECsr1RMXQPYBAN6OU,1759
50
+ euroeval/dataset_configs/spanish.py,sha256=VQHQiRsTLlen1zBKgbmRiXSB--b89WofXgFxeIgMR1o,4793
51
+ euroeval/dataset_configs/swedish.py,sha256=pNd-O9cU-4_9gkQU-EFVzsjri6Jg-0taVkzQYdFT6Lw,5257
52
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
53
+ euroeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
54
+ euroeval/metrics/huggingface.py,sha256=eCbL-jIj5WYAVRSYdbBWDzDoakIPl6_rSvBqLZhXO-E,6736
55
+ euroeval/metrics/llm_as_a_judge.py,sha256=br-pIyzhgrfDXZb6K0GuSUAyczLnrc7pFugW1DYwK6w,9721
56
+ euroeval/metrics/pipeline.py,sha256=xGCA7N1F4cLKOIeXP9SGAZvrWToREwAVb_gR5iBMQIU,10825
57
+ euroeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
58
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
59
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=Q-GyoS_c_iM-wQ8aXTywRTdxl1kUF0WEzHWh40hsk3s,10098
60
+ euroeval/prompt_templates/multiple_choice.py,sha256=p6Dt3EMaASyqFHOjxdisFnh7OOVi-roCyKalSPwp5Dc,8220
61
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=3yEr1GHk0UbubsTwDSK928QssgYO0mnMfOgVmlDT2HI,19066
62
+ euroeval/prompt_templates/reading_comprehension.py,sha256=0eYnJOfk8u9Zv_Xj6VtDLoQwvfe5_jjzAWGAksRMO6Y,10338
63
+ euroeval/prompt_templates/sentiment_classification.py,sha256=Xg90BzCHQEmgTImn9zqI9Z48nW1paGQ-4AWYCxoUJxk,11027
64
+ euroeval/prompt_templates/summarization.py,sha256=ypyJRX2R5CyCFjJnM3iE5J4OrvLZBwXr7WdBLv8qMKQ,7391
65
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
66
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=tAFQOM_iZwyknbOcZfw6_71lUSbcB5OlY0gOkNfUBAY,7051
67
+ euroeval/task_group_utils/question_answering.py,sha256=vr5gjIJxqqvbj0DYNSEdN0Ek9fkZ5maCAt7FKqzo-Xs,27695
68
+ euroeval/task_group_utils/sequence_classification.py,sha256=_kYgAIF2LABZ-nate3O6s7vlfI2RGHVtpNPjaMIHLDk,16450
69
+ euroeval/task_group_utils/text_to_text.py,sha256=ibSOiP_wpEyGYQh7uEeTjOp-ojLJsEcJT1W7IWOBfk8,5381
70
+ euroeval/task_group_utils/token_classification.py,sha256=hFiO29eSX_KtqbjJM4jy37jmyhfhfnWj3WTpNvh_vQk,17208
71
+ euroeval-16.4.0.dist-info/METADATA,sha256=ot4RNMLDwwJR2UIk20k59E7MsBOXlIqJPYI9xc_XUP8,15365
72
+ euroeval-16.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
73
+ euroeval-16.4.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
74
+ euroeval-16.4.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
75
+ euroeval-16.4.0.dist-info/RECORD,,
@@ -1,71 +0,0 @@
1
- euroeval/__init__.py,sha256=QJo_xezfFnpKBB32nvA_juy29tAz1eVn---MQiexYjE,3901
2
- euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
3
- euroeval/benchmarker.py,sha256=Nt4k1DivG-YtsSiqEwqsHfBzEkauo1lrsG1RAS0ZWuw,48928
4
- euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
- euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
6
- euroeval/constants.py,sha256=e1LRJe6CspvbKlfo4-9ee1wGocNoh1c7GcyaXpiN1Jk,2744
7
- euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
- euroeval/data_models.py,sha256=X4zAdR1K2MPb4f4Vc7gPYfolzFxxsz5WplnsmsiMYY8,27766
9
- euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
10
- euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
- euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
- euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
13
- euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
14
- euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
- euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
- euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
- euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
- euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
19
- euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
- euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
21
- euroeval/tokenisation_utils.py,sha256=7lQ83rP1Ws7HHg20bFbqD4GqtdbyBADwyxPBmFzAzVA,21158
22
- euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
23
- euroeval/utils.py,sha256=qAh8TLrJPk10l9qKcvD1mq2gNOGRTLl88PvPNj5IuRU,19451
24
- euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
- euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
26
- euroeval/benchmark_modules/fresh.py,sha256=qqsaC6u06YeJIK-Z6w9gZefb5cg1nU7ZDrO76l2GZN0,10779
27
- euroeval/benchmark_modules/hf.py,sha256=Z-Z_AxJk2APFXcZdyZrnKQ4OE_uRH81Vsm9x-gfJ1-I,44926
28
- euroeval/benchmark_modules/litellm.py,sha256=2EUhzLcxocfFxjbgyyP5QQtLieoH-fWbLR6RRz64EN8,64176
29
- euroeval/benchmark_modules/vllm.py,sha256=eTwS1YDB0v0lOWvv6_UXPlqNjNaPQTKRY-g495Y6X9s,46432
30
- euroeval/dataset_configs/__init__.py,sha256=ylO6FwnzlWmCuifliE_b4Vs5GXapYeyvZ4j1XVFmdN8,2086
31
- euroeval/dataset_configs/danish.py,sha256=fAMWYQVrx3B11r5NZSL-LWSQTJvCDwSxImIkIrGdoAA,5552
32
- euroeval/dataset_configs/dutch.py,sha256=883caShKOOi5s1Ky0_EKFeq0y9wVuqN-GVqeOwbKFr0,5438
33
- euroeval/dataset_configs/english.py,sha256=rl6bBIluKXkxT8L4e071GQuPprMHTI955mgW46V3Cp0,4658
34
- euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
35
- euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
- euroeval/dataset_configs/finnish.py,sha256=pfO_flf6HHUbZZLae62cV30__uey_Oj37aiX0eBNWcQ,4311
37
- euroeval/dataset_configs/french.py,sha256=OdkCfWhtImgB3Ni6o0NRvCEvjeKAqausfJ2VO04CUwY,4641
38
- euroeval/dataset_configs/german.py,sha256=sav75C7f33OofQzliwvb3g7B7cw0MXm0G8wdlcmI7r8,5051
39
- euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
- euroeval/dataset_configs/italian.py,sha256=YucxgJtCG31sQplJ6hL64sF39ZSj926_a7McpCzKxh0,4925
41
- euroeval/dataset_configs/latvian.py,sha256=fB3tsqZoFldTnrlpeSu9iQQ907ptOVC8ZaielkgmVlM,2677
42
- euroeval/dataset_configs/lithuanian.py,sha256=QTahv862C5XzjLU8WHcExBGlkRFQnj9F4-I_5x1qJSk,1833
43
- euroeval/dataset_configs/norwegian.py,sha256=ipDIg2wXquZvIjlc4Bs-TbMJCKOoK6TL7lP9AzLOOj8,7666
44
- euroeval/dataset_configs/polish.py,sha256=5MTWLUmDG0qMgb1ATSdON2A_2ZFLlXUVjS0u64srfIg,3593
45
- euroeval/dataset_configs/portuguese.py,sha256=wanwK9LYdBND_JPh203L_YQraiLSd2kI8P0myy6U6Dk,4010
46
- euroeval/dataset_configs/spanish.py,sha256=xVWWHS84aOjDcutfAh7J2roHEb2KHZ084pYysH2BdSo,4823
47
- euroeval/dataset_configs/swedish.py,sha256=f_H7khH0IHcZXEQyYM8bpIvYnRsSj0EhVXh4RgpOCmw,5317
48
- euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
49
- euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
50
- euroeval/metrics/huggingface.py,sha256=7_97xfdqsznoBOm3diVvZtJ6k9XUa8isiVVmOgia8kI,6522
51
- euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
52
- euroeval/metrics/pipeline.py,sha256=aLNf0vKTfov-HZbvyJj9_9Z1rR1BkVsWxAea8btCWg8,10513
53
- euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
54
- euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
55
- euroeval/prompt_templates/linguistic_acceptability.py,sha256=n-InOATuwdjlmDjiUdGIk9bQJMUgVFdp3u-iQ0K9WjY,9189
56
- euroeval/prompt_templates/multiple_choice.py,sha256=W0WZdAhbOV2jdHNhjfNNhgoPTbFKA2vhs72U0hP1rW0,7323
57
- euroeval/prompt_templates/named_entity_recognition.py,sha256=Kl7SB7vRJ-K9oXMZcJEffELaQlbwspNKUrQLDeNobcY,17301
58
- euroeval/prompt_templates/reading_comprehension.py,sha256=OtV8tu6wyf7rwW3krmyk8bzdNSRS5WkWFgxok4o67_o,9243
59
- euroeval/prompt_templates/sentiment_classification.py,sha256=tnalqea4TjG6z4xF7tDDKQm7rWrYGg6SIWTX3RDQQ20,10012
60
- euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
61
- euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
62
- euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
63
- euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
64
- euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
65
- euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
66
- euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
67
- euroeval-16.3.0.dist-info/METADATA,sha256=iSfb2jRJO7BfidNgy0jOKUXFh_WwBojxgisOBWQmYHg,15381
68
- euroeval-16.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
69
- euroeval-16.3.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
70
- euroeval-16.3.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
71
- euroeval-16.3.0.dist-info/RECORD,,