EuroEval 16.2.2-py3-none-any.whl → 16.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval might be problematic.
- euroeval/__init__.py +7 -4
- euroeval/benchmark_config_factory.py +0 -4
- euroeval/benchmark_modules/base.py +3 -16
- euroeval/benchmark_modules/fresh.py +5 -2
- euroeval/benchmark_modules/hf.py +107 -66
- euroeval/benchmark_modules/litellm.py +103 -55
- euroeval/benchmark_modules/vllm.py +155 -82
- euroeval/benchmarker.py +184 -129
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +1 -1
- euroeval/constants.py +9 -0
- euroeval/data_loading.py +14 -11
- euroeval/data_models.py +12 -4
- euroeval/dataset_configs/__init__.py +3 -0
- euroeval/dataset_configs/czech.py +79 -0
- euroeval/dataset_configs/danish.py +10 -13
- euroeval/dataset_configs/dutch.py +0 -3
- euroeval/dataset_configs/english.py +0 -3
- euroeval/dataset_configs/estonian.py +11 -1
- euroeval/dataset_configs/finnish.py +0 -3
- euroeval/dataset_configs/french.py +0 -3
- euroeval/dataset_configs/german.py +0 -3
- euroeval/dataset_configs/italian.py +0 -3
- euroeval/dataset_configs/latvian.py +2 -4
- euroeval/dataset_configs/lithuanian.py +68 -0
- euroeval/dataset_configs/norwegian.py +0 -3
- euroeval/dataset_configs/polish.py +0 -3
- euroeval/dataset_configs/portuguese.py +0 -3
- euroeval/dataset_configs/slovak.py +60 -0
- euroeval/dataset_configs/spanish.py +0 -3
- euroeval/dataset_configs/swedish.py +10 -15
- euroeval/finetuning.py +21 -15
- euroeval/generation.py +10 -10
- euroeval/generation_utils.py +2 -3
- euroeval/logging_utils.py +250 -0
- euroeval/metrics/base.py +0 -3
- euroeval/metrics/huggingface.py +10 -6
- euroeval/metrics/llm_as_a_judge.py +5 -3
- euroeval/metrics/pipeline.py +22 -9
- euroeval/metrics/speed.py +0 -3
- euroeval/model_cache.py +11 -14
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/linguistic_acceptability.py +30 -3
- euroeval/prompt_templates/multiple_choice.py +34 -1
- euroeval/prompt_templates/named_entity_recognition.py +71 -11
- euroeval/prompt_templates/reading_comprehension.py +41 -3
- euroeval/prompt_templates/sentiment_classification.py +34 -1
- euroeval/prompt_templates/summarization.py +26 -6
- euroeval/scores.py +7 -7
- euroeval/speed_benchmark.py +3 -5
- euroeval/task_group_utils/multiple_choice_classification.py +0 -3
- euroeval/task_group_utils/question_answering.py +0 -3
- euroeval/task_group_utils/sequence_classification.py +43 -31
- euroeval/task_group_utils/text_to_text.py +17 -8
- euroeval/task_group_utils/token_classification.py +10 -9
- euroeval/tokenisation_utils.py +22 -20
- euroeval/utils.py +30 -147
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/METADATA +182 -61
- euroeval-16.4.0.dist-info/RECORD +75 -0
- euroeval-16.2.2.dist-info/RECORD +0 -70
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py CHANGED

```diff
@@ -11,30 +11,23 @@ import re
 import socket
 import sys
 import typing as t
-import warnings
-from functools import cache
 from pathlib import Path
 
 import demjson3
 import huggingface_hub as hf_hub
-import litellm
 import numpy as np
 import torch
-from datasets.utils import disable_progress_bar
-from transformers import logging as tf_logging
 
+from .caching_utils import cache_arguments
+from .constants import T
 from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
+from .logging_utils import log, log_once
 
 if t.TYPE_CHECKING:
-    from types import TracebackType
-
     from .data_models import ModelIdComponents
     from .types import Predictions
 
 
-logger = logging.getLogger("euroeval")
-
-
 def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
     """Create cache directory for a model.
 
@@ -149,68 +142,6 @@ def enforce_reproducibility(seed: int = 4242) -> np.random.Generator:
     return rng
 
 
-def block_terminal_output() -> None:
-    """Blocks libraries from writing output to the terminal.
-
-    This filters warnings from some libraries, sets the logging level to ERROR for some
-    libraries, disabled tokeniser progress bars when using Hugging Face tokenisers, and
-    disables most of the logging from the `transformers` library.
-    """
-    if os.getenv("FULL_LOG") == "1":
-        return
-
-    # Ignore miscellaneous warnings
-    warnings.filterwarnings("ignore", category=UserWarning)
-    warnings.filterwarnings("ignore", category=FutureWarning)
-    logging.getLogger("absl").setLevel(logging.CRITICAL)
-
-    # Disable matplotlib logging
-    logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
-
-    # Disable PyTorch logging
-    logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
-    warnings.filterwarnings(action="ignore", module="torch*")
-    os.environ["TORCH_LOGS"] = "-all"
-
-    # Disable huggingface_hub logging
-    logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
-
-    # Disable LiteLLM logging
-    logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
-    logging.getLogger("openai").setLevel(logging.CRITICAL)
-    logging.getLogger("httpx").setLevel(logging.CRITICAL)
-    litellm.suppress_debug_info = True
-
-    # Disable vLLM logging
-    logging.getLogger("vllm").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
-    logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
-        logging.CRITICAL
-    )
-    os.environ["LOG_LEVEL"] = "CRITICAL"
-    os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
-
-    # Disable datasets logging
-    logging.getLogger("datasets").setLevel(logging.CRITICAL)
-    logging.getLogger("filelock").setLevel(logging.CRITICAL)
-    disable_progress_bar()
-
-    # Disable evaluate logging
-    warnings.filterwarnings("ignore", module="seqeval*")
-
-    # Disable most of the `transformers` logging
-    tf_logging._default_log_level = logging.CRITICAL
-    tf_logging.set_verbosity(logging.CRITICAL)
-    logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
-    logging.getLogger("accelerate").setLevel(logging.CRITICAL)
-
-
 def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
     """Get a class by its name.
 
@@ -240,9 +171,10 @@ def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type |
 
     if error_messages:
         errors = "\n- " + "\n- ".join(error_messages)
-        logger.debug(
+        log(
             f"Could not find the class with the name(s) {', '.join(class_name)}. The "
-            f"following error messages were raised: {errors}"
+            f"following error messages were raised: {errors}",
+            level=logging.DEBUG,
         )
 
     # If the class could not be found, return None
```
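In the new code, the old module-level `logger.debug(...)` calls are routed through the `log` helper imported from the new `euroeval/logging_utils.py` module (+250 lines), which is not included in this diff. Below is a rough sketch of a compatible helper, inferred only from the call sites in this file; the body is an assumption, not the actual implementation:

```python
import logging

logger = logging.getLogger("euroeval")


def log(message: str, level: int = logging.INFO) -> None:
    """Log a message at the given level (signature inferred from call sites)."""
    # Assumption: the real helper in euroeval/logging_utils.py likely does more
    # (e.g. deduplication for `log_once` and nicer formatting); this sketch just
    # dispatches to the package logger.
    logger.log(level, message)
```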
```diff
@@ -264,49 +196,27 @@ def get_min_cuda_compute_capability() -> float | None:
     return float(f"{major}.{minor}")
 
 
-@cache
+@cache_arguments(disable_condition=lambda: hasattr(sys, "_called_from_test"))
 def internet_connection_available() -> bool:
     """Checks if internet connection is available by pinging google.com.
 
     Returns:
         Whether or not internet connection is available.
     """
+    internet_available: bool = False
+
     try:
         s = socket.create_connection(("1.1.1.1", 80))
         s.close()
-
-
-
-        # import these here as they're developer dependencies, we check the exception name
-        # instead. If the exception is not related to socket connections, we reraise it.
+        internet_available = True
+    except OSError:
+        pass
     except Exception as e:
         pytest_socket_errors = ["SocketConnectBlockedError", "SocketBlockedError"]
-        if type(e).__name__ in pytest_socket_errors
-
-
-
-
-class HiddenPrints:
-    """Context manager which removes all terminal output."""
-
-    def __enter__(self) -> None:
-        """Enter the context manager."""
-        self._original_stdout = sys.stdout
-        self._original_stderr = sys.stderr
-        sys.stdout = open(os.devnull, "w")
-        sys.stderr = open(os.devnull, "w")
-
-    def __exit__(
-        self,
-        exc_type: t.Type[BaseException],
-        exc_val: BaseException,
-        exc_tb: "TracebackType",
-    ) -> None:
-        """Exit the context manager."""
-        sys.stdout.close()
-        sys.stderr.close()
-        sys.stdout = self._original_stdout
-        sys.stderr = self._original_stderr
+        if type(e).__name__ not in pytest_socket_errors:
+            raise e
+
+    return internet_available
 
 
 def raise_if_model_output_contains_nan_values(model_output: "Predictions") -> None:
```
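Here the stdlib `functools.cache` decorator is swapped for `cache_arguments` from the new `euroeval/caching_utils.py` (+79 lines), whose `disable_condition` callable lets caching be bypassed, in this case whenever pytest is running. That module is not part of this diff, so the following is only a minimal sketch of how such a decorator could be built, assuming nothing beyond the usage shown above:

```python
import functools
import typing as t

T = t.TypeVar("T")


def cache_arguments(
    disable_condition: t.Callable[[], bool] = lambda: False,
) -> t.Callable[[t.Callable[..., T]], t.Callable[..., T]]:
    """Cache a function's results unless the disable condition holds (sketch)."""

    def decorator(func: t.Callable[..., T]) -> t.Callable[..., T]:
        cached_func = functools.cache(func)

        @functools.wraps(func)
        def wrapper(*args: t.Any, **kwargs: t.Any) -> T:
            # Bypass the cache when the condition holds, e.g.
            # `lambda: hasattr(sys, "_called_from_test")` during test runs
            if disable_condition():
                return func(*args, **kwargs)
            return cached_func(*args, **kwargs)

        return wrapper

    return decorator
```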
```diff
@@ -364,34 +274,6 @@ def unscramble(scrambled_text: str) -> str:
     return unscrambled
 
 
-@cache
-def log_once(message: str, level: int = logging.INFO) -> None:
-    """Log a message once.
-
-    This is ensured by caching the input/output pairs of this function, using the
-    `functools.cache` decorator.
-
-    Args:
-        message:
-            The message to log.
-        level:
-            The logging level. Defaults to logging.INFO.
-    """
-    match level:
-        case logging.DEBUG:
-            logger.debug(message)
-        case logging.INFO:
-            logger.info(message)
-        case logging.WARNING:
-            logger.warning(message)
-        case logging.ERROR:
-            logger.error(message)
-        case logging.CRITICAL:
-            logger.critical(message)
-        case _:
-            raise ValueError(f"Invalid logging level: {level}")
-
-
 def get_package_version(package_name: str) -> str | None:
     """Get the version of a package.
 
@@ -408,9 +290,6 @@ def get_package_version(package_name: str) -> str | None:
         return None
 
 
-T = t.TypeVar("T", bound=object)
-
-
 def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
     """Run a coroutine, ensuring that the event loop is always closed when we're done.
 
@@ -462,39 +341,43 @@ def extract_json_dict_from_string(s: str) -> dict | None:
     Returns:
         The extracted JSON dictionary, or None if no JSON dictionary could be found.
     """
-    json_regex = r"\{[^{}]
+    json_regex = r"\{[^{}]*?\}"
     if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
-        logger.debug(
+        log(
             "The model output does not contain any JSON dictionary, so cannot parse "
-            f"it. Skipping. Here is the output: {s!r}"
+            f"it. Skipping. Here is the output: {s!r}",
+            level=logging.DEBUG,
         )
         return None
     json_string = json_match.group()
     try:
         json_output = demjson3.decode(txt=json_string)
     except demjson3.JSONDecodeError:
-        logger.debug(
+        log(
             "The model output is not valid JSON, so cannot parse it. Skipping. "
-            f"Here is the output: {json_string!r}"
+            f"Here is the output: {json_string!r}",
+            level=logging.DEBUG,
        )
         return None
     if not isinstance(json_output, dict):
-        logger.debug(
+        log(
             "The model output is not a JSON dictionary, so cannot parse "
-            f"it. Skipping. Here is the output: {json_string!r}"
+            f"it. Skipping. Here is the output: {json_string!r}",
+            level=logging.DEBUG,
         )
         return None
     elif not all(isinstance(key, str) for key in json_output.keys()):
-        logger.debug(
+        log(
             "The model output is not a JSON dictionary with string keys, "
             "so cannot parse it. Skipping. Here is the output: "
-            f"{json_string!r}"
+            f"{json_string!r}",
+            level=logging.DEBUG,
         )
         return None
     return json_output
 
 
-@cache
+@cache_arguments()
 def get_hf_token(api_key: str | None) -> str | bool:
     """Get the Hugging Face token.
 
```
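The tightened pattern `r"\{[^{}]*?\}"` matches the first brace-delimited span non-greedily (it still cannot span nested braces), and `demjson3` stays in place because it tolerates the lenient JSON that model output tends to contain. A small illustration of the combined behaviour, with a made-up model output string and assuming demjson3's default non-strict mode:

```python
import re

import demjson3

# Same pattern as the new EuroEval code: first flat (non-nested) {...} span
json_regex = r"\{[^{}]*?\}"

model_output = "Sure! The answer is {'label': 'positive',} - hope that helps."
match = re.search(pattern=json_regex, string=model_output, flags=re.DOTALL)
assert match is not None

# demjson3's lenient decoding accepts the single quotes and trailing comma
# that the stdlib `json` module would reject
print(demjson3.decode(txt=match.group()))  # {'label': 'positive'}
```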
{euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/METADATA CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.2.2
+Version: 16.4.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,21 +62,28 @@ Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: timm>=1.0.19; extra == 'all'
-Requires-Dist: vllm[flashinfer]>=0.
+Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: timm>=1.0.19; extra == 'generative'
-Requires-Dist: vllm[flashinfer]>=0.
+Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 
+<!-- This disables the requirement that the first line is a top-level heading -->
+<!-- markdownlint-configure-file { "MD041": false } -->
+
 <div align='center'>
-<img
+<img
+  src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
+  height="500"
+  width="372"
+>
 </div>
 
-### The robust European language model benchmark
+### The robust European language model benchmark
 
-
+(formerly known as ScandEval)
 
 ______________________________________________________________________
 [](https://euroeval.com)
@@ -85,19 +92,19 @@ ______________________________________________________________________
 [](https://arxiv.org/abs/2406.13469)
 [](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
 [](https://github.com/EuroEval/EuroEval/commits/main)
-[](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 
-
 ## Maintainer
 
-- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), dan.smart@alexandra.dk)
-
+- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
 
 ## Installation
+
 To install the package simply write the following command in your favorite terminal:
-
-
+
+```bash
+pip install euroeval[all]
 ```
 
 This will install the EuroEval package with all extras. You can also install the
@@ -105,51 +112,61 @@ minimal version by leaving out the `[all]`, in which case the package will let y
 when an evaluation requires a certain extra dependency, and how you install it.
 
 ## Quickstart
+
 ### Benchmarking from the Command Line
+
 The easiest way to benchmark pretrained models is via the command line interface. After
 having installed the package, you can benchmark your favorite model like so:
-
-
+
+```bash
+euroeval --model <model-id>
 ```
 
 Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
 Hub](https://huggingface.co/models). By default this will benchmark the model on all
 the tasks available. If you want to benchmark on a particular task, then use the
 `--task` argument:
-
-
+
+```bash
+euroeval --model <model-id> --task sentiment-classification
 ```
 
 We can also narrow down which languages we would like to benchmark on. This can be done
 by setting the `--language` argument. Here we thus benchmark the model on the Danish
 sentiment classification task:
-
-
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da
 ```
 
 Multiple models, datasets and/or languages can be specified by just attaching multiple
 arguments. Here is an example with two models:
-
-
+
+```bash
+euroeval --model <model-id1> --model <model-id2>
 ```
 
 The specific model version/revision to use can also be added after the suffix '@':
-
-
+
+```bash
+euroeval --model <model-id>@<commit>
 ```
 
 This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
 
 See all the arguments and options available for the `euroeval` command by typing
-
-
+
+```bash
+euroeval --help
 ```
 
 ### Benchmarking from a Script
+
 In a script, the syntax is similar to the command line interface. You simply initialise
 an object of the `Benchmarker` class, and call this benchmark object with your favorite
 model:
-
+
+```python
 >>> from euroeval import Benchmarker
 >>> benchmark = Benchmarker()
 >>> benchmark(model="<model-id>")
@@ -157,29 +174,34 @@ model:
 
 To benchmark on a specific task and/or language, you simply specify the `task` or
 `language` arguments, shown here with same example as above:
-
+
+```python
 >>> benchmark(model="<model-id>", task="sentiment-classification", language="da")
 ```
 
 If you want to benchmark a subset of all the models on the Hugging Face Hub, you can
 simply leave out the `model` argument. In this example, we're benchmarking all Danish
 models on the Danish sentiment classification task:
-
+
+```python
 >>> benchmark(task="sentiment-classification", language="da")
 ```
 
 ### Benchmarking in an Offline Environment
+
 If you need to benchmark in an offline environment, you need to download the models,
 datasets and metrics beforehand. This can be done by adding the `--download-only`
 argument, from the command line, or the `download_only` argument, if benchmarking from a
 script. For example to download the model you want and all of the Danish sentiment
 classification datasets:
-
-
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da --download-only
 ```
 
 Or from a script:
-
+
+```python
 >>> benchmark(
 ...     model="<model-id>",
 ...     task="sentiment-classification",
@@ -193,11 +215,13 @@ internet connection will be required during evaluation. If offline support is im
 to you, please consider [opening an issue](https://github.com/EuroEval/EuroEval/issues).
 
 ### Benchmarking from Docker
+
 A Dockerfile is provided in the repo, which can be downloaded and run, without needing
 to clone the repo and installing from source. This can be fetched programmatically by
 running the following:
-
-
+
+```bash
+wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
 ```
 
 Next, to be able to build the Docker image, first ensure that the NVIDIA Container
@@ -208,56 +232,153 @@ and
 Ensure that the the CUDA version stated at the top of the Dockerfile matches the CUDA
 version installed (which you can check using `nvidia-smi`). After that, we build the
 image as follows:
-
-
+
+```bash
+docker build --pull -t euroeval -f Dockerfile.cuda .
 ```
 
 With the Docker image built, we can now evaluate any model as follows:
-
-
+
+```bash
+docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
 ```
 
 Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
 argument. This could for instance be `--model <model-id> --task
 sentiment-classification`.
 
-
 ### Reproducing the datasets
+
 All datasets used in this project are generated using the scripts located in the
 [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
 with the following command
 
-```
-
+```bash
+uv run src/scripts/<name-of-script>.py
 ```
 
 Replace <name-of-script> with the specific script you wish to execute, e.g.,
 
-```
-
+```bash
+uv run src/scripts/create_allocine.py
 ```
 
 ## Contributors :pray:
 
 A huge thank you to all the contributors who have helped make this project a success!
 
-<a href="https://github.com/peter-sk"
-<
-
-
-
-
-<a href="https://github.com/
-<
-
-
-
-
-<a href="https://github.com/
-<
-
+<a href="https://github.com/peter-sk">
+  <img
+    src="https://avatars.githubusercontent.com/u/6168908"
+    width=50
+    alt="Contributor avatar for peter-sk"
+  />
+</a>
+<a href="https://github.com/AJDERS">
+  <img
+    src="https://avatars.githubusercontent.com/u/38854604"
+    width=50
+    alt="Contributor avatar for AJDERS"
+  />
+</a>
+<a href="https://github.com/oliverkinch">
+  <img
+    src="https://avatars.githubusercontent.com/u/71556498"
+    width=50
+    alt="Contributor avatar for oliverkinch"
+  />
+</a>
+<a href="https://github.com/versae">
+  <img
+    src="https://avatars.githubusercontent.com/u/173537"
+    width=50
+    alt="Contributor avatar for versae"
+  />
+</a>
+<a href="https://github.com/KennethEnevoldsen">
+  <img
+    src="https://avatars.githubusercontent.com/u/23721977"
+    width=50
+    alt="Contributor avatar for KennethEnevoldsen"
+  />
+</a>
+<a href="https://github.com/viggo-gascou">
+  <img
+    src="https://avatars.githubusercontent.com/u/94069687"
+    width=50
+    alt="Contributor avatar for viggo-gascou"
+  />
+</a>
+<a href="https://github.com/mathiasesn">
+  <img
+    src="https://avatars.githubusercontent.com/u/27091759"
+    width=50
+    alt="Contributor avatar for mathiasesn"
+  />
+</a>
+<a href="https://github.com/Alkarex">
+  <img
+    src="https://avatars.githubusercontent.com/u/1008324"
+    width=50
+    alt="Contributor avatar for Alkarex"
+  />
+</a>
+<a href="https://github.com/marksverdhei">
+  <img
+    src="https://avatars.githubusercontent.com/u/46672778"
+    width=50
+    alt="Contributor avatar for marksverdhei"
+  />
+</a>
+<a href="https://github.com/Mikeriess">
+  <img
+    src="https://avatars.githubusercontent.com/u/19728563"
+    width=50
+    alt="Contributor avatar for Mikeriess"
+  />
+</a>
+<a href="https://github.com/ThomasKluiters">
+  <img
+    src="https://avatars.githubusercontent.com/u/8137941"
+    width=50
+    alt="Contributor avatar for ThomasKluiters"
+  />
+</a>
+<a href="https://github.com/BramVanroy">
+  <img
+    src="https://avatars.githubusercontent.com/u/2779410"
+    width=50
+    alt="Contributor avatar for BramVanroy"
+  />
+</a>
+<a href="https://github.com/peregilk">
+  <img
+    src="https://avatars.githubusercontent.com/u/9079808"
+    width=50
+    alt="Contributor avatar for peregilk"
+  />
+</a>
+<a href="https://github.com/Rijgersberg">
+  <img
+    src="https://avatars.githubusercontent.com/u/8604946"
+    width=50
+    alt="Contributor avatar for Rijgersberg"
+  />
+</a>
+<a href="https://github.com/duarteocarmo">
+  <img
+    src="https://avatars.githubusercontent.com/u/26342344"
+    width=50
+    alt="Contributor avatar for duarteocarmo"
+  />
+</a>
+<a href="https://github.com/slowwavesleep">
  <img
+    src="https://avatars.githubusercontent.com/u/44175589"
+    width=50
+    alt="Contributor avatar for slowwavesleep"
+  />
+</a>
 
 ### Contribute to EuroEval
 
@@ -269,8 +390,8 @@ contributing new datasets, your help makes this project better for everyone.
 - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
   a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
 
-
 ### Special Thanks
+
 - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
   [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
 - Thanks [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
@@ -285,11 +406,11 @@ contributing new datasets, your help makes this project better for everyone.
 - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
   evaluate GPT-4-turbo in German.
 
-
 ## Citing EuroEval
+
 If you want to cite the framework then feel free to use this:
 
-```
+```bibtex
 @article{smart2024encoder,
   title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
   author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
````