PyPI - EuroEval - Versions diffs - 16.3.0__py3-none-any.whl → 16.4.0__py3-none-any.whl - Mend

EuroEval 16.3.0__py3-none-any.whl → 16.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (64) hide show

euroeval/__init__.py +3 -2
euroeval/benchmark_config_factory.py +0 -4
euroeval/benchmark_modules/base.py +3 -16
euroeval/benchmark_modules/fresh.py +2 -1
euroeval/benchmark_modules/hf.py +99 -62
euroeval/benchmark_modules/litellm.py +101 -41
euroeval/benchmark_modules/vllm.py +91 -83
euroeval/benchmarker.py +84 -78
euroeval/caching_utils.py +79 -0
euroeval/callbacks.py +5 -7
euroeval/constants.py +6 -0
euroeval/data_loading.py +14 -11
euroeval/data_models.py +12 -4
euroeval/dataset_configs/__init__.py +2 -0
euroeval/dataset_configs/czech.py +79 -0
euroeval/dataset_configs/danish.py +10 -11
euroeval/dataset_configs/dutch.py +0 -1
euroeval/dataset_configs/english.py +0 -1
euroeval/dataset_configs/estonian.py +11 -1
euroeval/dataset_configs/finnish.py +0 -1
euroeval/dataset_configs/french.py +0 -1
euroeval/dataset_configs/german.py +0 -1
euroeval/dataset_configs/italian.py +0 -1
euroeval/dataset_configs/latvian.py +0 -1
euroeval/dataset_configs/lithuanian.py +9 -3
euroeval/dataset_configs/norwegian.py +0 -1
euroeval/dataset_configs/polish.py +0 -1
euroeval/dataset_configs/portuguese.py +0 -1
euroeval/dataset_configs/slovak.py +60 -0
euroeval/dataset_configs/spanish.py +0 -1
euroeval/dataset_configs/swedish.py +10 -12
euroeval/finetuning.py +21 -15
euroeval/generation.py +10 -10
euroeval/generation_utils.py +2 -3
euroeval/logging_utils.py +250 -0
euroeval/metrics/base.py +0 -3
euroeval/metrics/huggingface.py +9 -5
euroeval/metrics/llm_as_a_judge.py +5 -3
euroeval/metrics/pipeline.py +17 -9
euroeval/metrics/speed.py +0 -3
euroeval/model_cache.py +11 -14
euroeval/model_config.py +4 -5
euroeval/model_loading.py +3 -0
euroeval/prompt_templates/linguistic_acceptability.py +21 -3
euroeval/prompt_templates/multiple_choice.py +25 -1
euroeval/prompt_templates/named_entity_recognition.py +51 -11
euroeval/prompt_templates/reading_comprehension.py +31 -3
euroeval/prompt_templates/sentiment_classification.py +23 -1
euroeval/prompt_templates/summarization.py +26 -6
euroeval/scores.py +7 -7
euroeval/speed_benchmark.py +3 -5
euroeval/task_group_utils/multiple_choice_classification.py +0 -3
euroeval/task_group_utils/question_answering.py +0 -3
euroeval/task_group_utils/sequence_classification.py +43 -31
euroeval/task_group_utils/text_to_text.py +17 -8
euroeval/task_group_utils/token_classification.py +10 -9
euroeval/tokenisation_utils.py +14 -12
euroeval/utils.py +29 -146
{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/METADATA +4 -4
euroeval-16.4.0.dist-info/RECORD +75 -0
euroeval-16.3.0.dist-info/RECORD +0 -71
{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0

euroeval/tokenisation_utils.py CHANGED Viewed

@@ -5,11 +5,11 @@ import re
 import typing as t
 import torch
-from transformers import MistralCommonTokenizer
+from transformers.tokenization_mistral_common import MistralCommonTokenizer
 from .enums import GenerativeType
 from .exceptions import InvalidModel
-from .utils import log_once
+from .logging_utils import log, log_once
 if t.TYPE_CHECKING:
     from transformers.tokenization_utils import PreTrainedTokenizer
@@ -18,9 +18,6 @@ if t.TYPE_CHECKING:
     from .data_models import DatasetConfig, ModelConfig
-logger = logging.getLogger("euroeval")
 def get_special_token_metadata(tokeniser: "PreTrainedTokenizerBase") -> dict:
     """Get the special token metadata for a tokeniser.
@@ -182,7 +179,7 @@ def get_bos_token(
             "The model does not have a beginning-of-sequence token. Please ensure that "
             "this has been set in the tokeniser's configuration. Using no BOS token."
             " This may lead to unexpected behavior in the model.",
-            level=logging.INFO,
+            level=logging.WARNING,
         )
         return None, None
@@ -223,14 +220,14 @@ def get_eos_token(
             "The model does not have an end-of-sequence token. Please ensure that this "
             "has been set in the tokeniser's configuration. Using no EOS token. This "
             "may lead to unexpected behavior in the model.",
-            level=logging.INFO,
+            level=logging.WARNING,
         )
         return None, None
     log_once(
         f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
         f"ID {eos_token_id}.",
-        level=logging.DEBUG,
+        level=logging.WARNING,
     )
     return eos_token, eos_token_id
@@ -306,7 +303,7 @@ def get_pad_token(
                 "Could not identify a padding token for the model. Please ensure that "
                 "this has been set in the tokeniser's configuration. Using no padding "
                 "token. This may lead to unexpected behavior in the model.",
-                level=logging.INFO,
+                level=logging.WARNING,
             )
             return None, None
@@ -358,12 +355,16 @@ def get_end_of_chat_token_ids(
             x_token_index = idx
             break
     else:
-        logger.debug("Could not locate the end-of-chat token for the model.")
+        log(
+            "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
+        )
         return None
     end_of_chat_tokens = token_ids[x_token_index + 1 :]
     if len(end_of_chat_tokens) == 0:
-        logger.debug("Could not locate the end-of-chat token for the model.")
+        log(
+            "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
+        )
         return None
     log_once(
@@ -506,7 +507,8 @@ def get_first_label_token_mapping(
             log_once(
                 "We will not use logprobs with the model since the first tokens of the "
                 "labels are not distinct. The first tokens for the labels "
-                f"{local_labels} are {first_tokens}"
+                f"{local_labels} are {first_tokens}",
+                level=logging.DEBUG,
             )
         return False

euroeval/utils.py CHANGED Viewed

@@ -11,30 +11,23 @@ import re
 import socket
 import sys
 import typing as t
-import warnings
-from functools import cache
 from pathlib import Path
 import demjson3
 import huggingface_hub as hf_hub
-import litellm
 import numpy as np
 import torch
-from datasets.utils import disable_progress_bar
-from transformers import logging as tf_logging
+from .caching_utils import cache_arguments
+from .constants import T
 from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
+from .logging_utils import log, log_once
 if t.TYPE_CHECKING:
-    from types import TracebackType
     from .data_models import ModelIdComponents
     from .types import Predictions
-logger = logging.getLogger("euroeval")
 def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
     """Create cache directory for a model.
@@ -149,68 +142,6 @@ def enforce_reproducibility(seed: int = 4242) -> np.random.Generator:
     return rng
-def block_terminal_output() -> None:
-    """Blocks libraries from writing output to the terminal.
-    This filters warnings from some libraries, sets the logging level to ERROR for some
-    libraries, disabled tokeniser progress bars when using Hugging Face tokenisers, and
-    disables most of the logging from the `transformers` library.
-    """
-    if os.getenv("FULL_LOG") == "1":
-        return
-    # Ignore miscellaneous warnings
-    warnings.filterwarnings("ignore", category=UserWarning)
-    warnings.filterwarnings("ignore", category=FutureWarning)
-    logging.getLogger("absl").setLevel(logging.CRITICAL)
-    # Disable matplotlib logging
-    logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
-    # Disable PyTorch logging
-    logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
-    warnings.filterwarnings(action="ignore", module="torch*")
-    os.environ["TORCH_LOGS"] = "-all"
-    # Disable huggingface_hub logging
-    logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
-    # Disable LiteLLM logging
-    logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
-    logging.getLogger("openai").setLevel(logging.CRITICAL)
-    logging.getLogger("httpx").setLevel(logging.CRITICAL)
-    litellm.suppress_debug_info = True
-    # Disable vLLM logging
-    logging.getLogger("vllm").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
-    logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
-    logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
-        logging.CRITICAL
-    )
-    os.environ["LOG_LEVEL"] = "CRITICAL"
-    os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
-    # Disable datasets logging
-    logging.getLogger("datasets").setLevel(logging.CRITICAL)
-    logging.getLogger("filelock").setLevel(logging.CRITICAL)
-    disable_progress_bar()
-    # Disable evaluate logging
-    warnings.filterwarnings("ignore", module="seqeval*")
-    # Disable most of the `transformers` logging
-    tf_logging._default_log_level = logging.CRITICAL
-    tf_logging.set_verbosity(logging.CRITICAL)
-    logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
-    logging.getLogger("accelerate").setLevel(logging.CRITICAL)
 def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
     """Get a class by its name.
@@ -240,9 +171,10 @@ def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type |
     if error_messages:
         errors = "\n- " + "\n- ".join(error_messages)
-        logger.debug(
+        log(
             f"Could not find the class with the name(s) {', '.join(class_name)}. The "
-            f"following error messages were raised: {errors}"
+            f"following error messages were raised: {errors}",
+            level=logging.DEBUG,
         )
     # If the class could not be found, return None
@@ -264,49 +196,27 @@ def get_min_cuda_compute_capability() -> float | None:
     return float(f"{major}.{minor}")
-@cache
+@cache_arguments(disable_condition=lambda: hasattr(sys, "_called_from_test"))
 def internet_connection_available() -> bool:
     """Checks if internet connection is available by pinging google.com.
     Returns:
         Whether or not internet connection is available.
     """
+    internet_available: bool = False
     try:
         s = socket.create_connection(("1.1.1.1", 80))
         s.close()
-        return True
-    # We want to only catch exceptions related to socket connections, but as we cannot
-    # import these here as they're developer dependencies, we check the exception name
-    # instead. If the exception is not related to socket connections, we reraise it.
+        internet_available = True
+    except OSError:
+        pass
     except Exception as e:
         pytest_socket_errors = ["SocketConnectBlockedError", "SocketBlockedError"]
-        if type(e).__name__ in pytest_socket_errors or isinstance(e, OSError):
-            return False
-        raise e
-class HiddenPrints:
-    """Context manager which removes all terminal output."""
-    def __enter__(self) -> None:
-        """Enter the context manager."""
-        self._original_stdout = sys.stdout
-        self._original_stderr = sys.stderr
-        sys.stdout = open(os.devnull, "w")
-        sys.stderr = open(os.devnull, "w")
-    def __exit__(
-        self,
-        exc_type: t.Type[BaseException],
-        exc_val: BaseException,
-        exc_tb: "TracebackType",
-    ) -> None:
-        """Exit the context manager."""
-        sys.stdout.close()
-        sys.stderr.close()
-        sys.stdout = self._original_stdout
-        sys.stderr = self._original_stderr
+        if type(e).__name__ not in pytest_socket_errors:
+            raise e
+    return internet_available
 def raise_if_model_output_contains_nan_values(model_output: "Predictions") -> None:
@@ -364,34 +274,6 @@ def unscramble(scrambled_text: str) -> str:
     return unscrambled
-@cache
-def log_once(message: str, level: int = logging.INFO) -> None:
-    """Log a message once.
-    This is ensured by caching the input/output pairs of this function, using the
-    `functools.cache` decorator.
-    Args:
-        message:
-            The message to log.
-        level:
-            The logging level. Defaults to logging.INFO.
-    """
-    match level:
-        case logging.DEBUG:
-            logger.debug(message)
-        case logging.INFO:
-            logger.info(message)
-        case logging.WARNING:
-            logger.warning(message)
-        case logging.ERROR:
-            logger.error(message)
-        case logging.CRITICAL:
-            logger.critical(message)
-        case _:
-            raise ValueError(f"Invalid logging level: {level}")
 def get_package_version(package_name: str) -> str | None:
     """Get the version of a package.
@@ -408,9 +290,6 @@ def get_package_version(package_name: str) -> str | None:
         return None
-T = t.TypeVar("T", bound=object)
 def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
     """Run a coroutine, ensuring that the event loop is always closed when we're done.
@@ -464,37 +343,41 @@ def extract_json_dict_from_string(s: str) -> dict | None:
     """
     json_regex = r"\{[^{}]*?\}"
     if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
-        logger.debug(
+        log(
             "The model output does not contain any JSON dictionary, so cannot parse "
-            f"it. Skipping. Here is the output: {s!r}"
+            f"it. Skipping. Here is the output: {s!r}",
+            level=logging.DEBUG,
         )
         return None
     json_string = json_match.group()
     try:
         json_output = demjson3.decode(txt=json_string)
     except demjson3.JSONDecodeError:
-        logger.debug(
+        log(
             "The model output is not valid JSON, so cannot parse it. Skipping. "
-            f"Here is the output: {json_string!r}"
+            f"Here is the output: {json_string!r}",
+            level=logging.DEBUG,
         )
         return None
     if not isinstance(json_output, dict):
-        logger.debug(
+        log(
             "The model output is not a JSON dictionary, so cannot parse "
-            f"it. Skipping. Here is the output: {json_string!r}"
+            f"it. Skipping. Here is the output: {json_string!r}",
+            level=logging.DEBUG,
         )
         return None
     elif not all(isinstance(key, str) for key in json_output.keys()):
-        logger.debug(
+        log(
             "The model output is not a JSON dictionary with string keys, "
             "so cannot parse it. Skipping. Here is the output: "
-            f"{json_string!r}"
+            f"{json_string!r}",
+            level=logging.DEBUG,
         )
         return None
     return json_output
-@cache
+@cache_arguments()
 def get_hf_token(api_key: str | None) -> str | bool:
     """Get the Hugging Face token.

{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.3.0
+Version: 16.4.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,12 +62,12 @@ Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: timm>=1.0.19; extra == 'all'
-Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: timm>=1.0.19; extra == 'generative'
-Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm[flashinfer]>=0.11.0; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 <!-- This disables the requirement that the first line is a top-level heading -->
@@ -92,7 +92,7 @@ ______________________________________________________________________
 [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
 [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
 [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
-[![Code Coverage](https://img.shields.io/badge/Coverage-67%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
+[![Code Coverage](https://img.shields.io/badge/Coverage-70%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 ## Maintainer

euroeval-16.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,75 @@
+euroeval/__init__.py,sha256=Ci1Sta9hl-v-ZPwJ1qqAVpzvj-vVgZZbQQuP5Qopc4o,3956
+euroeval/benchmark_config_factory.py,sha256=x1HfK8kDVxN14PPHxonsDv0vhkdrexsMJfKaXhO9WQQ,8540
+euroeval/benchmarker.py,sha256=M_2KV0f41RmCiRLcQLEIACt1TcL7QqvH48ds0ebJCG8,49705
+euroeval/caching_utils.py,sha256=AkR0TLY9EHbqv3TrhtCmpEGsm0DWZSLEfR2fRHq1S3E,2587
+euroeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
+euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
+euroeval/constants.py,sha256=XAdsdSE4bAOUeW2o5qmMlfqRmsXZUNIKlEZrbxBPdLk,2845
+euroeval/data_loading.py,sha256=r2GtvH2fAPapE9Idyu8W27n3YXD2Bgw8Qt88vdDn0DQ,4751
+euroeval/data_models.py,sha256=j3gdzLSxgr3FakBIOqvVGZ5K5cXb4RrCMOkJc8J8Zmc,28007
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=t3VqkuRVqRxcpHhSzU4nF4npvLDnjNzPJqGqG-L6ifk,11764
+euroeval/generation.py,sha256=epv2QPHTxzoBmq5OFQtolvuvJ6ce4FkdD03NTYdKFZk,12579
+euroeval/generation_utils.py,sha256=3mI-T9imk433VsvbwCy71Zzv2XOdm-l1SH-IiFfSd9M,18285
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/logging_utils.py,sha256=iq9K2-7URgeHOUJCYCC9k8vRAz4YmBf4f44GpCVEGcc,8802
+euroeval/model_cache.py,sha256=S_8ZtLaliTiUEvQAVw_DJ1qk5PWUO5-eE04hGScCj_o,9246
+euroeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
+euroeval/model_loading.py,sha256=mVh05sPENBBOIUkd_rwXqbBd13YvF_tOVZ8XGtguNzw,2338
+euroeval/scores.py,sha256=tlLfmI6Pgm1d_odubfyFcGLoB6Mxgfw3Yl7POzFv9l8,3235
+euroeval/speed_benchmark.py,sha256=k9xEF7jPAMrEBcZdykilQ6eJMGhFW1eUGuhQco9470M,4034
+euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
+euroeval/tokenisation_utils.py,sha256=rytsJy4mNEqeSdGzXsvVU4OShveeHOOlbaQOJDsX4S0,21275
+euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
+euroeval/utils.py,sha256=VJrbEFXr6ZCJIHiMT7M7Y84ZYl0LHe2uhIz4eePciAw,15235
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=PeOqhfrc9iqyRz1aDHFBiTpWcwU5zDXo5pB_CD8W4VI,11199
+euroeval/benchmark_modules/fresh.py,sha256=h4TPJlJK6xxxyhAXURr0T9gk4Khm3WyujnKBDFc3sCE,10806
+euroeval/benchmark_modules/hf.py,sha256=enj88OY2XELdNgLnqeRPXvX2ATgcm6fjQpSYpBhmgzI,46274
+euroeval/benchmark_modules/litellm.py,sha256=VNFIOJU8TJNrifHtfBILh1MeS7tehqztxH5WoPLr5fc,66581
+euroeval/benchmark_modules/vllm.py,sha256=dm19gYG-MR63V8YpZBM1iOQ1c7xbFRzo9NuDWHG3q-Y,46952
+euroeval/dataset_configs/__init__.py,sha256=zvyH0onXIDtm8lHDVRSzk7J0_mJFU0H6WnLueaxM7WE,2157
+euroeval/dataset_configs/czech.py,sha256=9IDYKg1aoikMXIqQo2tYTQHf2WmQEujkNTyF0a8c9c8,2134
+euroeval/dataset_configs/danish.py,sha256=nkw1poFOJGpQJFB9HYC6bdlNzUR5pXxYacvZs4GrK4Y,5522
+euroeval/dataset_configs/dutch.py,sha256=CDr0oQnmDxeNloZ6iTGYPcNqPM5R9N8Z4aTKEE0C2MU,5408
+euroeval/dataset_configs/english.py,sha256=2sJQPM4rZSYlwE5v4AiDm95Boq-_53AzdOt5cL_drJs,4628
+euroeval/dataset_configs/estonian.py,sha256=fC5TUGpd6u22DUxoETBLA7EThwqsPDU54gXTzWtFCHk,3369
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=DwrhwluoV4rmW8m2E5gWTfvHZ1XKRQG_3KU7wSOqM40,4281
+euroeval/dataset_configs/french.py,sha256=MIZUAn2rNwasb41DC92q6vMwRBem1Fw4D7Hj1cLFlfs,4611
+euroeval/dataset_configs/german.py,sha256=D-Yuz0pGf8pOEfMmTibXfk0k0QGjA4nEyAmea4TnCh0,5021
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=GaN7u2NgAOrpe7n--CsmpQm_n-PCVsTN-wV78UKtQco,4895
+euroeval/dataset_configs/latvian.py,sha256=8hb32_YD_nQHn4kRVfwiGRDoJHF8M00ZdcwuD5ozJwU,2647
+euroeval/dataset_configs/lithuanian.py,sha256=Gv3ta3Gs7xknZ_h_dVWY7YN43UfQzLkJPnYnQcuBguU,1998
+euroeval/dataset_configs/norwegian.py,sha256=VcNftTvOJMCQEJvDFe3iixKbr8cjE3C6oHG4Jp4HET4,7636
+euroeval/dataset_configs/polish.py,sha256=wiorGf4Z06WLPYAa5blD8F2qDaEWUr4MgVShkkVfVo4,3563
+euroeval/dataset_configs/portuguese.py,sha256=TsjJMGJc_wExE_9TMJiQuxhN9BylXcHTXRFaCmkE4Gg,3980
+euroeval/dataset_configs/slovak.py,sha256=Dc9ai2VW-ckQk7trglL2w1Ki0NECsr1RMXQPYBAN6OU,1759
+euroeval/dataset_configs/spanish.py,sha256=VQHQiRsTLlen1zBKgbmRiXSB--b89WofXgFxeIgMR1o,4793
+euroeval/dataset_configs/swedish.py,sha256=pNd-O9cU-4_9gkQU-EFVzsjri6Jg-0taVkzQYdFT6Lw,5257
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
+euroeval/metrics/huggingface.py,sha256=eCbL-jIj5WYAVRSYdbBWDzDoakIPl6_rSvBqLZhXO-E,6736
+euroeval/metrics/llm_as_a_judge.py,sha256=br-pIyzhgrfDXZb6K0GuSUAyczLnrc7pFugW1DYwK6w,9721
+euroeval/metrics/pipeline.py,sha256=xGCA7N1F4cLKOIeXP9SGAZvrWToREwAVb_gR5iBMQIU,10825
+euroeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=Q-GyoS_c_iM-wQ8aXTywRTdxl1kUF0WEzHWh40hsk3s,10098
+euroeval/prompt_templates/multiple_choice.py,sha256=p6Dt3EMaASyqFHOjxdisFnh7OOVi-roCyKalSPwp5Dc,8220
+euroeval/prompt_templates/named_entity_recognition.py,sha256=3yEr1GHk0UbubsTwDSK928QssgYO0mnMfOgVmlDT2HI,19066
+euroeval/prompt_templates/reading_comprehension.py,sha256=0eYnJOfk8u9Zv_Xj6VtDLoQwvfe5_jjzAWGAksRMO6Y,10338
+euroeval/prompt_templates/sentiment_classification.py,sha256=Xg90BzCHQEmgTImn9zqI9Z48nW1paGQ-4AWYCxoUJxk,11027
+euroeval/prompt_templates/summarization.py,sha256=ypyJRX2R5CyCFjJnM3iE5J4OrvLZBwXr7WdBLv8qMKQ,7391
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=tAFQOM_iZwyknbOcZfw6_71lUSbcB5OlY0gOkNfUBAY,7051
+euroeval/task_group_utils/question_answering.py,sha256=vr5gjIJxqqvbj0DYNSEdN0Ek9fkZ5maCAt7FKqzo-Xs,27695
+euroeval/task_group_utils/sequence_classification.py,sha256=_kYgAIF2LABZ-nate3O6s7vlfI2RGHVtpNPjaMIHLDk,16450
+euroeval/task_group_utils/text_to_text.py,sha256=ibSOiP_wpEyGYQh7uEeTjOp-ojLJsEcJT1W7IWOBfk8,5381
+euroeval/task_group_utils/token_classification.py,sha256=hFiO29eSX_KtqbjJM4jy37jmyhfhfnWj3WTpNvh_vQk,17208
+euroeval-16.4.0.dist-info/METADATA,sha256=ot4RNMLDwwJR2UIk20k59E7MsBOXlIqJPYI9xc_XUP8,15365
+euroeval-16.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.4.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.4.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.4.0.dist-info/RECORD,,

euroeval-16.3.0.dist-info/RECORD DELETED Viewed

@@ -1,71 +0,0 @@
-euroeval/__init__.py,sha256=QJo_xezfFnpKBB32nvA_juy29tAz1eVn---MQiexYjE,3901
-euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
-euroeval/benchmarker.py,sha256=Nt4k1DivG-YtsSiqEwqsHfBzEkauo1lrsG1RAS0ZWuw,48928
-euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
-euroeval/constants.py,sha256=e1LRJe6CspvbKlfo4-9ee1wGocNoh1c7GcyaXpiN1Jk,2744
-euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
-euroeval/data_models.py,sha256=X4zAdR1K2MPb4f4Vc7gPYfolzFxxsz5WplnsmsiMYY8,27766
-euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
-euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
-euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
-euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
-euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
-euroeval/tokenisation_utils.py,sha256=7lQ83rP1Ws7HHg20bFbqD4GqtdbyBADwyxPBmFzAzVA,21158
-euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
-euroeval/utils.py,sha256=qAh8TLrJPk10l9qKcvD1mq2gNOGRTLl88PvPNj5IuRU,19451
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
-euroeval/benchmark_modules/fresh.py,sha256=qqsaC6u06YeJIK-Z6w9gZefb5cg1nU7ZDrO76l2GZN0,10779
-euroeval/benchmark_modules/hf.py,sha256=Z-Z_AxJk2APFXcZdyZrnKQ4OE_uRH81Vsm9x-gfJ1-I,44926
-euroeval/benchmark_modules/litellm.py,sha256=2EUhzLcxocfFxjbgyyP5QQtLieoH-fWbLR6RRz64EN8,64176
-euroeval/benchmark_modules/vllm.py,sha256=eTwS1YDB0v0lOWvv6_UXPlqNjNaPQTKRY-g495Y6X9s,46432
-euroeval/dataset_configs/__init__.py,sha256=ylO6FwnzlWmCuifliE_b4Vs5GXapYeyvZ4j1XVFmdN8,2086
-euroeval/dataset_configs/danish.py,sha256=fAMWYQVrx3B11r5NZSL-LWSQTJvCDwSxImIkIrGdoAA,5552
-euroeval/dataset_configs/dutch.py,sha256=883caShKOOi5s1Ky0_EKFeq0y9wVuqN-GVqeOwbKFr0,5438
-euroeval/dataset_configs/english.py,sha256=rl6bBIluKXkxT8L4e071GQuPprMHTI955mgW46V3Cp0,4658
-euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=pfO_flf6HHUbZZLae62cV30__uey_Oj37aiX0eBNWcQ,4311
-euroeval/dataset_configs/french.py,sha256=OdkCfWhtImgB3Ni6o0NRvCEvjeKAqausfJ2VO04CUwY,4641
-euroeval/dataset_configs/german.py,sha256=sav75C7f33OofQzliwvb3g7B7cw0MXm0G8wdlcmI7r8,5051
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=YucxgJtCG31sQplJ6hL64sF39ZSj926_a7McpCzKxh0,4925
-euroeval/dataset_configs/latvian.py,sha256=fB3tsqZoFldTnrlpeSu9iQQ907ptOVC8ZaielkgmVlM,2677
-euroeval/dataset_configs/lithuanian.py,sha256=QTahv862C5XzjLU8WHcExBGlkRFQnj9F4-I_5x1qJSk,1833
-euroeval/dataset_configs/norwegian.py,sha256=ipDIg2wXquZvIjlc4Bs-TbMJCKOoK6TL7lP9AzLOOj8,7666
-euroeval/dataset_configs/polish.py,sha256=5MTWLUmDG0qMgb1ATSdON2A_2ZFLlXUVjS0u64srfIg,3593
-euroeval/dataset_configs/portuguese.py,sha256=wanwK9LYdBND_JPh203L_YQraiLSd2kI8P0myy6U6Dk,4010
-euroeval/dataset_configs/spanish.py,sha256=xVWWHS84aOjDcutfAh7J2roHEb2KHZ084pYysH2BdSo,4823
-euroeval/dataset_configs/swedish.py,sha256=f_H7khH0IHcZXEQyYM8bpIvYnRsSj0EhVXh4RgpOCmw,5317
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
-euroeval/metrics/huggingface.py,sha256=7_97xfdqsznoBOm3diVvZtJ6k9XUa8isiVVmOgia8kI,6522
-euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
-euroeval/metrics/pipeline.py,sha256=aLNf0vKTfov-HZbvyJj9_9Z1rR1BkVsWxAea8btCWg8,10513
-euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=n-InOATuwdjlmDjiUdGIk9bQJMUgVFdp3u-iQ0K9WjY,9189
-euroeval/prompt_templates/multiple_choice.py,sha256=W0WZdAhbOV2jdHNhjfNNhgoPTbFKA2vhs72U0hP1rW0,7323
-euroeval/prompt_templates/named_entity_recognition.py,sha256=Kl7SB7vRJ-K9oXMZcJEffELaQlbwspNKUrQLDeNobcY,17301
-euroeval/prompt_templates/reading_comprehension.py,sha256=OtV8tu6wyf7rwW3krmyk8bzdNSRS5WkWFgxok4o67_o,9243
-euroeval/prompt_templates/sentiment_classification.py,sha256=tnalqea4TjG6z4xF7tDDKQm7rWrYGg6SIWTX3RDQQ20,10012
-euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
-euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
-euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
-euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
-euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
-euroeval-16.3.0.dist-info/METADATA,sha256=iSfb2jRJO7BfidNgy0jOKUXFh_WwBojxgisOBWQmYHg,15381
-euroeval-16.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.3.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.3.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.3.0.dist-info/RECORD,,

{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{euroeval-16.3.0.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

EuroEval 16.3.0__py3-none-any.whl → 16.4.0__py3-none-any.whl

Potentially problematic release.

EuroEval 16.3.0__py3-none-any.whl → 16.4.0__py3-none-any.whl