PyPI - EuroEval - Versions diffs - 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl - Mend

EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (16) hide show

euroeval/benchmark_modules/hf.py +3 -3
euroeval/benchmark_modules/litellm.py +158 -122
euroeval/benchmark_modules/vllm.py +188 -235
euroeval/constants.py +13 -0
euroeval/data_loading.py +8 -2
euroeval/finetuning.py +22 -0
euroeval/task_group_utils/multiple_choice_classification.py +11 -1
euroeval/task_group_utils/question_answering.py +14 -4
euroeval/task_group_utils/sequence_classification.py +1 -1
euroeval/tokenization_utils.py +121 -18
euroeval/utils.py +13 -8
{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/METADATA +7 -8
{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/RECORD +16 -16
{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/WHEEL +0 -0
{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/entry_points.txt +0 -0
{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/licenses/LICENSE +0 -0

euroeval/task_group_utils/multiple_choice_classification.py CHANGED Viewed

@@ -12,6 +12,8 @@ from transformers.tokenization_utils import PreTrainedTokenizer
 from transformers.tokenization_utils_base import BatchEncoding
 from transformers.trainer import Trainer
+from ..exceptions import InvalidBenchmark
 if t.TYPE_CHECKING:
     from ..types import Labels, Predictions
@@ -19,7 +21,7 @@ logger = logging.getLogger("euroeval")
 class MultipleChoiceClassificationTrainer(Trainer):
-    """Trainer subclass for question answering tasks."""
+    """Trainer subclass for multiple-choice classification tasks."""
     def evaluate(  # type: ignore[override]
         self,
@@ -57,6 +59,8 @@ class MultipleChoiceClassificationTrainer(Trainer):
         )
         predictions = output.predictions
+        if isinstance(predictions, tuple):
+            predictions = predictions[0]
         assert isinstance(predictions, np.ndarray)
         metrics = output.metrics
@@ -150,6 +154,12 @@ def postprocess_predictions_and_labels(
     Returns:
         The postprocessed predictions and labels.
     """
+    if predictions.ndim != 2 or predictions.shape[1] != 2:
+        raise InvalidBenchmark(
+            "Predictions must be a 2D array with shape (num_examples, 2). Found "
+            f"shape {predictions.shape}."
+        )
     mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}
     all_predictions: list[str] = list()

euroeval/task_group_utils/question_answering.py CHANGED Viewed

@@ -8,11 +8,11 @@ from collections import defaultdict
 import evaluate
 import numpy as np
 from evaluate import EvaluationModule
-from transformers.tokenization_utils import PreTrainedTokenizer
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 from transformers.trainer import Trainer
 from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
+from ..exceptions import InvalidBenchmark
 from ..tokenization_utils import get_special_token_metadata
 from ..utils import raise_if_model_output_contains_nan_values
@@ -20,6 +20,7 @@ if t.TYPE_CHECKING:
     import torch.nn as nn
     from datasets.arrow_dataset import Dataset
     from transformers.modeling_utils import PreTrainedModel
+    from transformers.tokenization_utils import PreTrainedTokenizer
     from transformers.tokenization_utils_base import BatchEncoding
     from transformers.trainer_callback import TrainerCallback
     from transformers.trainer_utils import EvalPrediction
@@ -43,6 +44,7 @@ class QuestionAnsweringTrainer(Trainer):
         compute_metrics: "c.Callable[[EvalPrediction], dict[str, float]]",
         callbacks: "list[TrainerCallback]",
         data_collator: "c.Callable",
+        **kwargs,
     ) -> None:
         """Initialise the trainer."""
         super().__init__(
@@ -54,6 +56,7 @@ class QuestionAnsweringTrainer(Trainer):
             compute_metrics=compute_metrics,
             callbacks=callbacks,
             data_collator=data_collator,
+            **kwargs,
         )
         # Get the CLS token id for the tokenizer
@@ -475,7 +478,7 @@ def prepare_test_examples(
 def postprocess_predictions_and_labels(
-    predictions: tuple[np.ndarray, np.ndarray],
+    predictions: tuple[np.ndarray, ...],
     dataset: "Dataset",
     prepared_dataset: "Dataset",
     cls_token_index: int,
@@ -484,7 +487,7 @@ def postprocess_predictions_and_labels(
     Args:
         predictions:
-            A pair of (start_logits, end_logits) predictions.
+            A tuple whose first two elements are (start_logits, end_logits).
         dataset:
             The dataset containing the examples.
         prepared_dataset:
@@ -495,7 +498,14 @@ def postprocess_predictions_and_labels(
     Returns:
         The postprocessed predictions and labels.
     """
-    all_start_logits, all_end_logits = predictions
+    if len(predictions) < 2:
+        raise InvalidBenchmark(
+            "The predictions should be a tuple with the first two elements being "
+            "(start_logits, end_logits), but got {len(predictions)} elements instead: "
+            f"{predictions}."
+        )
+    all_start_logits, all_end_logits = predictions[:2]
     # Build a map from an example to its corresponding features, being the blocks of
     # text from the context that we're feeding into the model. An example can have

euroeval/task_group_utils/sequence_classification.py CHANGED Viewed

@@ -135,7 +135,7 @@ def extract_labels_from_generation(
         if first_label_token_mapping is False:
             raise InvalidBenchmark(
                 "The model outputted logprobs, but the first label token mapping is "
-                "not provided. This means that the model should not output logprobs."
+                "not provided, which is not supported."
             )
         labels = get_closest_logprobs_labels(
             generation_logprobs=model_output.scores,

euroeval/tokenization_utils.py CHANGED Viewed

@@ -8,7 +8,6 @@ import torch
 from .constants import TASK_GROUPS_USING_LOGPROBS
 from .enums import GenerativeType
-from .exceptions import InvalidModel
 from .utils import log_once
 if t.TYPE_CHECKING:
@@ -153,7 +152,9 @@ def should_prefix_space_be_added_to_labels(
     return add_prefix_space
-def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_bos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the beginning-of-sequence token from a tokenizer.
     Args:
@@ -162,7 +163,7 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
     Returns:
         A pair (token, token_id) representing the beginning-of-sequence token and its
-        token ID.
+        token ID, or (None, None) if no BOS token is found.
     """
     if isinstance(tokenizer.bos_token, str) and isinstance(tokenizer.bos_token_id, int):
         return tokenizer.bos_token, tokenizer.bos_token_id
@@ -176,15 +177,25 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             bos_token_id = vocab[bos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have a beginning-of-sequence token. Please ensure that "
-            "this has been set in the tokenizer's configuration."
+            "this has been set in the tokenizer's configuration. Using no BOS token."
+            " This may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
+    log_once(
+        f"Beginning-of-sequence token was not set, but detected it as {bos_token!r} "
+        f"with ID {bos_token_id}.",
+        level=logging.DEBUG,
+    )
     return bos_token, bos_token_id
-def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_eos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the end-of-sequence token from a tokenizer.
     Args:
@@ -193,7 +204,7 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
     Returns:
         A pair (token, token_id) representing the end-of-sequence token and its token
-        ID.
+        ID, or (None, None) if no EOS token is found.
     """
     if isinstance(tokenizer.eos_token, str) and isinstance(tokenizer.eos_token_id, int):
         return tokenizer.eos_token, tokenizer.eos_token_id
@@ -207,14 +218,105 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             eos_token_id = vocab[eos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have an end-of-sequence token. Please ensure that this "
-            "has been set in the tokenizer's configuration."
+            "has been set in the tokenizer's configuration. Using no EOS token. This "
+            "may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
+    log_once(
+        f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
+        f"ID {eos_token_id}.",
+        level=logging.DEBUG,
+    )
     return eos_token, eos_token_id
+def get_pad_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
+    """Get the padding token from a tokenizer.
+    Args:
+        tokenizer:
+            The tokenizer.
+    Returns:
+        A pair (token, token_id) representing the padding token and its token ID, or
+        (None, None) if no padding token is found.
+    """
+    # If the tokenizer already has a padding token, return it
+    if tokenizer.pad_token is not None and tokenizer.pad_token_id is not None:
+        assert isinstance(tokenizer.pad_token, str), (
+            "Expected tokenizer.pad_token to be a string, but got "
+            f"{type(tokenizer.pad_token)}."
+        )
+        assert isinstance(tokenizer.pad_token_id, int), (
+            "Expected tokenizer.pad_token_id to be an integer, but got "
+            f"{type(tokenizer.pad_token_id)}."
+        )
+        return (tokenizer.pad_token, tokenizer.pad_token_id)
+    # If the tokenizer has a BOS token, use it as the padding token
+    if tokenizer.bos_token is not None and tokenizer.bos_token_id is not None:
+        assert isinstance(tokenizer.bos_token, str), (
+            "Expected tokenizer.bos_token to be a string, but got "
+            f"{type(tokenizer.bos_token)}."
+        )
+        assert isinstance(tokenizer.bos_token_id, int), (
+            "Expected tokenizer.bos_token_id to be an integer, but got "
+            f"{type(tokenizer.bos_token_id)}."
+        )
+        pad_token = tokenizer.bos_token
+        pad_token_id = tokenizer.bos_token_id
+    # If the tokenizer has an EOS token, use it as the padding token
+    elif tokenizer.eos_token is not None and tokenizer.eos_token_id is not None:
+        assert isinstance(tokenizer.eos_token, str), (
+            "Expected tokenizer.eos_token to be a string, but got "
+            f"{type(tokenizer.eos_token)}."
+        )
+        assert isinstance(tokenizer.eos_token_id, int), (
+            "Expected tokenizer.eos_token_id to be an integer, but got "
+            f"{type(tokenizer.eos_token_id)}."
+        )
+        pad_token = tokenizer.eos_token
+        pad_token_id = tokenizer.eos_token_id
+    # Otherwise, try to find a candidate padding token in the vocabulary
+    else:
+        pad_token_candidates = [
+            "<pad>",
+            "[pad]",
+            "<|endoftext|>",
+            "<｜end▁of▁sentence｜>",
+            "<|im_end|>",
+        ]
+        pad_token_candidates.extend([c.upper() for c in pad_token_candidates])
+        for candidate in pad_token_candidates:
+            if candidate in tokenizer.get_vocab():
+                pad_token = candidate
+                pad_token_id = tokenizer.get_vocab()[candidate]
+                break
+        else:
+            log_once(
+                "Could not identify a padding token for the model. Please ensure that "
+                "this has been set in the tokenizer's configuration. Using no padding "
+                "token. This may lead to unexpected behavior in the model.",
+                level=logging.INFO,
+            )
+            return None, None
+    log_once(
+        f"Padding token was not set, but detected it as {pad_token!r} with ID "
+        f"{pad_token_id}.",
+        level=logging.DEBUG,
+    )
+    return pad_token, pad_token_id
 def get_end_of_chat_token_ids(tokenizer: "PreTrainedTokenizer") -> list[int] | None:
     """Get the end token ID for chat models.
@@ -291,14 +393,14 @@ def get_first_label_token_mapping(
     if tokenizer is None:
         if output_scores:
             log_once(
-                f"The model {model_config.model_id!r} will output scores, since the "
-                "dataset supports it and no tokenizer is available.",
+                f"We will use logprobs with the model {model_config.model_id!r} "
+                "since the dataset supports it and no tokenizer is available.",
                 level=logging.DEBUG,
             )
         else:
             log_once(
-                f"The model {model_config.model_id!r} will not output scores, since "
-                "the dataset does not support it and no tokenizer is available.",
+                f"We will not use logprobs with the model {model_config.model_id!r} "
+                "since the dataset does not support it and no tokenizer is available.",
                 level=logging.DEBUG,
             )
         return output_scores
@@ -359,7 +461,7 @@ def get_first_label_token_mapping(
             if not matching_tokens:
                 log_once(
                     f"No matching token found in token_list for label '{label}', so "
-                    "we will not output scores.",
+                    "we will not use logprobs with the model.",
                     level=logging.DEBUG,
                 )
                 return False
@@ -369,8 +471,8 @@ def get_first_label_token_mapping(
         # tokens are distinct
         if len(first_tokens) == len(set(first_tokens)):
             log_once(
-                "The model will output scores, since the first tokens of the labels "
-                "are distinct.",
+                "We will use logprobs with the model since the first tokens of the "
+                "labels are distinct.",
                 level=logging.DEBUG,
             )
             return {
@@ -379,7 +481,7 @@ def get_first_label_token_mapping(
             }
         else:
             log_once(
-                "The model will not output scores, since the first tokens of the "
+                "We will not use logprobs with the model since the first tokens of the "
                 "labels are not distinct. The first tokens for the labels "
                 f"{local_labels} are {first_tokens}"
             )
@@ -389,7 +491,8 @@ def get_first_label_token_mapping(
     # evaluation errors. This will force the label extraction to rely on word edit
     # distance instead of logprobs.
     log_once(
-        "The model will not output scores, since the dataset does not have labels.",
+        "We will not use logprobs with the model, since the dataset does not have "
+        "labels.",
         level=logging.DEBUG,
     )
     return False

euroeval/utils.py CHANGED Viewed

@@ -121,6 +121,8 @@ def block_terminal_output() -> None:
     logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
     logging.getLogger("accelerate").setLevel(logging.CRITICAL)
     logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
     logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
     # This suppresses vLLM logging
@@ -352,19 +354,22 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
         asyncio.set_event_loop(None)
-async def catch_coroutine_exception(
-    coroutine: t.Coroutine[t.Any, t.Any, T],
+async def add_semaphore_and_catch_exception(
+    coroutine: t.Coroutine[t.Any, t.Any, T], semaphore: asyncio.Semaphore
 ) -> T | Exception:
-    """Run a coroutine, catching any exceptions and returning them.
+    """Run a coroutine with a semaphore.
     Args:
         coroutine:
             The coroutine to run.
+        semaphore:
+            The semaphore to use.
     Returns:
-        The result of the coroutine, or the exception if it was raised.
+        The result of the coroutine.
     """
-    try:
-        return await coroutine
-    except Exception as exc:
-        return exc
+    async with semaphore:
+        try:
+            return await coroutine
+        except Exception as exc:
+            return exc

{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.9.1
+Version: 15.10.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
 Author-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
-Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>
+Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
 License: MIT License
         Copyright (c) 2022-2024 Dan Saattrup Nielsen
@@ -37,13 +37,12 @@ Requires-Dist: demjson3>=3.0.6
 Requires-Dist: evaluate>=0.4.1
 Requires-Dist: huggingface-hub>=0.30.1
 Requires-Dist: levenshtein>=0.24.0
-Requires-Dist: litellm>=1.63.0
+Requires-Dist: litellm>=1.72.2
 Requires-Dist: more-itertools>=10.5.0
 Requires-Dist: numpy<2.0.0,>=1.23.0
-Requires-Dist: ollama>=0.4.7
+Requires-Dist: ollama>=0.5.1
 Requires-Dist: pandas>=2.2.0
 Requires-Dist: peft>=0.15.0
-Requires-Dist: protobuf~=3.20.0
 Requires-Dist: pydantic>=2.6.0
 Requires-Dist: pyinfer>=0.0.3
 Requires-Dist: python-dotenv>=1.0.1
@@ -62,12 +61,12 @@ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == '
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: gradio>=4.26.0; extra == 'all'
 Requires-Dist: outlines>=0.1.11; extra == 'all'
-Requires-Dist: vllm>=0.9.0; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: outlines>=0.1.11; extra == 'generative'
-Requires-Dist: vllm>=0.9.0; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'generative'
 Provides-Extra: human-evaluation
 Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
 Provides-Extra: test
@@ -93,7 +92,7 @@ ______________________________________________________________________
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
-## Maintainers
+## Maintainer
 - Dan Saattrup Nielsen ([@saattrupdan](https://github.com/saattrupdan),
   dan.nielsen@alexandra.dk)

{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/RECORD RENAMED Viewed

@@ -3,12 +3,12 @@ euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu
 euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
-euroeval/constants.py,sha256=p6kp_R6-Tq5LBvyXyT6Sa6N3SkjEElGS2LSZRBoQaYs,1985
-euroeval/data_loading.py,sha256=L_REtxefte5Ke4xE_Cz01zkfCyKlOYhSqT5ZXXulHPc,3992
+euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
+euroeval/data_loading.py,sha256=2rMLSy8pbntlwmImizMtkTiUzj93mcv5kzYjZELWWfU,4081
 euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
 euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=uuaUxNQJb7TivPQuI1OYQ_MIKbD-6-7mpkobLKsDefQ,10667
+euroeval/finetuning.py,sha256=cx5SVgEsveMDNfoMxwLfAFsjZeKmYyHftaOZWZ-L9hA,11285
 euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
 euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
 euroeval/human_evaluation.py,sha256=zqbbJkqm2Uymf-88PxM3R9vVRR8SZJlq3QrqWEoiVeE,27643
@@ -19,15 +19,15 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
 euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=kghOIZMM3H0P9YDv0VBSNI7drzgJXlkRtMwt3Cgeev8,13907
+euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
 euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
-euroeval/utils.py,sha256=e83OnWc0GJn0Tn_vP3tbqh1DAbLy2ky-LnIlTEOKzKU,11410
+euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
 euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
-euroeval/benchmark_modules/hf.py,sha256=CoiaNakjhg6gm_5IbUUeevXQZebg2VrRLuhzEi2Hhrk,44617
-euroeval/benchmark_modules/litellm.py,sha256=SxSr_0C6b_jVavR3y9QyhfkCOP5-va4zijGfghFTArY,48362
-euroeval/benchmark_modules/vllm.py,sha256=rz_Xau5TGiFeb2VkdVpW_fYOfRCCvYrH0q9BGzCwZlo,42156
+euroeval/benchmark_modules/hf.py,sha256=Nbtn5eZ4axbmL09M8dGZCBr07pn9-btbqGgQ6q7KbHg,44620
+euroeval/benchmark_modules/litellm.py,sha256=LS4mBXXG6h4uJwySPc6SI6f0y_HuiKE7IprprqWpoCI,50601
+euroeval/benchmark_modules/vllm.py,sha256=sgeltOVfZA9bu0AmXV7PtZvuRst0I8s6VOIp0CI6DO8,38880
 euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
@@ -49,13 +49,13 @@ euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLO
 euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
 euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=nB78TzOgd0HBvTclmjOYJid9ZVAgu8IHZsqB_n1SAZU,6178
-euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iYvfoGt0EUObSaXRCGmk,27700
-euroeval/task_group_utils/sequence_classification.py,sha256=Yqx0pUhuHYmSkv1ZUfOndSLTvpr0lWCk19oYITfSjV4,13555
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=LQ6zD1UGi-jGCKI2xUJiQdAXoqb5QMpIJu41B2U0HPw,6543
+euroeval/task_group_utils/question_answering.py,sha256=D4oJL2vQEjHghyxiiiq_vj1IQC6eryqNoLXuTiQEPmw,28071
+euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.9.1.dist-info/METADATA,sha256=UkGmFcnarstFwD1J1eS6h3gbyxnucnaAVLnB5QhkdSo,13555
-euroeval-15.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-15.9.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
-euroeval-15.9.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
-euroeval-15.9.1.dist-info/RECORD,,
+euroeval-15.10.0.dist-info/METADATA,sha256=WUXtSfS6qvrlA25lazql3DvyS5chyMnBPKyu-l65A_I,13472
+euroeval-15.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.10.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.10.0.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.10.0.dist-info/RECORD,,

{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl

Potentially problematic release.

EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl