PyPI - EuroEval - Versions diffs - 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl - Mend

EuroEval 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (48)

euroeval/benchmark_config_factory.py +6 -1
euroeval/benchmark_modules/base.py +2 -0
euroeval/benchmark_modules/fresh.py +7 -1
euroeval/benchmark_modules/hf.py +26 -21
euroeval/benchmark_modules/litellm.py +258 -131
euroeval/benchmark_modules/vllm.py +79 -40
euroeval/benchmarker.py +11 -2
euroeval/cli.py +14 -1
euroeval/constants.py +1 -1
euroeval/data_models.py +77 -6
euroeval/dataset_configs/__init__.py +1 -0
euroeval/dataset_configs/danish.py +14 -0
euroeval/dataset_configs/dutch.py +14 -0
euroeval/dataset_configs/english.py +22 -0
euroeval/dataset_configs/estonian.py +15 -7
euroeval/dataset_configs/finnish.py +14 -0
euroeval/dataset_configs/french.py +14 -0
euroeval/dataset_configs/german.py +23 -0
euroeval/dataset_configs/italian.py +14 -0
euroeval/dataset_configs/latvian.py +14 -0
euroeval/dataset_configs/norwegian.py +14 -0
euroeval/dataset_configs/polish.py +126 -0
euroeval/dataset_configs/portuguese.py +14 -0
euroeval/dataset_configs/spanish.py +14 -0
euroeval/dataset_configs/swedish.py +25 -0
euroeval/enums.py +12 -0
euroeval/generation.py +17 -8
euroeval/generation_utils.py +58 -10
euroeval/metrics/pipeline.py +1 -1
euroeval/prompt_templates/linguistic_acceptability.py +9 -0
euroeval/prompt_templates/multiple_choice.py +27 -1
euroeval/prompt_templates/named_entity_recognition.py +20 -0
euroeval/prompt_templates/reading_comprehension.py +11 -0
euroeval/prompt_templates/sentiment_classification.py +15 -0
euroeval/prompt_templates/summarization.py +27 -1
euroeval/scores.py +5 -0
euroeval/task_group_utils/question_answering.py +29 -29
euroeval/task_group_utils/sequence_classification.py +10 -33
euroeval/task_group_utils/token_classification.py +3 -3
euroeval/tasks.py +4 -4
euroeval/{tokenization_utils.py → tokenisation_utils.py} +40 -23
euroeval/utils.py +36 -3
{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/METADATA +1 -1
euroeval-16.1.0.dist-info/RECORD +70 -0
euroeval-16.0.1.dist-info/RECORD +0 -69
{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/WHEEL +0 -0
{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt +0 -0
{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE +0 -0

euroeval/{tokenization_utils.py → tokenisation_utils.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Utility functions related to tokenization."""
+"""Utility functions related to tokenisation."""
 import logging
 import re
@@ -79,8 +79,8 @@ def should_prompts_be_stripped(
     """Determine if we should strip the prompts for few-shot evaluation.
     This is the case if the tokeniser needs to include the space as part of the label
-    token. The strategy is thus to tokenize a label with a preceeding colon (as in the
-    prompts), i.e., ": positive", and check if the tokenization starts with the tokens
+    token. The strategy is thus to tokenise a label with a preceeding colon (as in the
+    prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
     of ": ". If this is the case, then we should not strip the prompts, since the
     tokeniser produces the whitespace token separately.
@@ -88,7 +88,7 @@ def should_prompts_be_stripped(
         labels_to_be_generated:
             The labels that are to be generated.
         tokeniser:
-            The tokeniser used to tokenize the labels.
+            The tokeniser used to tokenise the labels.
     Returns:
         Whether we should strip the prompts.
@@ -124,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
         labels_to_be_generated:
             The labels that are to be generated.
         tokeniser:
-            The tokeniser used to tokenize the labels.
+            The tokeniser used to tokenise the labels.
     Returns:
         Whether we should add a prefix space to the labels.
@@ -318,7 +318,9 @@ def get_pad_token(
     return pad_token, pad_token_id
-def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | None:
+def get_end_of_chat_token_ids(
+    tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
+) -> list[int] | None:
     """Get the end token ID for chat models.
     This is only relevant for tokenisers with a chat template.
@@ -326,20 +328,23 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
     Args:
         tokeniser:
             The tokeniser.
+        generative_type:
+            The generative type, or None if not available.
     Returns:
         The token IDs used to end chats, or None if the tokeniser does not have a chat
         template or if no end-of-chat token could be found.
     """
-    if not has_chat_template(tokeniser=tokeniser):
+    if generative_type == GenerativeType.BASE:
         return None
     user_message: dict[str, str] = dict(role="user", content="X")
     token_ids = apply_chat_template(
         conversation=[user_message],
         tokeniser=tokeniser,
-        tokenize=True,
+        tokenise=True,
         add_generation_prompt=False,
+        enable_thinking=generative_type == GenerativeType.REASONING,
     )
     assert isinstance(token_ids, list)
@@ -420,7 +425,7 @@ def get_first_label_token_mapping(
         for label in dataset_config.labels
     ]
-    # Tokenize some text containing each label, which we will use to extract the
+    # Tokenise some text containing each label, which we will use to extract the
     # first token of each label
     all_tokens: list[list[str]]
     if not has_chat_template(tokeniser=tokeniser):
@@ -439,11 +444,13 @@ def get_first_label_token_mapping(
                         dict(role="user", content=""),
                         dict(role="assistant", content=label),
                         # Adding extra user message as Mistral tokenisers require
-                        # conversamtions to end with a user message
+                        # conversations to end with a user message
                         dict(role="user", content=""),
                     ],
                     tokeniser=tokeniser,
-                    tokenize=True,
+                    tokenise=True,
+                    add_generation_prompt=True,
+                    enable_thinking=generative_type == GenerativeType.REASONING,
                 )
             )
             for label in local_labels
@@ -537,9 +544,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
 def apply_chat_template(
     conversation: list[dict[str, str]],
     tokeniser: "PreTrainedTokenizer",
-    tokenize: bool = False,
-    add_generation_prompt: bool = True,
-    **transformers_tokeniser_kwargs,
+    tokenise: bool,
+    add_generation_prompt: bool,
+    enable_thinking: bool,
+    **extra_kwargs,
 ) -> str | list[int]:
     """Apply the chat template to a prompt.
@@ -548,38 +556,47 @@ def apply_chat_template(
             The conversation to apply the chat template to.
         tokeniser:
             The tokeniser.
-        tokenize:
-            Whether to tokenize the resulting prompt, returning a list of token IDs
+        tokenise:
+            Whether to tokenise the resulting prompt, returning a list of token IDs
             instead of a string.
         add_generation_prompt:
             Whether to add a generation prompt at the end of the conversation. This is
             only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
             always add a generation prompt.
-        **transformers_tokeniser_kwargs:
-            Additional keyword arguments to pass to the tokeniser, in case the tokeniser
-            is a regular Hugging Face tokeniser.
+        enable_thinking:
+            Whether to enable special handling for reasoning models, such as adding
+            special tokens for thinking. This is only relevant for regular Hugging
+            Face tokenisers, as Mistral tokenisers always handle reasoning models.
+        **extra_kwargs:
+            Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
+            method. Only relevant for regular Hugging Face tokenisers.
     Returns:
         The prompt with the chat template applied, either as a string or a list of
-        token IDs, depending on the value of `tokenize`.
+        token IDs, depending on the value of `tokenise`.
     Raises:
         InvalidModel:
             If the tokeniser does not have a chat template.
     """
+    # Ensure that the first user message is not empty, as this can cause issues with
+    # Jinja2
+    conversation[0]["content"] = conversation[0]["content"] or " "
     if not has_chat_template(tokeniser=tokeniser):
         raise InvalidModel(
             "The tokeniser does not have a chat template, so cannot apply it."
         )
     elif isinstance(tokeniser, MistralCommonTokenizer):
         templated_prompt = tokeniser.apply_chat_template(
-            conversation=conversation, tokenize=tokenize
+            conversation=conversation, tokenize=tokenise
         )
     else:
         templated_prompt = tokeniser.apply_chat_template(
             conversation=conversation,
             add_generation_prompt=add_generation_prompt,
-            tokenize=tokenize,
-            **transformers_tokeniser_kwargs,
+            tokenize=tokenise,
+            enable_thinking=enable_thinking,
+            **extra_kwargs,
         )
     return templated_prompt

euroeval/utils.py CHANGED Viewed

@@ -4,7 +4,6 @@ import asyncio
 import gc
 import importlib
 import importlib.metadata
-import importlib.util
 import logging
 import os
 import random
@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
 from requests.exceptions import RequestException
 from transformers import logging as tf_logging
-from .exceptions import InvalidBenchmark, NaNValueInModelOutput
+from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
 if t.TYPE_CHECKING:
     from types import TracebackType
+    from .data_models import ModelIdComponents
     from .types import Predictions
@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
     loop = asyncio.new_event_loop()
     try:
         asyncio.set_event_loop(loop)
-        return loop.run_until_complete(coroutine)
+        response = loop.run_until_complete(coroutine)
+        return response
     finally:
         loop.close()
         asyncio.set_event_loop(None)
@@ -488,3 +489,35 @@ def extract_multiple_choice_labels(
             f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
         )
     return sample_candidate_labels
+def split_model_id(model_id: str) -> "ModelIdComponents":
+    """Split a model ID into its components.
+    Args:
+        model_id:
+            The model ID to split.
+    Returns:
+        The split model ID.
+    Raises:
+        If the model ID is not valid.
+    """
+    # Importing here to avoid circular imports
+    from .data_models import ModelIdComponents
+    # Attempt to extract the model ID, revision, and param using regex
+    model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
+    revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
+    param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
+    # If we cannot extract the model ID, raise an error
+    if model_id_match is None:
+        raise InvalidModel(f"The model ID {model_id!r} is not valid.")
+    model_id = model_id_match.group()
+    # Extract the revision and param and return the result
+    revision = revision_match.group(1) if revision_match is not None else "main"
+    param = param_match.group(1) if param_match is not None else None
+    return ModelIdComponents(model_id=model_id, revision=revision, param=param)

{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.0.1
+Version: 16.1.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues

euroeval-16.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,70 @@
+euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
+euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
+euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
+euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
+euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
+euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
+euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
+euroeval/generation_utils.py,sha256=OtEXLhI6L1vlbC768dH3xzj0qkokz43m0vswGKrRmBA,18061
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
+euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
+euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
+euroeval/tokenisation_utils.py,sha256=jRIi9m8XmGh3LeZna47AWmJI9U9m4ojXQynQTe7kzWc,21344
+euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
+euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
+euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
+euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
+euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
+euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
+euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
+euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
+euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
+euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
+euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
+euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
+euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
+euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
+euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
+euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
+euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
+euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
+euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
+euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
+euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
+euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
+euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
+euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
+euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
+euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
+euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
+euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
+euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
+euroeval/task_group_utils/sequence_classification.py,sha256=qWUUrh4X4jK2XfUzP4aoPDoJhVJifrnDEaaw_F48hig,16080
+euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
+euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
+euroeval-16.1.0.dist-info/METADATA,sha256=pYdW0IZwY8vatTA55EERxBK1kMaQuGhqzNys5xiSqsM,13729
+euroeval-16.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.1.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.1.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.1.0.dist-info/RECORD,,

euroeval-16.0.1.dist-info/RECORD DELETED Viewed

@@ -1,69 +0,0 @@
-euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
-euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
-euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
-euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
-euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
-euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
-euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
-euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
-euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
-euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
-euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
-euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
-euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
-euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
-euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
-euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
-euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
-euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
-euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
-euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
-euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
-euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
-euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
-euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
-euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
-euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
-euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
-euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
-euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
-euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
-euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
-euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
-euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
-euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
-euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
-euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
-euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
-euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
-euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
-euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
-euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
-euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
-euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
-euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
-euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
-euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.0.1.dist-info/RECORD,,

{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

EuroEval 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl

Potentially problematic release.

EuroEval 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl