EuroEval 15.11.0-py3-none-any.whl → 15.12.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- euroeval/benchmark_config_factory.py +7 -0
- euroeval/benchmark_modules/vllm.py +1 -1
- euroeval/benchmarker.py +7 -0
- euroeval/cli.py +10 -0
- euroeval/data_models.py +7 -0
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/portuguese.py +74 -0
- euroeval/dataset_configs/spanish.py +4 -3
- euroeval/generation.py +22 -4
- euroeval/generation_utils.py +0 -1
- euroeval/human_evaluation.py +1 -0
- euroeval/languages.py +1 -2
- euroeval/prompt_templates/linguistic_acceptability.py +9 -1
- euroeval/prompt_templates/multiple_choice.py +9 -1
- euroeval/prompt_templates/named_entity_recognition.py +20 -1
- euroeval/prompt_templates/sentiment_classification.py +11 -1
- euroeval/prompt_templates/summarization.py +8 -1
- {euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/METADATA +1 -1
- {euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/RECORD +22 -21
- {euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/WHEEL +0 -0
- {euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_config_factory.py
CHANGED

@@ -42,6 +42,7 @@ def build_benchmark_config(
     num_iterations: int,
     api_base: str | None,
     api_version: str | None,
+    gpu_memory_utilization: float,
     debug: bool,
     run_with_cli: bool,
     only_allow_safetensors: bool,

@@ -102,6 +103,11 @@ def build_benchmark_config(
            model on an inference API.
        api_version:
            The version of the API to use for a given inference API.
+        gpu_memory_utilization:
+            The GPU memory utilization to use for vLLM. A larger value will result in
+            faster evaluation, but at the risk of running out of GPU memory. Only reduce
+            this if you are running out of GPU memory. Only relevant if the model is
+            generative.
        debug:
            Whether to run the benchmark in debug mode.
        run_with_cli:

@@ -154,6 +160,7 @@ def build_benchmark_config(
         num_iterations=num_iterations,
         api_base=api_base,
         api_version=api_version,
+        gpu_memory_utilization=gpu_memory_utilization,
         debug=debug,
         run_with_cli=run_with_cli,
         only_allow_safetensors=only_allow_safetensors,
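Taken together with the edits below, the new option is threaded end to end: the CLI flag feeds `Benchmarker`, which passes it through `build_benchmark_config` into `BenchmarkConfig`, from where the vLLM module reads it.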
euroeval/benchmark_modules/vllm.py
CHANGED

@@ -757,7 +757,7 @@ def load_model_and_tokenizer(
     model = LLM(
         model=model_id,
         tokenizer=model_id,
-        gpu_memory_utilization=
+        gpu_memory_utilization=benchmark_config.gpu_memory_utilization,
         max_model_len=min(true_max_model_len, MAX_CONTEXT_LENGTH),
         download_dir=download_dir,
         trust_remote_code=benchmark_config.trust_remote_code,
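For reference, `gpu_memory_utilization` is a standard argument of vLLM's `LLM` constructor: it sets the fraction of each GPU's memory that vLLM pre-allocates for model weights and KV cache (vLLM's own default is 0.9). A minimal standalone sketch, with an illustrative model id:

    from vllm import LLM

    # Reserve 80% of GPU memory instead of 90%; a smaller reservation
    # leaves headroom for other processes but shrinks the KV cache,
    # slowing batched generation.
    llm = LLM(model="some-org/some-model", gpu_memory_utilization=0.8)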
euroeval/benchmarker.py
CHANGED

@@ -78,6 +78,7 @@ class Benchmarker:
         num_iterations: int = 10,
         api_base: str | None = None,
         api_version: str | None = None,
+        gpu_memory_utilization: float = 0.9,
         debug: bool = False,
         run_with_cli: bool = False,
         only_allow_safetensors: bool = False,

@@ -145,6 +146,11 @@ class Benchmarker:
                to a model on an inference API. Defaults to None.
            api_version:
                The version of the API to use. Defaults to None.
+            gpu_memory_utilization:
+                The GPU memory utilization to use for vLLM. Only relevant if the model
+                is generative. A larger value will result in faster evaluation, but at
+                the risk of running out of GPU memory. Only reduce this if you are
+                running out of GPU memory. Defaults to 0.9.
            debug:
                Whether to output debug information. Defaults to False.
            run_with_cli:

@@ -192,6 +198,7 @@ class Benchmarker:
             num_iterations=num_iterations,
             api_base=api_base,
             api_version=api_version,
+            gpu_memory_utilization=gpu_memory_utilization,
             debug=debug,
             run_with_cli=run_with_cli,
             only_allow_safetensors=only_allow_safetensors,
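From the Python API the new argument is passed at construction time. A minimal sketch, assuming `Benchmarker` is importable from the package root and that its `benchmark` method is the usual entry point (model id illustrative):

    from euroeval import Benchmarker

    # Lower the value from the 0.9 default only if vLLM runs out of GPU memory.
    benchmarker = Benchmarker(gpu_memory_utilization=0.8)
    benchmarker.benchmark(model="some-org/some-model")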
euroeval/cli.py
CHANGED

@@ -186,6 +186,14 @@ from .tasks import get_all_tasks
    help="The version of the API to use. Only relevant if `model` refers to a model on "
    "an inference API.",
 )
+@click.option(
+    "--gpu-memory-utilization",
+    default=0.9,
+    show_default=True,
+    help="The GPU memory utilization to use for vLLM. A larger value will result in "
+    "faster evaluation, but at the risk of running out of GPU memory. Only reduce this "
+    "if you are running out of GPU memory. Only relevant if the model is generative.",
+)
 @click.option(
     "--debug/--no-debug",
     default=False,

@@ -223,6 +231,7 @@ def benchmark(
     num_iterations: int,
     api_base: str | None,
     api_version: str | None,
+    gpu_memory_utilization: float,
     debug: bool,
     only_allow_safetensors: bool,
 ) -> None:

@@ -258,6 +267,7 @@ def benchmark(
         num_iterations=num_iterations,
         api_base=api_base,
         api_version=api_version,
+        gpu_memory_utilization=gpu_memory_utilization,
         debug=debug,
         run_with_cli=True,
         only_allow_safetensors=only_allow_safetensors,
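On the command line the same knob is exposed as `--gpu-memory-utilization` (default 0.9), e.g. `euroeval --model <model-id> --gpu-memory-utilization 0.8` via the `euroeval` entry point.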
euroeval/data_models.py
CHANGED

@@ -168,6 +168,11 @@ class BenchmarkConfig:
        api_version:
            The version of the API to use. Only relevant if `model` refers to a model on
            an inference API.
+        gpu_memory_utilization:
+            The GPU memory utilization to use for vLLM. A larger value will result in
+            faster evaluation, but at the risk of running out of GPU memory. Only reduce
+            this if you are running out of GPU memory. Only relevant if the model is
+            generative.
        debug:
            Whether to run the benchmark in debug mode.
        run_with_cli:

@@ -196,6 +201,7 @@ class BenchmarkConfig:
     num_iterations: int
     api_base: str | None
     api_version: str | None
+    gpu_memory_utilization: float
     debug: bool
     run_with_cli: bool
     only_allow_safetensors: bool

@@ -227,6 +233,7 @@ class BenchmarkConfigParams(pydantic.BaseModel):
     num_iterations: int
     api_base: str | None
     api_version: str | None
+    gpu_memory_utilization: float
     debug: bool
     run_with_cli: bool
     only_allow_safetensors: bool
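Note that the field is required (it has no default) on both `BenchmarkConfig` and the pydantic `BenchmarkConfigParams`; the 0.9 default lives in `Benchmarker` and the CLI. A toy sketch of the type validation pydantic adds for the new field (class abridged and hypothetical):

    import pydantic

    class Params(pydantic.BaseModel):  # stand-in for BenchmarkConfigParams
        gpu_memory_utilization: float

    Params(gpu_memory_utilization=0.8)     # ok
    Params(gpu_memory_utilization="oops")  # raises pydantic.ValidationError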
euroeval/dataset_configs/__init__.py
CHANGED

@@ -13,6 +13,7 @@ from .german import *  # noqa: F403
 from .icelandic import *  # noqa: F403
 from .italian import *  # noqa: F403
 from .norwegian import *  # noqa: F403
+from .portuguese import *  # noqa: F403
 from .spanish import *  # noqa: F403
 from .swedish import *  # noqa: F403
 
euroeval/dataset_configs/portuguese.py
ADDED

@@ -0,0 +1,74 @@
+"""All Portuguese dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import PT
+from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, SENT, SUMM
+
+### Official datasets ###
+
+SST2_PT_CONFIG = DatasetConfig(
+    name="sst2-pt",
+    pretty_name="the truncated version of the Portuguese sentiment classification "
+    "dataset SST2-pt, translated from the English SST2 dataset",
+    huggingface_id="EuroEval/sst2-pt-mini",
+    task=SENT,
+    languages=[PT],
+    _labels=["positive", "negative"],
+)
+
+
+MMLU_PT_CONFIG = DatasetConfig(
+    name="mmlu-pt",
+    pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-pt-mini",
+    task=KNOW,
+    languages=[PT],
+)
+
+
+GOLDENSWAG_PT_CONFIG = DatasetConfig(
+    name="goldenswag-pt",
+    pretty_name="the truncated version of the Portuguese common-sense reasoning "
+    "dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-pt-mini",
+    task=COMMON_SENSE,
+    languages=[PT],
+)
+
+
+SCALA_PT = DatasetConfig(
+    name="scala-pt",
+    pretty_name="the Portuguese part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-pt",
+    task=LA,
+    languages=[PT],
+)
+
+HAREM_CONFIG = DatasetConfig(
+    name="harem",
+    pretty_name="the Portuguese named entity recognition dataset HAREM",
+    huggingface_id="EuroEval/harem",
+    task=NER,
+    languages=[PT],
+)
+
+PUBLICO_CONFIG = DatasetConfig(
+    name="publico",
+    pretty_name="the truncated version of the Portuguese summarisation dataset Público",
+    huggingface_id="EuroEval/publico-mini",
+    task=SUMM,
+    languages=[PT],
+)
+
+
+### Unofficial datasets ###
+
+BOOLQ_PT_CONFIG = DatasetConfig(
+    name="boolq-pt",
+    pretty_name="the Portuguese multiple choice reading comprehension dataset "
+    "BoolQ-pt, translated from the English BoolQ dataset",
+    huggingface_id="EuroEval/boolq-pt",
+    task=MCRC,
+    languages=[PT],
+)
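Once registered via the star import above, the new datasets can be selected by name, e.g. `euroeval --model <model-id> --dataset sst2-pt` (assuming the existing `--dataset` flag).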
euroeval/dataset_configs/spanish.py
CHANGED

@@ -8,7 +8,8 @@ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
 SENTIMENT_HEADLINES_CONFIG = DatasetConfig(
     name="sentiment-headlines-es",
-    pretty_name="the truncated version of the Spanish sentiment
+    pretty_name="the truncated version of the Spanish sentiment classification dataset "
+    "SentimentHeadlines",
     huggingface_id="EuroEval/sentiment-headlines-es",
     task=SENT,
     languages=[ES],

@@ -33,7 +34,7 @@ CONLL_ES_CONFIG = DatasetConfig(
 
 MLQA_ES_CONFIG = DatasetConfig(
     name="mlqa-es",
-    pretty_name="the Spanish version of the
+    pretty_name="the Spanish version of the reading comprehension dataset MLQA",
     huggingface_id="EuroEval/mlqa-es",
     task=RC,
     languages=[ES],

@@ -70,7 +71,7 @@ HELLASWAG_ES_CONFIG = DatasetConfig(
 
 XQUAD_ES_CONFIG = DatasetConfig(
     name="xquad-es",
-    pretty_name="the Spanish version of the
+    pretty_name="the Spanish version of the reading comprehension dataset XQuAD",
     huggingface_id="EuroEval/xquad-es",
     task=RC,
     languages=[ES],
euroeval/generation.py
CHANGED

@@ -200,17 +200,35 @@ def generate_single_iteration(
     all_preds.extend(extracted_labels)
 
     if "label" in non_cached_dataset.column_names:
+        non_cached_labels = non_cached_dataset["label"]
+        if not isinstance(non_cached_labels, list):
+            non_cached_labels = list(non_cached_labels)
+        cached_labels = cached_dataset["label"]
+        if not isinstance(cached_labels, list):
+            cached_labels = list(cached_labels)
         ground_truth = [
             label.lower() if isinstance(label, str) else label
-            for label in
+            for label in non_cached_labels + cached_labels
         ]
     elif "labels" in non_cached_dataset.column_names:
+        non_cached_labels = non_cached_dataset["labels"]
+        if not isinstance(non_cached_labels, list):
+            non_cached_labels = list(non_cached_labels)
+        cached_labels = cached_dataset["labels"]
+        if not isinstance(cached_labels, list):
+            cached_labels = list(cached_labels)
         ground_truth = [
             [label.lower() if isinstance(label, str) else label for label in label_list]
-            for label_list in
+            for label_list in non_cached_labels + cached_labels
         ]
     elif "target_text" in non_cached_dataset.column_names:
-
+        non_cached_labels = non_cached_dataset["target_text"]
+        if not isinstance(non_cached_labels, list):
+            non_cached_labels = list(non_cached_labels)
+        cached_labels = cached_dataset["target_text"]
+        if not isinstance(cached_labels, list):
+            cached_labels = list(cached_labels)
+        ground_truth = non_cached_labels + cached_labels
     else:
         raise ValueError(
             "The dataset must have either a 'label', 'labels', or 'target_text' column"

@@ -306,7 +324,7 @@ def debug_log(
     ):
         logger.info(
             f"Input: '{input_text}'\n"
-            f"Raw
+            f"Raw output: '{raw_output}'\n"
             f"Prediction: '{prediction}'\n"
             f"Label: '{label}'"
         )
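All three branches apply the same pattern: a Hugging Face `Dataset` column is only guaranteed to be list-like, so both the non-cached and cached columns are coerced to plain lists before being concatenated with `+`. A condensed sketch of the idea (the `as_list` helper is hypothetical, not part of the patch):

    def as_list(column) -> list:
        # Dataset columns can come back as list-like sequences rather than
        # plain lists, and `+` concatenation needs real lists on both sides.
        return column if isinstance(column, list) else list(column)

    ground_truth = as_list(non_cached_dataset["label"]) + as_list(cached_dataset["label"])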
euroeval/generation_utils.py
CHANGED
euroeval/human_evaluation.py
CHANGED
euroeval/languages.py
CHANGED

@@ -36,7 +36,7 @@ NN = Language(
 )
 ES = Language(code="es", name="Spanish", _and_separator="y", _or_separator="o")
 SV = Language(code="sv", name="Swedish", _and_separator="och", _or_separator="eller")
-
+PT = Language(code="pt", name="Portuguese", _and_separator="e", _or_separator="ou")
 
 AB = Language(code="ab", name="Abkhazian")
 AA = Language(code="aa", name="Afar")

@@ -152,7 +152,6 @@ PI = Language(code="pi", name="Pali")
 PS = Language(code="ps", name="Pashto")
 FA = Language(code="fa", name="Persian")
 PL = Language(code="pl", name="Polish")
-PT = Language(code="pt", name="Portuguese")
 PA = Language(code="pa", name="Punjabi")
 QU = Language(code="qu", name="Quechua")
 RO = Language(code="ro", name="Romanian")
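The `_and_separator` and `_or_separator` fields supply language-specific conjunctions, presumably used when prompts enumerate the allowed labels via `{labels_str}` in the templates below. An illustrative sketch of that kind of joining (helper hypothetical, not EuroEval's actual implementation):

    def join_labels(labels: list[str], or_sep: str) -> str:
        # e.g. Portuguese (or_sep="ou"): 'positivo, neutro ou negativo'
        if len(labels) < 2:
            return "".join(labels)
        return f"{', '.join(labels[:-1])} {or_sep} {labels[-1]}"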
euroeval/prompt_templates/linguistic_acceptability.py
CHANGED

@@ -1,7 +1,7 @@
 """Templates for the Linguistic Acceptability task."""
 
 from ..data_models import PromptConfig
-from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
 
 LA_TEMPLATES = {
     DA: PromptConfig(

@@ -36,6 +36,14 @@ LA_TEMPLATES = {
         default_instruction_prompt="Texto: {text}\n\nDetermina si el texto es "
         "gramaticalmente correcto o no. Responde con {labels_str}, y nada más.",
     ),
+    PT: PromptConfig(
+        default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
+        default_prompt_prefix="Seguem-se abaixo textos e se são "
+        "gramaticalmente correctos",
+        default_prompt_template="Texto: {text}\nGramaticalmente correcto: {label}",
+        default_instruction_prompt="Texto: {text}\n\nDetermina se o texto é "
+        "gramaticalmente correcto ou não. Responde com {labels_str}, e nada mais.",
+    ),
     FI: PromptConfig(
         default_prompt_label_mapping=dict(correct="kyllä", incorrect="ei"),
         default_prompt_prefix="Seuraavat ovat lauseita ja ovatko ne "
euroeval/prompt_templates/multiple_choice.py
CHANGED

@@ -1,7 +1,7 @@
 """Templates for all multiple choice tasks."""
 
 from ..data_models import PromptConfig
-from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV
 
 # TODO: Missing Faroese
 MULTIPLE_CHOICE_TEMPLATES = {

@@ -36,6 +36,14 @@ MULTIPLE_CHOICE_TEMPLATES = {
         "usando solo {labels_str}, y nada más.",
         default_prompt_label_mapping="auto",
     ),
+    PT: PromptConfig(
+        default_prompt_prefix="As seguintes são perguntas de escolha múltipla "
+        "(com respostas).",
+        default_prompt_template="Pergunta: {text}\nResposta: {label}",
+        default_instruction_prompt="Pergunta: {text}\n\nResponde à pergunta "
+        "acima usando só {labels_str}, e nada mais.",
+        default_prompt_label_mapping="auto",
+    ),
     FI: PromptConfig(
         default_prompt_prefix="Seuraavat ovat monivalintakysymyksiä (vastauksineen).",
         default_prompt_template="Kysymys: {text}\nVastaus: {label}",
euroeval/prompt_templates/named_entity_recognition.py
CHANGED

@@ -1,7 +1,7 @@
 """Templates for the Named Entity Recognition task."""
 
 from ..data_models import PromptConfig
-from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
 
 NER_TEMPLATES = {
     DA: PromptConfig(

@@ -80,6 +80,25 @@ NER_TEMPLATES = {
         "claves {labels_str}. Los valores deben ser listas de las "
         "entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
     ),
+    PT: PromptConfig(
+        default_prompt_label_mapping={
+            "b-per": "pessoa",
+            "i-per": "pessoa",
+            "b-loc": "local",
+            "i-loc": "local",
+            "b-org": "organização",
+            "i-org": "organização",
+            "b-misc": "diverso",
+            "i-misc": "diverso",
+        },
+        default_prompt_prefix="Seguem-se frases e dicionários JSON com as entidades "
+        "mencionadas presentes na frase indicada.",
+        default_prompt_template="Frase: {text}\nEntidades mencionadas: {label}",
+        default_instruction_prompt="Frase: {text}\n\nIdentifica as entidades "
+        "mencionadas na frase. Deves devolver um dicionário JSON com as chaves "
+        "{labels_str}. Os valores devem ser listas contendo as entidades "
+        "mencionadas desse tipo, tal como ocorrem na frase.",
+    ),
     FI: PromptConfig(
         default_prompt_label_mapping={
             "b-per": "henkilö",
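As the new Portuguese template spells out, models must answer with a JSON dictionary keyed by the mapped entity types. An invented example answer for a sentence mentioning a person and a place:

    # Hypothetical model answer for "O José visitou Lisboa.":
    {"pessoa": ["José"], "local": ["Lisboa"], "organização": [], "diverso": []}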
euroeval/prompt_templates/sentiment_classification.py
CHANGED

@@ -1,7 +1,7 @@
 """Templates for the Sentiment Analysis task."""
 
 from ..data_models import PromptConfig
-from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
 
 SENT_TEMPLATES = {
     DA: PromptConfig(

@@ -44,6 +44,16 @@ SENT_TEMPLATES = {
         default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
         "documento. Responde con {labels_str}, y nada más.",
     ),
+    PT: PromptConfig(
+        default_prompt_label_mapping=dict(
+            positive="positivo", neutral="neutro", negative="negativo"
+        ),
+        default_prompt_prefix="Abaixo encontras documentos e os seus "
+        "sentimentos correspondentes, que podem ser {labels_str}.",
+        default_prompt_template="Documento: {text}\nSentimento: {label}",
+        default_instruction_prompt="Documento: {text}\n\nClassifica o "
+        "sentimento do documento. Responde apenas com {labels_str}.",
+    ),
     FI: PromptConfig(
         default_prompt_label_mapping=dict(
             positive="positiivinen", neutral="neutrali", negative="negatiivinen"
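For illustration, the new few-shot template renders like this (document text invented):

    template = "Documento: {text}\nSentimento: {label}"
    template.format(text="Adorei o filme.", label="positivo")
    # -> 'Documento: Adorei o filme.\nSentimento: positivo'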
euroeval/prompt_templates/summarization.py
CHANGED

@@ -1,7 +1,7 @@
 """Templates for the Summarization task."""
 
 from ..data_models import PromptConfig
-from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV
 
 # TODO: Missing Faroese
 SUMM_TEMPLATES = {

@@ -36,6 +36,13 @@ SUMM_TEMPLATES = {
         "documento anterior.",
         default_prompt_label_mapping=dict(),
     ),
+    PT: PromptConfig(
+        default_prompt_prefix="Abaixo encontras documentos com resumos associados.",
+        default_prompt_template="Documento: {text}\nResumo: {target_text}",
+        default_instruction_prompt="Documento: {text}\n\nEscreve um resumo do "
+        "documento anterior.",
+        default_prompt_label_mapping=dict(),
+    ),
     FI: PromptConfig(
         default_prompt_prefix="Seuraavassa on artikkeleita ja niihin liittyviä "
         "tiivistelmiä.",
{euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
 euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
-euroeval/benchmark_config_factory.py,sha256=
-euroeval/benchmarker.py,sha256=
+euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
+euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
 euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=
+euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
 euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
 euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
-euroeval/data_models.py,sha256=
+euroeval/data_models.py,sha256=gPHyIoN2A5_O-cJgyb6jhn6enH8zsiIBI09W_wdHMQs,22031
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
 euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
 euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
-euroeval/generation.py,sha256=
-euroeval/generation_utils.py,sha256=
-euroeval/human_evaluation.py,sha256=
-euroeval/languages.py,sha256=
+euroeval/generation.py,sha256=1fqFEWwM2RzI3uPZem95VFWbN8EfrKZQTrHEP34ihHs,11622
+euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
+euroeval/human_evaluation.py,sha256=Jtz3K5Lqne48wPZWf4EAd3d-n_wX27nGJHigjhV1D7s,27537
+euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
 euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
 euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
 euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748

@@ -28,8 +28,8 @@ euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1Pn
 euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
 euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
 euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
-euroeval/benchmark_modules/vllm.py,sha256=
-euroeval/dataset_configs/__init__.py,sha256=
+euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
+euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
 euroeval/dataset_configs/english.py,sha256=1q8XJqIVWBBNkldL7t-cVnU2O9EUb9_xoVRSN8arN90,2561

@@ -40,23 +40,24 @@ euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbK
 euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
 euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
 euroeval/dataset_configs/norwegian.py,sha256=30YGdDPtDszG10BNDVHb-XXTGgGIIgDUNGoeM9q0K_E,5385
-euroeval/dataset_configs/
+euroeval/dataset_configs/portuguese.py,sha256=-HSDsujWfK__nV2SCu-z0ne0AXLDszOT05oYphQUDTw,2063
+euroeval/dataset_configs/spanish.py,sha256=Yzm1kiilEKoHyd3xD2wrw596Ac9UcaWhlE93GlOFjlc,2558
 euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
 euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=
-euroeval/prompt_templates/multiple_choice.py,sha256=
-euroeval/prompt_templates/named_entity_recognition.py,sha256=
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
+euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
+euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
 euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
-euroeval/prompt_templates/sentiment_classification.py,sha256=
-euroeval/prompt_templates/summarization.py,sha256=
+euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
+euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
 euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
 euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
 euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
 euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
-euroeval-15.
-euroeval-15.
-euroeval-15.
-euroeval-15.
-euroeval-15.
+euroeval-15.12.0.dist-info/METADATA,sha256=8cY6HWgAZgrCkIA20lVKuf42y-e7U1MZQZSTdF3e7ig,13479
+euroeval-15.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.12.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.12.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-15.12.0.dist-info/RECORD,,
{euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/WHEEL
File without changes

{euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/entry_points.txt
File without changes

{euroeval-15.11.0.dist-info → euroeval-15.12.0.dist-info}/licenses/LICENSE
File without changes