EuroEval 15.11.0-py3-none-any.whl → 15.13.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (35)
  1. euroeval/benchmark_config_factory.py +7 -0
  2. euroeval/benchmark_modules/vllm.py +1 -1
  3. euroeval/benchmarker.py +7 -0
  4. euroeval/cli.py +10 -0
  5. euroeval/data_models.py +7 -0
  6. euroeval/dataset_configs/__init__.py +1 -0
  7. euroeval/dataset_configs/danish.py +10 -0
  8. euroeval/dataset_configs/dutch.py +10 -0
  9. euroeval/dataset_configs/english.py +10 -0
  10. euroeval/dataset_configs/faroese.py +10 -0
  11. euroeval/dataset_configs/finnish.py +10 -0
  12. euroeval/dataset_configs/french.py +10 -0
  13. euroeval/dataset_configs/german.py +10 -0
  14. euroeval/dataset_configs/icelandic.py +10 -0
  15. euroeval/dataset_configs/italian.py +10 -0
  16. euroeval/dataset_configs/norwegian.py +20 -0
  17. euroeval/dataset_configs/portuguese.py +81 -0
  18. euroeval/dataset_configs/spanish.py +14 -3
  19. euroeval/dataset_configs/swedish.py +10 -0
  20. euroeval/generation.py +22 -4
  21. euroeval/generation_utils.py +0 -1
  22. euroeval/human_evaluation.py +1 -0
  23. euroeval/languages.py +1 -2
  24. euroeval/prompt_templates/linguistic_acceptability.py +9 -1
  25. euroeval/prompt_templates/multiple_choice.py +9 -1
  26. euroeval/prompt_templates/named_entity_recognition.py +20 -1
  27. euroeval/prompt_templates/reading_comprehension.py +10 -1
  28. euroeval/prompt_templates/sentiment_classification.py +11 -1
  29. euroeval/prompt_templates/summarization.py +8 -1
  30. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/METADATA +2 -2
  31. euroeval-15.13.0.dist-info/RECORD +63 -0
  32. euroeval-15.11.0.dist-info/RECORD +0 -62
  33. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/WHEEL +0 -0
  34. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/entry_points.txt +0 -0
  35. {euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_config_factory.py CHANGED
@@ -42,6 +42,7 @@ def build_benchmark_config(
  num_iterations: int,
  api_base: str | None,
  api_version: str | None,
+ gpu_memory_utilization: float,
  debug: bool,
  run_with_cli: bool,
  only_allow_safetensors: bool,
@@ -102,6 +103,11 @@ def build_benchmark_config(
  model on an inference API.
  api_version:
  The version of the API to use for a given inference API.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. A larger value will result in
+ faster evaluation, but at the risk of running out of GPU memory. Only reduce
+ this if you are running out of GPU memory. Only relevant if the model is
+ generative.
  debug:
  Whether to run the benchmark in debug mode.
  run_with_cli:
@@ -154,6 +160,7 @@ def build_benchmark_config(
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=run_with_cli,
  only_allow_safetensors=only_allow_safetensors,
euroeval/benchmark_modules/vllm.py CHANGED
@@ -757,7 +757,7 @@ def load_model_and_tokenizer(
  model = LLM(
  model=model_id,
  tokenizer=model_id,
- gpu_memory_utilization=0.9,
+ gpu_memory_utilization=benchmark_config.gpu_memory_utilization,
  max_model_len=min(true_max_model_len, MAX_CONTEXT_LENGTH),
  download_dir=download_dir,
  trust_remote_code=benchmark_config.trust_remote_code,
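The previously hard-coded 0.9 now flows in from the benchmark config. As an illustrative sketch (the model id is hypothetical; gpu_memory_utilization is vLLM's own LLM constructor argument), the resulting call behaves like:

    from vllm import LLM  # requires a CUDA-capable environment

    # Lower the fraction only if vLLM runs out of GPU memory; higher values
    # give faster evaluation at the risk of CUDA out-of-memory errors.
    llm = LLM(
        model="some-org/some-model",  # hypothetical model id
        gpu_memory_utilization=0.8,
    )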
euroeval/benchmarker.py CHANGED
@@ -78,6 +78,7 @@ class Benchmarker:
  num_iterations: int = 10,
  api_base: str | None = None,
  api_version: str | None = None,
+ gpu_memory_utilization: float = 0.9,
  debug: bool = False,
  run_with_cli: bool = False,
  only_allow_safetensors: bool = False,
@@ -145,6 +146,11 @@ class Benchmarker:
  to a model on an inference API. Defaults to None.
  api_version:
  The version of the API to use. Defaults to None.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. Only relevant if the model
+ is generative. A larger value will result in faster evaluation, but at
+ the risk of running out of GPU memory. Only reduce this if you are
+ running out of GPU memory. Defaults to 0.9.
  debug:
  Whether to output debug information. Defaults to False.
  run_with_cli:
@@ -192,6 +198,7 @@ class Benchmarker:
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=run_with_cli,
  only_allow_safetensors=only_allow_safetensors,
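A usage sketch of the new argument through the Python API (the 0.8 is illustrative; 0.9 remains the default):

    from euroeval import Benchmarker

    # Lower gpu_memory_utilization only if vLLM runs out of GPU memory.
    benchmarker = Benchmarker(gpu_memory_utilization=0.8)

The matching CLI flag, --gpu-memory-utilization, is wired up in euroeval/cli.py below.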
euroeval/cli.py CHANGED
@@ -186,6 +186,14 @@ from .tasks import get_all_tasks
  help="The version of the API to use. Only relevant if `model` refers to a model on "
  "an inference API.",
  )
+ @click.option(
+ "--gpu-memory-utilization",
+ default=0.9,
+ show_default=True,
+ help="The GPU memory utilization to use for vLLM. A larger value will result in "
+ "faster evaluation, but at the risk of running out of GPU memory. Only reduce this "
+ "if you are running out of GPU memory. Only relevant if the model is generative.",
+ )
  @click.option(
  "--debug/--no-debug",
  default=False,
@@ -223,6 +231,7 @@ def benchmark(
  num_iterations: int,
  api_base: str | None,
  api_version: str | None,
+ gpu_memory_utilization: float,
  debug: bool,
  only_allow_safetensors: bool,
  ) -> None:
@@ -258,6 +267,7 @@ def benchmark(
  num_iterations=num_iterations,
  api_base=api_base,
  api_version=api_version,
+ gpu_memory_utilization=gpu_memory_utilization,
  debug=debug,
  run_with_cli=True,
  only_allow_safetensors=only_allow_safetensors,
euroeval/data_models.py CHANGED
@@ -168,6 +168,11 @@ class BenchmarkConfig:
  api_version:
  The version of the API to use. Only relevant if `model` refers to a model on
  an inference API.
+ gpu_memory_utilization:
+ The GPU memory utilization to use for vLLM. A larger value will result in
+ faster evaluation, but at the risk of running out of GPU memory. Only reduce
+ this if you are running out of GPU memory. Only relevant if the model is
+ generative.
  debug:
  Whether to run the benchmark in debug mode.
  run_with_cli:
@@ -196,6 +201,7 @@ class BenchmarkConfig:
  num_iterations: int
  api_base: str | None
  api_version: str | None
+ gpu_memory_utilization: float
  debug: bool
  run_with_cli: bool
  only_allow_safetensors: bool
@@ -227,6 +233,7 @@ class BenchmarkConfigParams(pydantic.BaseModel):
  num_iterations: int
  api_base: str | None
  api_version: str | None
+ gpu_memory_utilization: float
  debug: bool
  run_with_cli: bool
  only_allow_safetensors: bool
euroeval/dataset_configs/__init__.py CHANGED
@@ -13,6 +13,7 @@ from .german import * # noqa: F403
  from .icelandic import * # noqa: F403
  from .italian import * # noqa: F403
  from .norwegian import * # noqa: F403
+ from .portuguese import * # noqa: F403
  from .spanish import * # noqa: F403
  from .swedish import * # noqa: F403

euroeval/dataset_configs/danish.py CHANGED
@@ -118,3 +118,13 @@ BELEBELE_DA_CONFIG = DatasetConfig(
  languages=[DA],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-da",
+ pretty_name="the truncated version of the Danish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-da-mini",
+ task=RC,
+ languages=[DA],
+ unofficial=True,
+ )
euroeval/dataset_configs/dutch.py CHANGED
@@ -110,3 +110,13 @@ BELEBELE_NL_CONFIG = DatasetConfig(
  languages=[NL],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_NL_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nl",
+ pretty_name="the truncated version of the Dutch part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-nl-mini",
+ task=RC,
+ languages=[NL],
+ unofficial=True,
+ )
euroeval/dataset_configs/english.py CHANGED
@@ -95,3 +95,13 @@ MMLU_CONFIG = DatasetConfig(
  languages=[EN],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-en",
+ pretty_name="the truncated version of the English part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-en-mini",
+ task=RC,
+ languages=[EN],
+ unofficial=True,
+ )
euroeval/dataset_configs/faroese.py CHANGED
@@ -52,3 +52,13 @@ WIKIANN_FO_CONFIG = DatasetConfig(
  languages=[FO],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fo",
+ pretty_name="the truncated version of the Faroese part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fo-mini",
+ task=RC,
+ languages=[FO],
+ unofficial=True,
+ )
euroeval/dataset_configs/finnish.py CHANGED
@@ -68,3 +68,13 @@ BELEBELE_FI_CONFIG = DatasetConfig(
  languages=[FI],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fi",
+ pretty_name="the truncated version of the Finnish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fi-mini",
+ task=RC,
+ languages=[FI],
+ unofficial=True,
+ )
euroeval/dataset_configs/french.py CHANGED
@@ -81,3 +81,13 @@ BELEBELE_FR_CONFIG = DatasetConfig(
  languages=[FR],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-fr",
+ pretty_name="the truncated version of the French part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-fr-mini",
+ task=RC,
+ languages=[FR],
+ unofficial=True,
+ )
euroeval/dataset_configs/german.py CHANGED
@@ -89,3 +89,13 @@ BELEBELE_DE_CONFIG = DatasetConfig(
  languages=[DE],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_DE_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-de",
+ pretty_name="the truncated version of the German part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-de-mini",
+ task=RC,
+ languages=[DE],
+ unofficial=True,
+ )
euroeval/dataset_configs/icelandic.py CHANGED
@@ -146,3 +146,13 @@ BELEBELE_IS_CONFIG = DatasetConfig(
  languages=[IS],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-is",
+ pretty_name="the truncated version of the Icelandic part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-is-mini",
+ task=RC,
+ languages=[IS],
+ unofficial=True,
+ )
euroeval/dataset_configs/italian.py CHANGED
@@ -89,3 +89,13 @@ BELEBELE_IT_CONFIG = DatasetConfig(
  languages=[IT],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_IT_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-it",
+ pretty_name="the truncated version of the Italian part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-it-mini",
+ task=RC,
+ languages=[IT],
+ unofficial=True,
+ )
euroeval/dataset_configs/norwegian.py CHANGED
@@ -184,3 +184,23 @@ BELEBELE_NO_CONFIG = DatasetConfig(
  languages=[NB, NN, NO],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_NB_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nb",
+ pretty_name="the truncated version of the Norwegian Bokmål part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-no-mini",
+ task=RC,
+ languages=[NB, NO],
+ unofficial=True,
+ )
+
+ MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-nn",
+ pretty_name="the truncated version of the Norwegian Nynorsk part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-nn-mini",
+ task=RC,
+ languages=[NN],
+ unofficial=True,
+ )
euroeval/dataset_configs/portuguese.py ADDED
@@ -0,0 +1,81 @@
+ """All Portuguese dataset configurations used in EuroEval."""
+
+ from ..data_models import DatasetConfig
+ from ..languages import PT
+ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+
+ ### Official datasets ###
+
+ SST2_PT_CONFIG = DatasetConfig(
+ name="sst2-pt",
+ pretty_name="the truncated version of the Portuguese sentiment classification "
+ "dataset SST2-pt, translated from the English SST2 dataset",
+ huggingface_id="EuroEval/sst2-pt-mini",
+ task=SENT,
+ languages=[PT],
+ _labels=["positive", "negative"],
+ )
+
+ SCALA_PT = DatasetConfig(
+ name="scala-pt",
+ pretty_name="the Portuguese part of the linguistic acceptability dataset ScaLA",
+ huggingface_id="EuroEval/scala-pt",
+ task=LA,
+ languages=[PT],
+ )
+
+ HAREM_CONFIG = DatasetConfig(
+ name="harem",
+ pretty_name="the Portuguese named entity recognition dataset HAREM",
+ huggingface_id="EuroEval/harem",
+ task=NER,
+ languages=[PT],
+ )
+
+ MULTI_WIKI_QA_PT_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-pt",
+ pretty_name="the truncated version of the Portuguese part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-pt-pt-mini",
+ task=RC,
+ languages=[PT],
+ )
+
+ PUBLICO_CONFIG = DatasetConfig(
+ name="publico",
+ pretty_name="the truncated version of the Portuguese summarisation dataset Público",
+ huggingface_id="EuroEval/publico-mini",
+ task=SUMM,
+ languages=[PT],
+ )
+
+ MMLU_PT_CONFIG = DatasetConfig(
+ name="mmlu-pt",
+ pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
+ "translated from the English MMLU dataset",
+ huggingface_id="EuroEval/mmlu-pt-mini",
+ task=KNOW,
+ languages=[PT],
+ )
+
+ GOLDENSWAG_PT_CONFIG = DatasetConfig(
+ name="goldenswag-pt",
+ pretty_name="the truncated version of the Portuguese common-sense reasoning "
+ "dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
+ huggingface_id="EuroEval/goldenswag-pt-mini",
+ task=COMMON_SENSE,
+ languages=[PT],
+ )
+
+
+ ### Unofficial datasets ###
+
+ BOOLQ_PT_CONFIG = DatasetConfig(
+ name="boolq-pt",
+ pretty_name="the Portuguese multiple choice reading comprehension dataset "
+ "BoolQ-pt, translated from the English BoolQ dataset",
+ huggingface_id="EuroEval/boolq-pt",
+ task=MCRC,
+ languages=[PT],
+ unofficial=True,
+ )
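The new Portuguese datasets can then be evaluated like any other; a minimal sketch, assuming the usual Benchmarker call signature and a hypothetical model id:

    from euroeval import Benchmarker

    # Dataset names come from the configs above, e.g. "multi-wiki-qa-pt".
    benchmark = Benchmarker()
    benchmark(model="some-org/some-model", dataset="multi-wiki-qa-pt")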
euroeval/dataset_configs/spanish.py CHANGED
@@ -8,7 +8,8 @@ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM

  SENTIMENT_HEADLINES_CONFIG = DatasetConfig(
  name="sentiment-headlines-es",
- pretty_name="the truncated version of the Spanish sentiment headlines dataset",
+ pretty_name="the truncated version of the Spanish sentiment classification dataset "
+ "SentimentHeadlines",
  huggingface_id="EuroEval/sentiment-headlines-es",
  task=SENT,
  languages=[ES],
@@ -33,7 +34,7 @@ CONLL_ES_CONFIG = DatasetConfig(

  MLQA_ES_CONFIG = DatasetConfig(
  name="mlqa-es",
- pretty_name="the Spanish version of the MLQA reading comprehension dataset",
+ pretty_name="the Spanish version of the reading comprehension dataset MLQA",
  huggingface_id="EuroEval/mlqa-es",
  task=RC,
  languages=[ES],
@@ -70,7 +71,7 @@ HELLASWAG_ES_CONFIG = DatasetConfig(

  XQUAD_ES_CONFIG = DatasetConfig(
  name="xquad-es",
- pretty_name="the Spanish version of the XQuAD reading comprehension dataset",
+ pretty_name="the Spanish version of the reading comprehension dataset XQuAD",
  huggingface_id="EuroEval/xquad-es",
  task=RC,
  languages=[ES],
@@ -86,3 +87,13 @@ BELEBELE_ES_CONFIG = DatasetConfig(
  languages=[ES],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_ES_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-es",
+ pretty_name="the truncated version of the Spanish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-es-mini",
+ task=RC,
+ languages=[ES],
+ unofficial=True,
+ )
euroeval/dataset_configs/swedish.py CHANGED
@@ -98,3 +98,13 @@ BELEBELE_SV_CONFIG = DatasetConfig(
  languages=[SV],
  unofficial=True,
  )
+
+ MULTI_WIKI_QA_SV_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-sv",
+ pretty_name="the truncated version of the Swedish part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-sv-mini",
+ task=RC,
+ languages=[SV],
+ unofficial=True,
+ )
euroeval/generation.py CHANGED
@@ -200,17 +200,35 @@ def generate_single_iteration(
  all_preds.extend(extracted_labels)

  if "label" in non_cached_dataset.column_names:
+ non_cached_labels = non_cached_dataset["label"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["label"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
  ground_truth = [
  label.lower() if isinstance(label, str) else label
- for label in non_cached_dataset["label"] + cached_dataset["label"]
+ for label in non_cached_labels + cached_labels
  ]
  elif "labels" in non_cached_dataset.column_names:
+ non_cached_labels = non_cached_dataset["labels"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["labels"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
  ground_truth = [
  [label.lower() if isinstance(label, str) else label for label in label_list]
- for label_list in non_cached_dataset["labels"] + cached_dataset["labels"]
+ for label_list in non_cached_labels + cached_labels
  ]
  elif "target_text" in non_cached_dataset.column_names:
- ground_truth = non_cached_dataset["target_text"] + cached_dataset["target_text"]
+ non_cached_labels = non_cached_dataset["target_text"]
+ if not isinstance(non_cached_labels, list):
+ non_cached_labels = list(non_cached_labels)
+ cached_labels = cached_dataset["target_text"]
+ if not isinstance(cached_labels, list):
+ cached_labels = list(cached_labels)
+ ground_truth = non_cached_labels + cached_labels
  else:
  raise ValueError(
  "The dataset must have either a 'label', 'labels', or 'target_text' column"
@@ -306,7 +324,7 @@ def debug_log(
  ):
  logger.info(
  f"Input: '{input_text}'\n"
- f"Raw outout: '{raw_output}'\n"
+ f"Raw output: '{raw_output}'\n"
  f"Prediction: '{prediction}'\n"
  f"Label: '{label}'"
  )
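The repeated isinstance guards above all apply the same normalisation: coerce each dataset column to a plain list before concatenating the non-cached and cached values, since indexing a dataset column may return a lazy sequence for which + is not defined. A condensed, self-contained sketch of the pattern, using toy data and a hypothetical as_list helper:

    def as_list(column) -> list:
        # Coerce a dataset column to a plain list so that `+` concatenation
        # is well-defined regardless of the column's concrete type.
        return column if isinstance(column, list) else list(column)

    non_cached = ("Positive", "Negative")  # stand-in for a lazy column
    cached = ["Neutral"]
    ground_truth = [
        label.lower() if isinstance(label, str) else label
        for label in as_list(non_cached) + as_list(cached)
    ]
    print(ground_truth)  # ['positive', 'negative', 'neutral']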
euroeval/generation_utils.py CHANGED
@@ -323,7 +323,6 @@ def apply_prompt(
  tokenize=False,
  add_generation_prompt=True,
  chat_template=chat_template,
- enable_thinking=True,
  )
  for messages in messages_list
  ]
euroeval/human_evaluation.py CHANGED
@@ -272,6 +272,7 @@ class HumanEvaluator:
  num_iterations=iteration + 1,
  api_base=None,
  api_version=None,
+ gpu_memory_utilization=0.9,
  debug=False,
  run_with_cli=True,
  only_allow_safetensors=False,
euroeval/languages.py CHANGED
@@ -36,7 +36,7 @@ NN = Language(
  )
  ES = Language(code="es", name="Spanish", _and_separator="y", _or_separator="o")
  SV = Language(code="sv", name="Swedish", _and_separator="och", _or_separator="eller")
-
+ PT = Language(code="pt", name="Portuguese", _and_separator="e", _or_separator="ou")

  AB = Language(code="ab", name="Abkhazian")
  AA = Language(code="aa", name="Afar")
@@ -152,7 +152,6 @@ PI = Language(code="pi", name="Pali")
  PS = Language(code="ps", name="Pashto")
  FA = Language(code="fa", name="Persian")
  PL = Language(code="pl", name="Polish")
- PT = Language(code="pt", name="Portuguese")
  PA = Language(code="pa", name="Punjabi")
  QU = Language(code="qu", name="Quechua")
  RO = Language(code="ro", name="Romanian")
euroeval/prompt_templates/linguistic_acceptability.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Linguistic Acceptability task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  LA_TEMPLATES = {
  DA: PromptConfig(
@@ -36,6 +36,14 @@ LA_TEMPLATES = {
  default_instruction_prompt="Texto: {text}\n\nDetermina si el texto es "
  "gramaticalmente correcto o no. Responde con {labels_str}, y nada más.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
+ default_prompt_prefix="Seguem-se abaixo textos e se são "
+ "gramaticalmente correctos",
+ default_prompt_template="Texto: {text}\nGramaticalmente correcto: {label}",
+ default_instruction_prompt="Texto: {text}\n\nDetermina se o texto é "
+ "gramaticalmente correcto ou não. Responde com {labels_str}, e nada mais.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping=dict(correct="kyllä", incorrect="ei"),
  default_prompt_prefix="Seuraavat ovat lauseita ja ovatko ne "
euroeval/prompt_templates/multiple_choice.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for all multiple choice tasks."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV

  # TODO: Missing Faroese
  MULTIPLE_CHOICE_TEMPLATES = {
@@ -36,6 +36,14 @@ MULTIPLE_CHOICE_TEMPLATES = {
  "usando solo {labels_str}, y nada más.",
  default_prompt_label_mapping="auto",
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="As seguintes são perguntas de escolha múltipla "
+ "(com respostas).",
+ default_prompt_template="Pergunta: {text}\nResposta: {label}",
+ default_instruction_prompt="Pergunta: {text}\n\nResponde à pergunta "
+ "acima usando só {labels_str}, e nada mais.",
+ default_prompt_label_mapping="auto",
+ ),
  FI: PromptConfig(
  default_prompt_prefix="Seuraavat ovat monivalintakysymyksiä (vastauksineen).",
  default_prompt_template="Kysymys: {text}\nVastaus: {label}",
euroeval/prompt_templates/named_entity_recognition.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Named Entity Recognition task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  NER_TEMPLATES = {
  DA: PromptConfig(
@@ -80,6 +80,25 @@ NER_TEMPLATES = {
  "claves {labels_str}. Los valores deben ser listas de las "
  "entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping={
+ "b-per": "pessoa",
+ "i-per": "pessoa",
+ "b-loc": "local",
+ "i-loc": "local",
+ "b-org": "organização",
+ "i-org": "organização",
+ "b-misc": "diverso",
+ "i-misc": "diverso",
+ },
+ default_prompt_prefix="Seguem-se frases e dicionários JSON com as entidades "
+ "mencionadas presentes na frase indicada.",
+ default_prompt_template="Frase: {text}\nEntidades mencionadas: {label}",
+ default_instruction_prompt="Frase: {text}\n\nIdentifica as entidades "
+ "mencionadas na frase. Deves devolver um dicionário JSON com as chaves "
+ "{labels_str}. Os valores devem ser listas contendo as entidades "
+ "mencionadas desse tipo, tal como ocorrem na frase.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping={
  "b-per": "henkilö",
euroeval/prompt_templates/reading_comprehension.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Reading Comprehension task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  RC_TEMPLATES = {
  DA: PromptConfig(
@@ -117,6 +117,15 @@ RC_TEMPLATES = {
  "teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
  default_prompt_label_mapping=dict(),
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="Os textos que se seguem são acompanhados de perguntas "
+ "e respostas.",
+ default_prompt_template="Texto: {text}\nPergunta: {question}\nResposta com "
+ "um máximo de 3 palavras: {label}",
+ default_instruction_prompt="Texto: {text}\n\nResponde à seguinte pergunta "
+ "sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
+ default_prompt_label_mapping=dict(),
+ ),
  SV: PromptConfig(
  default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
  default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
euroeval/prompt_templates/sentiment_classification.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Sentiment Analysis task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV

  SENT_TEMPLATES = {
  DA: PromptConfig(
@@ -44,6 +44,16 @@ SENT_TEMPLATES = {
  default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
  "documento. Responde con {labels_str}, y nada más.",
  ),
+ PT: PromptConfig(
+ default_prompt_label_mapping=dict(
+ positive="positivo", neutral="neutro", negative="negativo"
+ ),
+ default_prompt_prefix="Abaixo encontras documentos e os seus "
+ "sentimentos correspondentes, que podem ser {labels_str}.",
+ default_prompt_template="Documento: {text}\nSentimento: {label}",
+ default_instruction_prompt="Documento: {text}\n\nClassifica o "
+ "sentimento do documento. Responde apenas com {labels_str}.",
+ ),
  FI: PromptConfig(
  default_prompt_label_mapping=dict(
  positive="positiivinen", neutral="neutrali", negative="negatiivinen"
euroeval/prompt_templates/summarization.py CHANGED
@@ -1,7 +1,7 @@
  """Templates for the Summarization task."""

  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, SV
+ from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV

  # TODO: Missing Faroese
  SUMM_TEMPLATES = {
@@ -36,6 +36,13 @@ SUMM_TEMPLATES = {
  "documento anterior.",
  default_prompt_label_mapping=dict(),
  ),
+ PT: PromptConfig(
+ default_prompt_prefix="Abaixo encontras documentos com resumos associados.",
+ default_prompt_template="Documento: {text}\nResumo: {target_text}",
+ default_instruction_prompt="Documento: {text}\n\nEscreve um resumo do "
+ "documento anterior.",
+ default_prompt_label_mapping=dict(),
+ ),
  FI: PromptConfig(
  default_prompt_prefix="Seuraavassa on artikkeleita ja niihin liittyviä "
  "tiivistelmiä.",
{euroeval-15.11.0.dist-info → euroeval-15.13.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: EuroEval
- Version: 15.11.0
+ Version: 15.13.0
  Summary: The robust European language model benchmark.
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -29,7 +29,7 @@ License: MIT License
  SOFTWARE.
  License-File: LICENSE
  Requires-Python: <4.0,>=3.10
- Requires-Dist: accelerate>=0.34.2
+ Requires-Dist: accelerate>=1.9.0
  Requires-Dist: bert-score>=0.3.13
  Requires-Dist: click>=8.1.3
  Requires-Dist: datasets>=3.5.0
euroeval-15.13.0.dist-info/RECORD ADDED
@@ -0,0 +1,63 @@
+ euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
+ euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
+ euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+ euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
+ euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
+ euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
+ euroeval/data_models.py,sha256=gPHyIoN2A5_O-cJgyb6jhn6enH8zsiIBI09W_wdHMQs,22031
+ euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+ euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
+ euroeval/generation.py,sha256=1fqFEWwM2RzI3uPZem95VFWbN8EfrKZQTrHEP34ihHs,11622
+ euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
+ euroeval/human_evaluation.py,sha256=Jtz3K5Lqne48wPZWf4EAd3d-n_wX27nGJHigjhV1D7s,27537
+ euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
+ euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
+ euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+ euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
+ euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
+ euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
+ euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
+ euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
+ euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+ euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
+ euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
+ euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
+ euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
+ euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
+ euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
+ euroeval/dataset_configs/danish.py,sha256=-y-n08hTApwTdSVdjRlZYa3gOX92cTGhg8xsuG-Lhww,3691
+ euroeval/dataset_configs/dutch.py,sha256=siyFeEKYx2gBpyqQPtOZ0cD8FTsIMUqzRX5xrQfrNXI,3480
+ euroeval/dataset_configs/english.py,sha256=uQAaGWpHk8xqFCeIhmmPXYTb1cZomeEdRaRe9qIZQrg,2858
+ euroeval/dataset_configs/faroese.py,sha256=gkgxQTWGFbfg9Eo1z-NSLROgKDcaij9tAN2mfgtrt0M,1647
+ euroeval/dataset_configs/finnish.py,sha256=OyveLgyii0hOlo6HZsqAq4rwDrj8tl2qstRfQKugURo,2342
+ euroeval/dataset_configs/french.py,sha256=DKKZEtohWkw_ouBaxWcPzp-K6NhQNtvCKxj8NLbIpUc,2678
+ euroeval/dataset_configs/german.py,sha256=3bfRgkqIGkAhcw4kwcJN9PKuJSmi1r6AFTJY-IWKgWM,2856
+ euroeval/dataset_configs/icelandic.py,sha256=g21IHjcwEZvf_yJ9PobeuBOqRiLOk0oCdEjY34g-UMk,4497
+ euroeval/dataset_configs/italian.py,sha256=rHLMkSXT0kFoQlkwHODxO50WBRIfGtkAnW_C-sfIu74,2957
+ euroeval/dataset_configs/norwegian.py,sha256=-WvQM44xCwjrqBzlAy4rjf6v87fGera2JmZV_069TeQ,6003
+ euroeval/dataset_configs/portuguese.py,sha256=3SqbwD0PNTILGALzh50pVoEwC-spRD75ZeE2NEj151E,2367
+ euroeval/dataset_configs/spanish.py,sha256=VKfBIpBRR38ckuULw7Ftmc-0smsm6GshUAik2-Y1Npw,2855
+ euroeval/dataset_configs/swedish.py,sha256=WpExi4TJqy_Ruwy4Kvde94jM605vT_88el_KKUzLV4E,3108
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
+ euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
+ euroeval/prompt_templates/reading_comprehension.py,sha256=3Nch-9zHfUDIwy-k5mP-TRhHQRQ9nad8HdhpJ1S8nGc,7072
+ euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
+ euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
+ euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
+ euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
+ euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
+ euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
+ euroeval-15.13.0.dist-info/METADATA,sha256=HnDtAE2-sYFmSl4yM9PQhgUrfklR_OB5C5aXPOgz5U8,13478
+ euroeval-15.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ euroeval-15.13.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+ euroeval-15.13.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+ euroeval-15.13.0.dist-info/RECORD,,
euroeval-15.11.0.dist-info/RECORD DELETED
@@ -1,62 +0,0 @@
- euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
- euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu7JM-2xI,11158
- euroeval/benchmarker.py,sha256=RlD8z2TYT4dqKvFtfmbU2pS7ZZ8l_3ErYttIcSxjPMg,48040
- euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
- euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
- euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
- euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
- euroeval/data_models.py,sha256=lrF8XAVVZFqof3O0Bq2nMSTuqhkDaoMixIoUMqgsAo8,21647
- euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
- euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
- euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
- euroeval/generation.py,sha256=pXs2VwfLvUpwXRN8LcHvzE_HTXMkGSYc4wGv9vsz1BA,10758
- euroeval/generation_utils.py,sha256=8HOFE2xdnCPRMe3TiHh--n7Oy3rMV7MAnERpW9vplUA,13352
- euroeval/human_evaluation.py,sha256=9CMXrkzM7Q-vltFL1fD9hYwahQtWT12aHMU8PgGO5_c,27497
- euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
- euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
- euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
- euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
- euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
- euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
- euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
- euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
- euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
- euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
- euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
- euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
- euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
- euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
- euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
- euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
- euroeval/benchmark_modules/vllm.py,sha256=LXWkCUaIpP3cboj1bAGM6N8pR02mX6-XZFJheZDbfAQ,38798
- euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
- euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
- euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
- euroeval/dataset_configs/english.py,sha256=1q8XJqIVWBBNkldL7t-cVnU2O9EUb9_xoVRSN8arN90,2561
- euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
- euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
- euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
- euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
- euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
- euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
- euroeval/dataset_configs/norwegian.py,sha256=30YGdDPtDszG10BNDVHb-XXTGgGIIgDUNGoeM9q0K_E,5385
- euroeval/dataset_configs/spanish.py,sha256=NviL-FzJ5jq1bLTRvbtZBiGrAmZjxyijZNpKZFrnT-M,2527
- euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
- euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
- euroeval/prompt_templates/linguistic_acceptability.py,sha256=FAIJKS26EVRxlLHk1C3lN0GDtd5AM0MwvaMf-NNIxfU,6677
- euroeval/prompt_templates/multiple_choice.py,sha256=6iEqiPpT-3WJN_gsyhyapnwsrcsYGdVkSkzwn-VKKxw,5101
- euroeval/prompt_templates/named_entity_recognition.py,sha256=Xd6gBJD2e1l8-We2Ujor7crRUBcbgnNeeVknBIrTMJo,12737
- euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
- euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
- euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
- euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
- euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
- euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
- euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
- euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
- euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
- euroeval-15.11.0.dist-info/METADATA,sha256=NiRBsSAD6L_q4-y0AVkfoUoZA-9oD27uSK80cWpO_co,13479
- euroeval-15.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- euroeval-15.11.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
- euroeval-15.11.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
- euroeval-15.11.0.dist-info/RECORD,,