PyPI - EuroEval - Versions diffs - 15.16.0__py3-none-any.whl → 16.0.0__py3-none-any.whl - Mend

EuroEval 15.16.0__py3-none-any.whl → 16.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (63)

euroeval/__init__.py +3 -7
euroeval/benchmark_config_factory.py +3 -7
euroeval/benchmark_modules/base.py +35 -19
euroeval/benchmark_modules/fresh.py +24 -19
euroeval/benchmark_modules/hf.py +136 -154
euroeval/benchmark_modules/litellm.py +190 -110
euroeval/benchmark_modules/vllm.py +161 -114
euroeval/benchmarker.py +49 -22
euroeval/cli.py +3 -3
euroeval/constants.py +13 -15
euroeval/data_loading.py +33 -28
euroeval/data_models.py +53 -7
euroeval/dataset_configs/__init__.py +2 -0
euroeval/dataset_configs/danish.py +38 -1
euroeval/dataset_configs/dutch.py +38 -1
euroeval/dataset_configs/english.py +38 -1
euroeval/dataset_configs/estonian.py +95 -0
euroeval/dataset_configs/faroese.py +38 -0
euroeval/dataset_configs/finnish.py +39 -1
euroeval/dataset_configs/french.py +38 -1
euroeval/dataset_configs/german.py +38 -1
euroeval/dataset_configs/icelandic.py +39 -1
euroeval/dataset_configs/italian.py +38 -1
euroeval/dataset_configs/latvian.py +81 -0
euroeval/dataset_configs/norwegian.py +38 -1
euroeval/dataset_configs/portuguese.py +38 -1
euroeval/dataset_configs/spanish.py +38 -1
euroeval/dataset_configs/swedish.py +38 -1
euroeval/enums.py +0 -6
euroeval/finetuning.py +6 -6
euroeval/generation.py +25 -14
euroeval/generation_utils.py +46 -14
euroeval/languages.py +947 -187
euroeval/metrics/__init__.py +6 -0
euroeval/metrics/base.py +76 -0
euroeval/metrics/huggingface.py +192 -0
euroeval/metrics/llm_as_a_judge.py +257 -0
euroeval/metrics/pipeline.py +234 -0
euroeval/metrics/speed.py +51 -0
euroeval/prompt_templates/linguistic_acceptability.py +40 -2
euroeval/prompt_templates/multiple_choice.py +23 -2
euroeval/prompt_templates/named_entity_recognition.py +65 -2
euroeval/prompt_templates/reading_comprehension.py +42 -2
euroeval/prompt_templates/sentiment_classification.py +46 -2
euroeval/prompt_templates/summarization.py +24 -4
euroeval/scores.py +7 -2
euroeval/speed_benchmark.py +6 -6
euroeval/task_group_utils/multiple_choice_classification.py +17 -6
euroeval/task_group_utils/question_answering.py +35 -28
euroeval/task_group_utils/sequence_classification.py +96 -23
euroeval/task_group_utils/text_to_text.py +7 -3
euroeval/task_group_utils/token_classification.py +47 -75
euroeval/tasks.py +31 -6
euroeval/tokenization_utils.py +295 -207
euroeval/utils.py +118 -34
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +11 -14
euroeval-16.0.0.dist-info/RECORD +69 -0
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
euroeval/human_evaluation.py +0 -738
euroeval/metrics.py +0 -470
euroeval-15.16.0.dist-info/RECORD +0 -63
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0

euroeval/dataset_configs/estonian.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""All Estonian dataset configurations used in EuroEval."""
+from ..data_models import DatasetConfig
+from ..languages import ET
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
+### Official datasets ###
+ESTONIAN_VALENCE_CONFIG = DatasetConfig(
+    name="estonian-valence",
+    pretty_name="the Estonian sentiment classification dataset Estonian Valence",
+    huggingface_id="EuroEval/estonian-valence",
+    task=SENT,
+    languages=[ET],
+)
+GRAMMAR_ET_CONFIG = DatasetConfig(
+    name="grammar-et",
+    pretty_name="the Estonian linguistic acceptability dataset Grammar-et",
+    huggingface_id="EuroEval/grammar-et",
+    task=LA,
+    languages=[ET],
+)
+ESTNER_CONFIG = DatasetConfig(
+    name="estner",
+    pretty_name="the Estonian named entity recognition dataset EstNER",
+    huggingface_id="EuroEval/estner-mini",
+    task=NER,
+    languages=[ET],
+)
+MULTI_WIKI_QA_ET_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-et",
+    pretty_name="the truncated version of the Estonian part of the reading "
+    "comprehension dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-et-mini",
+    task=RC,
+    languages=[ET],
+)
+ERR_NEWS_CONFIG = DatasetConfig(
+    name="err-news",
+    pretty_name="the Estonian summarisation dataset ErrNews",
+    huggingface_id="EuroEval/err-news-mini",
+    task=SUMM,
+    languages=[ET],
+)
+EXAM_ET_CONFIG = DatasetConfig(
+    name="exam-et",
+    pretty_name="the Estonian knowledge assessment dataset Exam-et",
+    huggingface_id="EuroEval/exam-et",
+    task=KNOW,
+    languages=[ET],
+    _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
+)
+WINOGRANDE_ET_CONFIG = DatasetConfig(
+    name="winogrande-et",
+    pretty_name="the Estonian common-sense reasoning dataset Winogrande-et",
+    huggingface_id="EuroEval/winogrande-et",
+    task=COMMON_SENSE,
+    languages=[ET],
+    _prompt_prefix="Sulle esitatakse lüngaga (_) tekstülesanded, "
+    "igal ülesandel on kaks vastusevarianti (a ja b).",
+    _prompt_template="Tekstülesanne: {text}\nVastus: {label}",
+    _instruction_prompt="Tekstülesanne: {text}\n\n"
+    "Sinu ülesanne on valida lünka sobiv vastusevariant. "
+    "Vasta ainult {labels_str}. Muud vastused ei ole lubatud.",
+    _labels=["a", "b"],
+)
+EUROPEAN_VALUES_ET_CONFIG = DatasetConfig(
+    name="european-values-et",
+    pretty_name="the Estonian version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-et",
+    task=EUROPEAN_VALUES,
+    languages=[ET],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
+### Unofficial datasets ###
+SCALA_ET_CONFIG = DatasetConfig(
+    name="scala-et",
+    pretty_name="the Estonian part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-et",
+    task=LA,
+    languages=[ET],
+    unofficial=True,
+)

euroeval/dataset_configs/faroese.py CHANGED Viewed

@@ -40,6 +40,44 @@ FOQA_CONFIG = DatasetConfig(
     languages=[FO],
 )
+# TODO: No Faroese version of the European values dataset exists yet
+# EUROPEAN_VALUES_FO_CONFIG = DatasetConfig(
+#     name="european-values-fo",
+#     pretty_name="the Faroese version of the European values evaluation dataset",
+#     huggingface_id="EuroEval/european-values-fo",
+#     task=EUROPEAN_VALUES,
+#     languages=[FO],
+#     splits=["test"],
+#     bootstrap_samples=False,
+#     _instruction_prompt="{text}",
+# )
+#
+# EUROPEAN_VALUES_SITUATIONAL_FO_CONFIG = DatasetConfig(
+#     name="european-values-situational-fo",
+#     pretty_name="the Faroese version of the European values evaluation dataset, "
+#     "where the questions are phrased in a situational way",
+#     huggingface_id="EuroEval/european-values-situational-fo",
+#     task=EUROPEAN_VALUES,
+#     languages=[FO],
+#     splits=["test"],
+#     bootstrap_samples=False,
+#     _instruction_prompt="{text}",
+#     unofficial=True,
+# )
+#
+# EUROPEAN_VALUES_COMPLETIONS_FO_CONFIG = DatasetConfig(
+#     name="european-values-completions-fo",
+#     pretty_name="the Faroese version of the European values evaluation dataset, "
+#     "where the questions are phrased as sentence completions",
+#     huggingface_id="EuroEval/european-values-completions-fo",
+#     task=EUROPEAN_VALUES,
+#     languages=[FO],
+#     splits=["test"],
+#     bootstrap_samples=False,
+#     _instruction_prompt="{text}",
+#     unofficial=True,
+# )
 ### Unofficial datasets ###

euroeval/dataset_configs/finnish.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import FI
-from ..tasks import COMMON_SENSE, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -57,6 +57,18 @@ SCALA_FI_CONFIG = DatasetConfig(
     languages=[FI],
 )
+EUROPEAN_VALUES_FI_CONFIG = DatasetConfig(
+    name="european-values-fi",
+    pretty_name="the Finnish version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-fi",
+    task=EUROPEAN_VALUES,
+    languages=[FI],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
 BELEBELE_FI_CONFIG = DatasetConfig(
@@ -88,3 +100,29 @@ GOLDENSWAG_FI_CONFIG = DatasetConfig(
     languages=[FI],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_FI_CONFIG = DatasetConfig(
+    name="european-values-situational-fi",
+    pretty_name="the Finnish version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-fi",
+    task=EUROPEAN_VALUES,
+    languages=[FI],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_FI_CONFIG = DatasetConfig(
+    name="european-values-completions-fi",
+    pretty_name="the Finnish version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-fi",
+    task=EUROPEAN_VALUES,
+    languages=[FI],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/french.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import FR
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -69,6 +69,17 @@ HELLASWAG_FR_CONFIG = DatasetConfig(
     languages=[FR],
 )
+EUROPEAN_VALUES_FR_CONFIG = DatasetConfig(
+    name="european-values-fr",
+    pretty_name="the French version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-fr",
+    task=EUROPEAN_VALUES,
+    languages=[FR],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -101,3 +112,29 @@ GOLDENSWAG_FR_CONFIG = DatasetConfig(
     languages=[FR],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_FR_CONFIG = DatasetConfig(
+    name="european-values-situational-fr",
+    pretty_name="the French version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-fr",
+    task=EUROPEAN_VALUES,
+    languages=[FR],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_FR_CONFIG = DatasetConfig(
+    name="european-values-completions-fr",
+    pretty_name="the French version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-fr",
+    task=EUROPEAN_VALUES,
+    languages=[FR],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/german.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import DE
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -67,6 +67,17 @@ HELLASWAG_DE_CONFIG = DatasetConfig(
     languages=[DE],
 )
+EUROPEAN_VALUES_DE_CONFIG = DatasetConfig(
+    name="european-values-de",
+    pretty_name="the German version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-de",
+    task=EUROPEAN_VALUES,
+    languages=[DE],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -109,3 +120,29 @@ GOLDENSWAG_DE_CONFIG = DatasetConfig(
     languages=[DE],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_DE_CONFIG = DatasetConfig(
+    name="european-values-situational-de",
+    pretty_name="the German version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-de",
+    task=EUROPEAN_VALUES,
+    languages=[DE],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_DE_CONFIG = DatasetConfig(
+    name="european-values-completions-de",
+    pretty_name="the German version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-de",
+    task=EUROPEAN_VALUES,
+    languages=[DE],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/icelandic.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import IS
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -66,6 +66,18 @@ WINOGRANDE_IS_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-is",
     task=COMMON_SENSE,
     languages=[IS],
+    _labels=["a", "b"],
+)
+EUROPEAN_VALUES_IS_CONFIG = DatasetConfig(
+    name="european-values-is",
+    pretty_name="the Icelandic version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-is",
+    task=EUROPEAN_VALUES,
+    languages=[IS],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
 )
@@ -156,3 +168,29 @@ MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
     languages=[IS],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_IS_CONFIG = DatasetConfig(
+    name="european-values-situational-is",
+    pretty_name="the Icelandic version of the European values evaluation dataset, "
+    "where the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-is",
+    task=EUROPEAN_VALUES,
+    languages=[IS],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_IS_CONFIG = DatasetConfig(
+    name="european-values-completions-is",
+    pretty_name="the Icelandic version of the European values evaluation dataset, "
+    "where the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-is",
+    task=EUROPEAN_VALUES,
+    languages=[IS],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/italian.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import IT
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -67,6 +67,17 @@ HELLASWAG_IT_CONFIG = DatasetConfig(
     languages=[IT],
 )
+EUROPEAN_VALUES_IT_CONFIG = DatasetConfig(
+    name="european-values-it",
+    pretty_name="the Italian version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-it",
+    task=EUROPEAN_VALUES,
+    languages=[IT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -109,3 +120,29 @@ GOLDENSWAG_IT_CONFIG = DatasetConfig(
     languages=[IT],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_IT_CONFIG = DatasetConfig(
+    name="european-values-situational-it",
+    pretty_name="the Italian version of the European values evaluation dataset, "
+    "where the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-it",
+    task=EUROPEAN_VALUES,
+    languages=[IT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_IT_CONFIG = DatasetConfig(
+    name="european-values-completions-it",
+    pretty_name="the Italian version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-it",
+    task=EUROPEAN_VALUES,
+    languages=[IT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/latvian.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""All Latvian dataset configurations used in EuroEval."""
+from ..data_models import DatasetConfig
+from ..languages import LV
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
+### Official datasets ###
+LATVIAN_TWITTER_SENTIMENT_CONFIG = DatasetConfig(
+    name="latvian-twitter-sentiment",
+    pretty_name="the truncated version of the Latvian sentiment classification dataset",
+    huggingface_id="EuroEval/latvian-twitter-sentiment-mini",
+    task=SENT,
+    languages=[LV],
+)
+SCALA_LV_CONFIG = DatasetConfig(
+    name="scala-lv",
+    pretty_name="the Latvian part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-lv",
+    task=LA,
+    languages=[LV],
+)
+FULLSTACK_NER_LV_CONFIG = DatasetConfig(
+    name="fullstack-ner-lv",
+    pretty_name="the truncated version of the FullStack NER dataset",
+    huggingface_id="EuroEval/fullstack-ner-lv-mini",
+    task=NER,
+    languages=[LV],
+)
+MULTI_WIKI_QA_LV_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-lv",
+    pretty_name="the truncated version of the Latvian part of the reading "
+    "comprehension dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-lv-mini",
+    task=RC,
+    languages=[LV],
+)
+LSM_CONFIG = DatasetConfig(
+    name="lsm",
+    pretty_name="the truncated version of the Latvian summarisation dataset LSM",
+    huggingface_id="EuroEval/lsm-mini",
+    task=SUMM,
+    languages=[LV],
+)
+MMLU_LV_CONFIG = DatasetConfig(
+    name="mmlu-lv",
+    pretty_name="the truncated version of the Latvian knowledge dataset MMLU-lv, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-lv-mini",
+    task=KNOW,
+    languages=[LV],
+)
+COPA_LV_CONFIG = DatasetConfig(
+    name="copa-lv",
+    pretty_name="the Latvian common-sense reasoning dataset COPA-lv, translated from "
+    "the English COPA dataset",
+    huggingface_id="EuroEval/copa-lv",
+    task=COMMON_SENSE,
+    languages=[LV],
+    _labels=["a", "b"],
+)
+### Unofficial datasets ###
+WIKIANN_LV_CONFIG = DatasetConfig(
+    name="wikiann-lv",
+    pretty_name="the truncated version of the Latvian part of the named entity "
+    "recognition dataset WikiANN",
+    huggingface_id="EuroEval/wikiann-lv-mini",
+    task=NER,
+    languages=[LV],
+    unofficial=True,
+)

euroeval/dataset_configs/norwegian.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import NB, NN, NO
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -94,6 +94,17 @@ NOR_COMMON_SENSE_QA_CONFIG = DatasetConfig(
     _labels=["a", "b", "c", "d", "e"],
 )
+EUROPEAN_VALUES_NO_CONFIG = DatasetConfig(
+    name="european-values-no",
+    pretty_name="the Norwegian version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-no",
+    task=EUROPEAN_VALUES,
+    languages=[NB, NN, NO],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -204,3 +215,29 @@ MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
     languages=[NN],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_NO_CONFIG = DatasetConfig(
+    name="european-values-situational-no",
+    pretty_name="the Norwegian version of the European values evaluation dataset, "
+    "where the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-no",
+    task=EUROPEAN_VALUES,
+    languages=[NB, NN, NO],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_NO_CONFIG = DatasetConfig(
+    name="european-values-completions-no",
+    pretty_name="the Norwegian version of the European values evaluation dataset, "
+    "where the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-no",
+    task=EUROPEAN_VALUES,
+    languages=[NO],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/portuguese.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import PT
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -67,6 +67,17 @@ GOLDENSWAG_PT_CONFIG = DatasetConfig(
     languages=[PT],
 )
+EUROPEAN_VALUES_PT_CONFIG = DatasetConfig(
+    name="european-values-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -79,3 +90,29 @@ BOOLQ_PT_CONFIG = DatasetConfig(
     languages=[PT],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_PT_CONFIG = DatasetConfig(
+    name="european-values-situational-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset, "
+    "where the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_PT_CONFIG = DatasetConfig(
+    name="european-values-completions-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset, "
+    "where the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/spanish.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from ..data_models import DatasetConfig
 from ..languages import ES
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 ### Official datasets ###
@@ -66,6 +66,17 @@ HELLASWAG_ES_CONFIG = DatasetConfig(
     languages=[ES],
 )
+EUROPEAN_VALUES_ES_CONFIG = DatasetConfig(
+    name="european-values-es",
+    pretty_name="the Spanish version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
 ### Unofficial datasets ###
@@ -107,3 +118,29 @@ GOLDENSWAG_ES_CONFIG = DatasetConfig(
     languages=[ES],
     unofficial=True,
 )
+EUROPEAN_VALUES_SITUATIONAL_ES_CONFIG = DatasetConfig(
+    name="european-values-situational-es",
+    pretty_name="the Spanish version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+EUROPEAN_VALUES_COMPLETIONS_ES_CONFIG = DatasetConfig(
+    name="european-values-completions-es",
+    pretty_name="the Spanish version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

EuroEval 15.16.0__py3-none-any.whl → 16.0.0__py3-none-any.whl

Potentially problematic release.

EuroEval 15.16.0__py3-none-any.whl → 16.0.0__py3-none-any.whl