PyPI - EuroEval - Versions diffs - 16.2.2__py3-none-any.whl → 16.4.0__py3-none-any.whl - Mend

EuroEval 16.2.2__py3-none-any.whl → 16.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (65) hide show

euroeval/__init__.py +7 -4
euroeval/benchmark_config_factory.py +0 -4
euroeval/benchmark_modules/base.py +3 -16
euroeval/benchmark_modules/fresh.py +5 -2
euroeval/benchmark_modules/hf.py +107 -66
euroeval/benchmark_modules/litellm.py +103 -55
euroeval/benchmark_modules/vllm.py +155 -82
euroeval/benchmarker.py +184 -129
euroeval/caching_utils.py +79 -0
euroeval/callbacks.py +5 -7
euroeval/cli.py +1 -1
euroeval/constants.py +9 -0
euroeval/data_loading.py +14 -11
euroeval/data_models.py +12 -4
euroeval/dataset_configs/__init__.py +3 -0
euroeval/dataset_configs/czech.py +79 -0
euroeval/dataset_configs/danish.py +10 -13
euroeval/dataset_configs/dutch.py +0 -3
euroeval/dataset_configs/english.py +0 -3
euroeval/dataset_configs/estonian.py +11 -1
euroeval/dataset_configs/finnish.py +0 -3
euroeval/dataset_configs/french.py +0 -3
euroeval/dataset_configs/german.py +0 -3
euroeval/dataset_configs/italian.py +0 -3
euroeval/dataset_configs/latvian.py +2 -4
euroeval/dataset_configs/lithuanian.py +68 -0
euroeval/dataset_configs/norwegian.py +0 -3
euroeval/dataset_configs/polish.py +0 -3
euroeval/dataset_configs/portuguese.py +0 -3
euroeval/dataset_configs/slovak.py +60 -0
euroeval/dataset_configs/spanish.py +0 -3
euroeval/dataset_configs/swedish.py +10 -15
euroeval/finetuning.py +21 -15
euroeval/generation.py +10 -10
euroeval/generation_utils.py +2 -3
euroeval/logging_utils.py +250 -0
euroeval/metrics/base.py +0 -3
euroeval/metrics/huggingface.py +10 -6
euroeval/metrics/llm_as_a_judge.py +5 -3
euroeval/metrics/pipeline.py +22 -9
euroeval/metrics/speed.py +0 -3
euroeval/model_cache.py +11 -14
euroeval/model_config.py +4 -5
euroeval/model_loading.py +3 -0
euroeval/prompt_templates/linguistic_acceptability.py +30 -3
euroeval/prompt_templates/multiple_choice.py +34 -1
euroeval/prompt_templates/named_entity_recognition.py +71 -11
euroeval/prompt_templates/reading_comprehension.py +41 -3
euroeval/prompt_templates/sentiment_classification.py +34 -1
euroeval/prompt_templates/summarization.py +26 -6
euroeval/scores.py +7 -7
euroeval/speed_benchmark.py +3 -5
euroeval/task_group_utils/multiple_choice_classification.py +0 -3
euroeval/task_group_utils/question_answering.py +0 -3
euroeval/task_group_utils/sequence_classification.py +43 -31
euroeval/task_group_utils/text_to_text.py +17 -8
euroeval/task_group_utils/token_classification.py +10 -9
euroeval/tokenisation_utils.py +22 -20
euroeval/utils.py +30 -147
{euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/METADATA +182 -61
euroeval-16.4.0.dist-info/RECORD +75 -0
euroeval-16.2.2.dist-info/RECORD +0 -70
{euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
{euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
{euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0

euroeval/constants.py CHANGED Viewed

@@ -1,7 +1,13 @@
 """Constants used throughout the project."""
+from typing import TypeVar
 from .enums import TaskGroup
+# Type variable used for generic typing
+T = TypeVar("T", bound=object)
 # This is used as input to generative models; it cannot be a special token
 DUMMY_FILL_VALUE = 100
@@ -50,9 +56,11 @@ METRIC_ATTRIBUTES_TAKING_UP_MEMORY = ["cached_bertscorer"]
 # Hugging Face Hub tags used to classify models as merge models
 MERGE_TAGS = ["merge", "mergekit"]
 # The minimum required CUDA compute capability for using bfloat16 in vLLM
 VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY = 8.0
 # Used to detect whether a model is a reasoning model
 REASONING_TOKENS = [
     ("<think>", "</think>"),
@@ -60,6 +68,7 @@ REASONING_TOKENS = [
     ("<reasoning>", "</reasoning>"),
 ]
 # These tokens are sometimes used by models to indicate the end of a generated
 # response, but they do not use them as a proper EOS token, so we have to deal with them
 # manually. We only use them as stop tokens if they actually appear in the model's

euroeval/data_loading.py CHANGED Viewed

@@ -12,6 +12,7 @@ from huggingface_hub.errors import HfHubHTTPError
 from numpy.random import Generator
 from .exceptions import HuggingFaceHubDown, InvalidBenchmark
+from .logging_utils import log, no_terminal_output
 from .tasks import EUROPEAN_VALUES
 from .utils import unscramble
@@ -20,8 +21,6 @@ if t.TYPE_CHECKING:
     from .data_models import BenchmarkConfig, DatasetConfig
-logger = logging.getLogger("euroeval")
 def load_data(
     rng: Generator, dataset_config: "DatasetConfig", benchmark_config: "BenchmarkConfig"
@@ -106,11 +105,12 @@ def load_raw_data(dataset_config: "DatasetConfig", cache_dir: str) -> "DatasetDi
     num_attempts = 5
     for _ in range(num_attempts):
         try:
-            dataset = load_dataset(
-                path=dataset_config.huggingface_id,
-                cache_dir=cache_dir,
-                token=unscramble("HjccJFhIozVymqXDVqTUTXKvYhZMTbfIjMxG_"),
-            )
+            with no_terminal_output():
+                dataset = load_dataset(
+                    path=dataset_config.huggingface_id,
+                    cache_dir=cache_dir,
+                    token=unscramble("XbjeOLhwebEaSaDUMqqaPaPIhgOcyOfDpGnX_"),
+                )
             break
         except (
             FileNotFoundError,
@@ -118,9 +118,11 @@ def load_raw_data(dataset_config: "DatasetConfig", cache_dir: str) -> "DatasetDi
             DatasetsError,
             requests.ConnectionError,
             requests.ReadTimeout,
-        ):
-            logger.debug(
-                f"Failed to load dataset {dataset_config.huggingface_id!r}. Retrying..."
+        ) as e:
+            log(
+                f"Failed to load dataset {dataset_config.huggingface_id!r}, due to "
+                f"the following error: {e}. Retrying...",
+                level=logging.DEBUG,
             )
             time.sleep(1)
             continue
@@ -129,7 +131,8 @@ def load_raw_data(dataset_config: "DatasetConfig", cache_dir: str) -> "DatasetDi
     else:
         raise InvalidBenchmark(
             f"Failed to load dataset {dataset_config.huggingface_id!r} after "
-            f"{num_attempts} attempts."
+            f"{num_attempts} attempts. Run with verbose mode to see the individual "
+            "errors."
         )
     assert isinstance(dataset, DatasetDict)  # type: ignore[used-before-def]
     missing_keys = [key for key in dataset_config.splits if key not in dataset]

euroeval/data_models.py CHANGED Viewed

@@ -558,14 +558,14 @@ class DatasetConfig:
         )
     @property
-    def id2label(self) -> dict[int, str]:
+    def id2label(self) -> "HashableDict":
         """The mapping from ID to label."""
-        return {idx: label for idx, label in enumerate(self.labels)}
+        return HashableDict({idx: label for idx, label in enumerate(self.labels)})
     @property
-    def label2id(self) -> dict[str, int]:
+    def label2id(self) -> "HashableDict":
         """The mapping from label to ID."""
-        return {label: i for i, label in enumerate(self.labels)}
+        return HashableDict({label: i for i, label in enumerate(self.labels)})
     @property
     def num_labels(self) -> int:
@@ -783,3 +783,11 @@ class ModelIdComponents:
     model_id: str
     revision: str
     param: str | None
+class HashableDict(dict):
+    """A hashable dictionary."""
+    def __hash__(self) -> int:  # type: ignore[override]
+        """Return the hash of the dictionary."""
+        return hash(frozenset(self.items()))

euroeval/dataset_configs/__init__.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from ..data_models import DatasetConfig
 from ..languages import get_all_languages
 from ..tasks import SPEED
+from .czech import *  # noqa: F403
 from .danish import *  # noqa: F403
 from .dutch import *  # noqa: F403
 from .english import *  # noqa: F403
@@ -14,9 +15,11 @@ from .german import *  # noqa: F403
 from .icelandic import *  # noqa: F403
 from .italian import *  # noqa: F403
 from .latvian import *  # noqa: F403
+from .lithuanian import *  # noqa: F403
 from .norwegian import *  # noqa: F403
 from .polish import *  # noqa: F403
 from .portuguese import *  # noqa: F403
+from .slovak import *  # noqa: F403
 from .spanish import *  # noqa: F403
 from .swedish import *  # noqa: F403

euroeval/dataset_configs/czech.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""All Czech dataset configurations used in EuroEval."""
+from ..data_models import DatasetConfig
+from ..languages import CS
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
+### Official datasets ###
+CSFD_SENTIMENT_CONFIG = DatasetConfig(
+    name="csfd-sentiment",
+    pretty_name="the truncated version of the Czech sentiment classification dataset "
+    "CSFD Sentiment",
+    huggingface_id="EuroEval/csfd-sentiment-mini",
+    task=SENT,
+    languages=[CS],
+)
+CS_GEC_CONFIG = DatasetConfig(
+    name="cs-gec",
+    pretty_name="the truncated version of the Czech linguistic acceptability dataset "
+    "CS-GEC",
+    huggingface_id="EuroEval/cs-gec-mini",
+    task=LA,
+    languages=[CS],
+)
+PONER_CONFIG = DatasetConfig(
+    name="poner",
+    pretty_name="the truncated version of the Czech named entity recognition dataset "
+    "PONER",
+    huggingface_id="EuroEval/poner-mini",
+    task=NER,
+    languages=[CS],
+)
+SQAD_CONFIG = DatasetConfig(
+    name="sqad",
+    pretty_name="the truncated version of the Czech reading comprehension dataset SQAD",
+    huggingface_id="EuroEval/sqad-mini",
+    task=RC,
+    languages=[CS],
+)
+CZECH_NEWS_CONFIG = DatasetConfig(
+    name="czech-news",
+    pretty_name="the truncated version of the Czech summarisation dataset",
+    huggingface_id="EuroEval/czech-news-mini",
+    task=SUMM,
+    languages=[CS],
+)
+UMIMETO_QA_CONFIG = DatasetConfig(
+    name="umimeto-qa",
+    pretty_name="the Czech knowledge dataset UmimetoQA",
+    huggingface_id="EuroEval/umimeto-qa",
+    task=KNOW,
+    languages=[CS],
+)
+HELLASWAG_CS_CONFIG = DatasetConfig(
+    name="hellaswag-cs",
+    pretty_name="the truncated version of the Czech common-sense reasoning dataset "
+    "HellaSwag-cs, translated from the English HellaSwag dataset",
+    huggingface_id="EuroEval/hellaswag-cs-mini",
+    task=COMMON_SENSE,
+    languages=[CS],
+)
+### Unofficial datasets ###
+SCALA_CS_CONFIG = DatasetConfig(
+    name="scala-cs",
+    pretty_name="the Czech part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-cs",
+    task=LA,
+    languages=[CS],
+    unofficial=True,
+)

euroeval/dataset_configs/danish.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Danish dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import DA
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -33,11 +32,11 @@ DANSK_CONFIG = DatasetConfig(
     languages=[DA],
 )
-SCANDIQA_DA_CONFIG = DatasetConfig(
-    name="scandiqa-da",
-    pretty_name="the Danish part of the truncated version of the question answering "
-    "dataset ScandiQA",
-    huggingface_id="EuroEval/scandiqa-da-mini",
+MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-da",
+    pretty_name="the truncated version of the Danish part of the reading "
+    "comprehension dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-da-mini",
     task=RC,
     languages=[DA],
 )
@@ -130,11 +129,11 @@ BELEBELE_DA_CONFIG = DatasetConfig(
     unofficial=True,
 )
-MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
-    name="multi-wiki-qa-da",
-    pretty_name="the truncated version of the Danish part of the reading "
-    "comprehension dataset MultiWikiQA",
-    huggingface_id="EuroEval/multi-wiki-qa-da-mini",
+SCANDIQA_DA_CONFIG = DatasetConfig(
+    name="scandiqa-da",
+    pretty_name="the Danish part of the truncated version of the question answering "
+    "dataset ScandiQA",
+    huggingface_id="EuroEval/scandiqa-da-mini",
     task=RC,
     languages=[DA],
     unofficial=True,
@@ -157,9 +156,7 @@ WINOGRANDE_DA_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-da",
     task=COMMON_SENSE,
     languages=[DA],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/dutch.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Dutch dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import NL
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -150,9 +149,7 @@ WINOGRANDE_NL_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-nl",
     task=COMMON_SENSE,
     languages=[NL],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/english.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All English dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import EN
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -133,9 +132,7 @@ WINOGRANDE_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-en",
     task=COMMON_SENSE,
     languages=[EN],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/estonian.py CHANGED Viewed

@@ -94,10 +94,20 @@ SCALA_ET_CONFIG = DatasetConfig(
 EXAM_ET_CONFIG = DatasetConfig(
     name="exam-et",
-    pretty_name="the Estonian knowledge assessment dataset Exam-et",
+    pretty_name="the Estonian knowledge dataset Exam-et",
     huggingface_id="EuroEval/exam-et",
     task=KNOW,
     languages=[ET],
     _labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"],
     unofficial=True,
 )
+MMLU_ET_CONFIG = DatasetConfig(
+    name="mmlu-et",
+    pretty_name="the truncated version of the Estonian knowledge dataset MMLU-et, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-et-mini",
+    task=KNOW,
+    languages=[ET],
+    unofficial=True,
+)

euroeval/dataset_configs/finnish.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Finnish dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import FI
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
@@ -109,9 +108,7 @@ WINOGRANDE_FI_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-fi",
     task=COMMON_SENSE,
     languages=[FI],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/french.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All French dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import FR
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -121,9 +120,7 @@ WINOGRANDE_FR_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-fr",
     task=COMMON_SENSE,
     languages=[FR],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/german.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All German dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import DE
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -138,9 +137,7 @@ WINOGRANDE_DE_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-de",
     task=COMMON_SENSE,
     languages=[DE],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/italian.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Italian dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import IT
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -129,9 +128,7 @@ WINOGRANDE_IT_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-it",
     task=COMMON_SENSE,
     languages=[IT],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/latvian.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Latvian dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import LV
 from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
@@ -25,7 +24,8 @@ SCALA_LV_CONFIG = DatasetConfig(
 FULLSTACK_NER_LV_CONFIG = DatasetConfig(
     name="fullstack-ner-lv",
-    pretty_name="the truncated version of the FullStack NER dataset",
+    pretty_name="the truncated version of the Latvian named entity recognition "
+    "dataset FullStack-NER-lv",
     huggingface_id="EuroEval/fullstack-ner-lv-mini",
     task=NER,
     languages=[LV],
@@ -88,8 +88,6 @@ WINOGRANDE_LV_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-lv",
     task=COMMON_SENSE,
     languages=[LV],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/lithuanian.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""All Lithuanian dataset configurations used in EuroEval."""
+from ..data_models import DatasetConfig
+from ..languages import LT
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
+### Official datasets ###
+LITHUANIAN_EMOTIONS_CONFIG = DatasetConfig(
+    name="lithuanian-emotions",
+    pretty_name="the truncated version of the Lithuanian sentiment "
+    "classification dataset Lithuanian Emotions",
+    huggingface_id="EuroEval/lithuanian-emotions-mini",
+    task=SENT,
+    languages=[LT],
+)
+SCALA_LT_CONFIG = DatasetConfig(
+    name="scala-lt",
+    pretty_name="the Lithuanian part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-lt",
+    task=LA,
+    languages=[LT],
+)
+WIKIANN_LT_CONFIG = DatasetConfig(
+    name="wikiann-lt",
+    pretty_name="the truncated version of the Lithuanian part of the named entity "
+    "recognition dataset WikiANN",
+    huggingface_id="EuroEval/wikiann-lt-mini",
+    task=NER,
+    languages=[LT],
+)
+MULTI_WIKI_QA_LT_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-lt",
+    pretty_name="the truncated version of the Lithuanian part of the reading "
+    "comprehension dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-lt-mini",
+    task=RC,
+    languages=[LT],
+)
+LRYTAS_CONFIG = DatasetConfig(
+    name="lrytas",
+    pretty_name="the truncated version of the Lithuanian summarisation dataset Lrytas",
+    huggingface_id="EuroEval/lrytas-mini",
+    task=SUMM,
+    languages=[LT],
+)
+LT_HISTORY_CONFIG = DatasetConfig(
+    name="lt-history",
+    pretty_name="the Lithuanian knowledge dataset LT-History",
+    huggingface_id="EuroEval/lt-history",
+    task=KNOW,
+    languages=[LT],
+)
+WINOGRANDE_LT_CONFIG = DatasetConfig(
+    name="winogrande-lt",
+    pretty_name="the Lithuanian common-sense reasoning dataset Winogrande-lt, "
+    "translated from the English Winogrande dataset",
+    huggingface_id="EuroEval/winogrande-lt",
+    task=COMMON_SENSE,
+    languages=[LT],
+    _labels=["a", "b"],
+)

euroeval/dataset_configs/norwegian.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Norwegian dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import NB, NN, NO
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -224,9 +223,7 @@ WINOGRANDE_NO_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-no",
     task=COMMON_SENSE,
     languages=[NB, NN, NO],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/polish.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Polish dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import PL
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
@@ -62,9 +61,7 @@ WINOGRANDE_PL_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-pl",
     task=COMMON_SENSE,
     languages=[PL],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
 )
 EUROPEAN_VALUES_PL_CONFIG = DatasetConfig(

euroeval/dataset_configs/portuguese.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Portuguese dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import PT
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -99,9 +98,7 @@ WINOGRANDE_PT_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-pt",
     task=COMMON_SENSE,
     languages=[PT],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

euroeval/dataset_configs/slovak.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""All Slovak dataset configurations used in EuroEval."""
+from ..data_models import DatasetConfig
+from ..languages import SK
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT
+### Official datasets ###
+CSFD_SENTIMENT_SK_CONFIG = DatasetConfig(
+    name="csfd-sentiment-sk",
+    pretty_name="the truncated version of the Slovak sentiment classification dataset "
+    "CSFD-sentiment-sk",
+    huggingface_id="EuroEval/csfd-sentiment-sk-mini",
+    task=SENT,
+    languages=[SK],
+)
+SCALA_SK_CONFIG = DatasetConfig(
+    name="scala-sk",
+    pretty_name="the Slovak part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-sk",
+    task=LA,
+    languages=[SK],
+)
+UNER_SK_CONFIG = DatasetConfig(
+    name="uner-sk",
+    pretty_name="the truncated version of the Slovak named entity recognition dataset "
+    "UNER-sk",
+    huggingface_id="EuroEval/uner-sk-mini",
+    task=NER,
+    languages=[SK],
+)
+MULTI_WIKI_QA_SK_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-sk",
+    pretty_name="the truncated version of the Slovak part of the reading comprehension "
+    "dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-sk-mini",
+    task=RC,
+    languages=[SK],
+)
+MMLU_SK_CONFIG = DatasetConfig(
+    name="mmlu-sk",
+    pretty_name="the truncated version of the Slovak knowledge dataset MMLU-sk, "
+    "translated from the English MMLU dataset",
+    huggingface_id="EuroEval/mmlu-sk-mini",
+    task=KNOW,
+    languages=[SK],
+)
+WINOGRANDE_SK_CONFIG = DatasetConfig(
+    name="winogrande-sk",
+    pretty_name="the Slovak common-sense reasoning dataset Winogrande-sk, translated "
+    "from the English Winogrande dataset",
+    huggingface_id="EuroEval/winogrande-sk",
+    task=COMMON_SENSE,
+    languages=[SK],
+)

euroeval/dataset_configs/spanish.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """All Spanish dataset configurations used in EuroEval."""
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import ES
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
@@ -127,9 +126,7 @@ WINOGRANDE_ES_CONFIG = DatasetConfig(
     huggingface_id="EuroEval/winogrande-es",
     task=COMMON_SENSE,
     languages=[ES],
-    splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )

EuroEval 16.2.2__py3-none-any.whl → 16.4.0__py3-none-any.whl

Potentially problematic release.

EuroEval 16.2.2__py3-none-any.whl → 16.4.0__py3-none-any.whl