EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic.
- euroeval/__init__.py +8 -7
- euroeval/benchmark_config_factory.py +3 -7
- euroeval/benchmark_modules/base.py +35 -19
- euroeval/benchmark_modules/fresh.py +24 -19
- euroeval/benchmark_modules/hf.py +136 -154
- euroeval/benchmark_modules/litellm.py +190 -110
- euroeval/benchmark_modules/vllm.py +199 -139
- euroeval/benchmarker.py +49 -22
- euroeval/cli.py +3 -3
- euroeval/constants.py +19 -15
- euroeval/data_loading.py +33 -28
- euroeval/data_models.py +73 -23
- euroeval/dataset_configs/__init__.py +2 -0
- euroeval/dataset_configs/danish.py +35 -1
- euroeval/dataset_configs/dutch.py +38 -1
- euroeval/dataset_configs/english.py +38 -1
- euroeval/dataset_configs/estonian.py +95 -0
- euroeval/dataset_configs/faroese.py +38 -0
- euroeval/dataset_configs/finnish.py +39 -1
- euroeval/dataset_configs/french.py +38 -1
- euroeval/dataset_configs/german.py +38 -1
- euroeval/dataset_configs/icelandic.py +39 -1
- euroeval/dataset_configs/italian.py +38 -1
- euroeval/dataset_configs/latvian.py +81 -0
- euroeval/dataset_configs/norwegian.py +38 -1
- euroeval/dataset_configs/portuguese.py +38 -1
- euroeval/dataset_configs/spanish.py +38 -1
- euroeval/dataset_configs/swedish.py +38 -1
- euroeval/enums.py +0 -6
- euroeval/finetuning.py +6 -6
- euroeval/generation.py +25 -14
- euroeval/generation_utils.py +90 -20
- euroeval/languages.py +947 -187
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +76 -0
- euroeval/metrics/huggingface.py +192 -0
- euroeval/metrics/llm_as_a_judge.py +257 -0
- euroeval/metrics/pipeline.py +276 -0
- euroeval/metrics/speed.py +51 -0
- euroeval/model_cache.py +13 -1
- euroeval/prompt_templates/linguistic_acceptability.py +40 -2
- euroeval/prompt_templates/multiple_choice.py +23 -2
- euroeval/prompt_templates/named_entity_recognition.py +65 -2
- euroeval/prompt_templates/reading_comprehension.py +42 -2
- euroeval/prompt_templates/sentiment_classification.py +46 -2
- euroeval/prompt_templates/summarization.py +24 -4
- euroeval/scores.py +7 -2
- euroeval/speed_benchmark.py +6 -6
- euroeval/task_group_utils/multiple_choice_classification.py +19 -8
- euroeval/task_group_utils/question_answering.py +35 -28
- euroeval/task_group_utils/sequence_classification.py +128 -42
- euroeval/task_group_utils/text_to_text.py +7 -3
- euroeval/task_group_utils/token_classification.py +59 -73
- euroeval/tasks.py +33 -6
- euroeval/tokenization_utils.py +294 -207
- euroeval/utils.py +150 -35
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA +13 -14
- euroeval-16.0.1.dist-info/RECORD +69 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -738
- euroeval/metrics.py +0 -470
- euroeval-15.16.0.dist-info/RECORD +0 -63
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/WHEEL +0 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_modules/hf.py
CHANGED
@@ -2,7 +2,6 @@
 
 import collections.abc as c
 import logging
-import os
 import typing as t
 from functools import cached_property, partial
 from json import JSONDecodeError
@@ -68,6 +67,7 @@ from ..utils import (
     block_terminal_output,
     create_model_cache_dir,
     get_class_by_name,
+    get_hf_token,
     internet_connection_available,
     log_once,
 )
@@ -96,6 +96,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         model_config: "ModelConfig",
         dataset_config: "DatasetConfig",
         benchmark_config: "BenchmarkConfig",
+        log_metadata: bool = True,
     ) -> None:
         """Initialise the model.
 
@@ -106,18 +107,20 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 The dataset configuration.
             benchmark_config:
                 The benchmark configuration.
+            log_metadata:
+                Whether to log the model metadata.
         """
-        model, tokenizer = load_model_and_tokenizer(
+        model, tokeniser = load_model_and_tokeniser(
             model_config=model_config,
             dataset_config=dataset_config,
             benchmark_config=benchmark_config,
         )
         self._model: "PreTrainedModel" = model
-        self._tokenizer: "PreTrainedTokenizer" = tokenizer
+        self._tokeniser: "PreTrainedTokenizer" = tokeniser
 
-        self._model, self._tokenizer = align_model_and_tokenizer(
+        self._model, self._tokeniser = align_model_and_tokeniser(
             model=self._model,
-            tokenizer=self._tokenizer,
+            tokeniser=self._tokeniser,
             model_max_length=self.model_max_length,
             raise_errors=benchmark_config.raise_errors,
         )
@@ -126,6 +129,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
             model_config=model_config,
             dataset_config=dataset_config,
             benchmark_config=benchmark_config,
+            log_metadata=log_metadata,
         )
 
     @cached_property
@@ -135,9 +139,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         Returns:
             The number of parameters in the model.
         """
-        token = (
-            self.benchmark_config.api_key or os.getenv("HUGGINGFACE_API_KEY") or True
-        )
+        token = get_hf_token(api_key=self.benchmark_config.api_key)
         hf_api = HfApi(token=token)
         try:
             repo_info = hf_api.model_info(
@@ -191,10 +193,10 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         ):
             vocab_size = self._model.config.vocab_size
         elif (
-            hasattr(self._tokenizer, "vocab_size")
-            and self._tokenizer.vocab_size is not None
+            hasattr(self._tokeniser, "vocab_size")
+            and self._tokeniser.vocab_size is not None
         ):
-            vocab_size = self._tokenizer.vocab_size
+            vocab_size = self._tokeniser.vocab_size
         else:
             vocab_size = -1
         return vocab_size
@@ -208,18 +210,18 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         """
         all_max_lengths: list[int] = list()
 
-        # Add the registered max length of the tokenizer
+        # Add the registered max length of the tokeniser
         if hasattr(
-            self._tokenizer, "model_max_length"
-        ) and self._tokenizer.model_max_length < int(1e30):
-            all_max_lengths.append(self._tokenizer.model_max_length)
+            self._tokeniser, "model_max_length"
+        ) and self._tokeniser.model_max_length < int(1e30):
+            all_max_lengths.append(self._tokeniser.model_max_length)
 
         # Add the max length derived from the model's input sizes
-        if hasattr(self._tokenizer, "max_model_input_sizes"):
+        if hasattr(self._tokeniser, "max_model_input_sizes"):
             all_max_lengths.extend(
                 [
                     size
-                    for size in self._tokenizer.max_model_input_sizes.values()
+                    for size in self._tokeniser.max_model_input_sizes.values()
                     if size is not None
                 ]
             )
@@ -275,10 +277,10 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 | TaskGroup.QUESTION_ANSWERING
                 | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
             ):
-                return DataCollatorWithPadding(self._tokenizer, padding="longest")
+                return DataCollatorWithPadding(self._tokeniser, padding="longest")
             case TaskGroup.TOKEN_CLASSIFICATION:
                 return DataCollatorForTokenClassification(
-                    tokenizer=self._tokenizer, label_pad_token_id=-100
+                    tokenizer=self._tokeniser, label_pad_token_id=-100
                 )
             case _:
                 raise NotImplementedError(
@@ -357,16 +359,16 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                     self._model.config.label2id[lbl.lower()]
                     for lbl in examples["label"]
                 ]
-            except KeyError:
+            except KeyError as e:
                 raise InvalidBenchmark(
                     f"One of the labels in the dataset, "
                     f"{examples['label'].lower()}, does not occur in the "
                     f"label2id dictionary {self._model.config.label2id}."
-                )
+                ) from e
             return examples
 
         def tokenise(examples: dict) -> "BatchEncoding":
-            return self._tokenizer(text=examples["text"], truncation=True, padding=True)
+            return self._tokeniser(text=examples["text"], truncation=True, padding=True)
 
         match task.task_group:
             case TaskGroup.SEQUENCE_CLASSIFICATION:
@@ -376,39 +378,20 @@ class HuggingFaceEncoderModel(BenchmarkModule):
 
             case TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION:
                 dataset = DatasetDict(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            tokenizer=self._tokenizer,
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["val"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
-                    ),
-                    test=dataset["test"].map(
-                        partial(
-                            multiple_choice_classification.prepare_examples,
-                            tokenizer=self._tokenizer,
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["test"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
-                    ),
+                    {
+                        split_name: split.map(
+                            partial(
+                                multiple_choice_classification.prepare_examples,
+                                tokeniser=self._tokeniser,
+                            ),
+                            batched=True,
+                            batch_size=10,
+                            remove_columns=split.column_names,
+                            load_from_cache_file=False,
+                            keep_in_memory=True,
+                        )
+                        for split_name, split in dataset.items()
+                    }
                 )
 
             case TaskGroup.TEXT_TO_TEXT:
@@ -423,7 +406,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 dataset = dataset.map(
                     partial(
                         token_classification.tokenize_and_align_labels,
-                        tokenizer=self._tokenizer,
+                        tokeniser=self._tokeniser,
                         label2id=self._model.config.label2id,
                     ),
                     batched=True,
@@ -432,43 +415,44 @@ class HuggingFaceEncoderModel(BenchmarkModule):
                 )
 
             case TaskGroup.QUESTION_ANSWERING:
-
-
-
-
-
-
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["test"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
+                data_dict = dict()
+                if "train" in dataset:
+                    data_dict["train"] = dataset["train"].map(
+                        partial(
+                            question_answering.prepare_train_examples,
+                            tokeniser=self._tokeniser,
                         ),
-
-
-
-
-
-
-
-
-
-
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
+                    )
+                if "val" in dataset:
+                    data_dict["val"] = dataset["val"].map(
+                        partial(
+                            question_answering.prepare_train_examples,
+                            tokeniser=self._tokeniser,
                         ),
-
-
-
-
-
-
-
-
-
-
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
+                    )
+                if "test" in dataset:
+                    data_dict["test"] = dataset["test"].map(
+                        partial(
+                            question_answering.prepare_test_examples,
+                            tokeniser=self._tokeniser,
                         ),
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
                     )
-                )
+                dataset = DatasetDict(data_dict)
 
                 # The Trainer hides the columns that are not used by the model (here
                 # `id` and `offset_mapping` which we will need for our post-processing),
@@ -559,12 +543,12 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         return model_config
 
 
-def load_model_and_tokenizer(
+def load_model_and_tokeniser(
     model_config: "ModelConfig",
     dataset_config: "DatasetConfig",
    benchmark_config: "BenchmarkConfig",
 ) -> tuple["PreTrainedModel", "PreTrainedTokenizer"]:
-    """Load the model and tokenizer.
+    """Load the model and tokeniser.
 
     Args:
         model_config:
@@ -575,7 +559,7 @@ def load_model_and_tokenizer(
         The benchmark configuration
 
     Returns:
-        The loaded model and tokenizer.
+        The loaded model and tokeniser.
     """
     config: "PretrainedConfig"
     block_terminal_output()
@@ -607,12 +591,12 @@ def load_model_and_tokenizer(
         config=config,
         ignore_mismatched_sizes=ignore_mismatched_sizes,
         revision=model_config.revision,
-        token=benchmark_config.api_key
+        token=get_hf_token(api_key=benchmark_config.api_key),
         cache_dir=model_config.model_cache_dir,
         trust_remote_code=benchmark_config.trust_remote_code,
-
+        dtype=get_dtype(
            device=benchmark_config.device,
-
+            dtype_is_set=config.to_dict().get("dtype") is not None,
            bf16_available=(
                torch.cuda.is_available() and torch.cuda.is_bf16_supported()
            ),
@@ -658,11 +642,13 @@ def load_model_and_tokenizer(
                 model_kwargs["ignore_mismatched_sizes"] = True
                 continue
             else:
-                raise InvalidModel(str(e))
-        except (TimeoutError, RequestError):
+                raise InvalidModel(str(e)) from e
+        except (TimeoutError, RequestError) as e:
             attempts_left -= 1
             if attempts_left == 0:
-                raise InvalidModel(
+                raise InvalidModel(
+                    "The model could not be loaded after 5 attempts."
+                ) from e
             logger.info(f"Couldn't load the model {model_id!r}. Retrying.")
             sleep(5)
             continue
@@ -670,16 +656,16 @@ def load_model_and_tokenizer(
        if "checkpoint seems to be incorrect" in str(e):
            raise InvalidModel(
                f"The model {model_id!r} has an incorrect checkpoint."
-            )
+            ) from e
        if "trust_remote_code" in str(e):
            raise InvalidModel(
                f"Loading the model {model_id!r} needs to trust remote code. "
                "If you trust the suppliers of this model, then you can enable "
                "this by setting the `--trust-remote-code` flag."
-            )
+            ) from e
        raise InvalidModel(
            f"The model {model_id!r} could not be loaded. The error was {e!r}."
-        )
+        ) from e
 
    if isinstance(model_or_tuple, tuple):
        model = model_or_tuple[0]
@@ -697,13 +683,13 @@ def load_model_and_tokenizer(
    ):
        model = setup_model_for_question_answering(model=model)
 
-    tokenizer = load_tokenizer(
+    tokeniser = load_tokeniser(
        model=model,
        model_id=model_id,
        trust_remote_code=benchmark_config.trust_remote_code,
    )
 
-    return model, tokenizer
+    return model, tokeniser
 
 
 def get_model_repo_info(
@@ -722,7 +708,7 @@ def get_model_repo_info(
    Returns:
        The information about the model, or None if the model could not be found.
    """
-    token = benchmark_config.api_key
+    token = get_hf_token(api_key=benchmark_config.api_key)
    hf_api = HfApi(token=token)
    model_id, revision = model_id.split("@") if "@" in model_id else (model_id, "main")
 
@@ -800,12 +786,7 @@ def get_model_repo_info(
            level=logging.DEBUG,
        )
        if base_model_id is not None:
-            base_model_info = hf_api.model_info(
-                repo_id=base_model_id,
-                token=benchmark_config.api_key
-                or os.getenv("HUGGINGFACE_API_KEY")
-                or True,
-            )
+            base_model_info = hf_api.model_info(repo_id=base_model_id, token=token)
            tags += base_model_info.tags or list()
        tags = list(set(tags))
 
@@ -839,7 +820,7 @@ def get_model_repo_info(
    else:
        pipeline_tag = "fill-mask"
 
-    if benchmark_config.
+    if benchmark_config.requires_safetensors:
        repo_files = hf_api.list_repo_files(repo_id=model_id, revision=revision)
        has_safetensors = any(f.endswith(".safetensors") for f in repo_files)
        if not has_safetensors:
@@ -848,7 +829,7 @@ def get_model_repo_info(
                msg += "Skipping since the `--only-allow-safetensors` flag is set."
            else:
                msg += (
-                    "Skipping since the `
+                    "Skipping since the `requires_safetensors` argument is set "
                    "to `True`."
                )
            logger.warning(msg)
@@ -869,7 +850,7 @@ def get_model_repo_info(
                msg += " Skipping since the `--only-allow-safetensors` flag is set."
            else:
                msg += (
-                    " Skipping since the `
+                    " Skipping since the `requires_safetensors` argument is set "
                    "to `True`."
                )
            logging.warning(msg)
@@ -880,10 +861,10 @@ def get_model_repo_info(
    )
 
 
-def load_tokenizer(
+def load_tokeniser(
    model: "PreTrainedModel | None", model_id: str, trust_remote_code: bool
 ) -> "PreTrainedTokenizer":
-    """Load the tokenizer.
+    """Load the tokeniser.
 
    Args:
        model:
@@ -895,7 +876,7 @@ def load_tokenizer(
            Whether to trust remote code.
 
    Returns:
-        The loaded tokenizer.
+        The loaded tokeniser.
    """
    loading_kwargs: dict[str, bool | str] = dict(
        use_fast=True,
@@ -918,45 +899,46 @@ def load_tokenizer(
    num_retries = 5
    for _ in range(num_retries):
        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **loading_kwargs)
+            tokeniser = AutoTokenizer.from_pretrained(model_id, **loading_kwargs)
            break
-        except (JSONDecodeError, OSError, TypeError):
-            raise InvalidModel(
+        except (JSONDecodeError, OSError, TypeError) as e:
+            raise InvalidModel(
+                f"Could not load tokeniser for model {model_id!r}."
+            ) from e
        except (TimeoutError, RequestError):
-            logger.info(f"Couldn't load tokenizer for {model_id!r}. Retrying.")
+            logger.info(f"Couldn't load tokeniser for {model_id!r}. Retrying.")
            sleep(5)
            continue
    else:
        raise InvalidModel(
-            f"Could not load tokenizer for model {model_id!r} after {num_retries} "
+            f"Could not load tokeniser for model {model_id!r} after {num_retries} "
            "attempts."
        )
 
    # Ensure that BOS, EOS and PAD tokens are set
-    tokenizer.bos_token, tokenizer.bos_token_id = get_bos_token(tokenizer=tokenizer)
-    tokenizer.eos_token, tokenizer.eos_token_id = get_eos_token(tokenizer=tokenizer)
+    tokeniser.bos_token, tokeniser.bos_token_id = get_bos_token(tokeniser=tokeniser)
+    tokeniser.eos_token, tokeniser.eos_token_id = get_eos_token(tokeniser=tokeniser)
 
-    return
+    return tokeniser
 
 
-def
-    device: torch.device,
+def get_dtype(
+    device: torch.device, dtype_is_set: bool, bf16_available: bool
 ) -> str | torch.dtype:
    """Get the torch dtype, used for loading the model.
 
    Args:
        device:
            The device to use.
-
-            Whether the torch data type is set in the model configuration.
+            Whether the data type is set in the model configuration.
        bf16_available:
            Whether bfloat16 is available.
 
    Returns:
-        The
+        The dtype.
    """
    using_cuda = device == torch.device("cuda")
-    if using_cuda and
+    if using_cuda and dtype_is_set:
        return "auto"
    elif using_cuda and bf16_available:
        return torch.bfloat16
@@ -1009,7 +991,7 @@ def load_hf_model_config(
            id2label=id2label,
            label2id=label2id,
            revision=revision,
-            token=api_key
+            token=get_hf_token(api_key=api_key),
            trust_remote_code=trust_remote_code,
            cache_dir=model_cache_dir,
        )
@@ -1024,7 +1006,7 @@ def load_hf_model_config(
            raise InvalidModel(
                f"The model config for the model {model_id!r} could not be "
                f"loaded, as the key {key!r} was not found in the config."
-            )
+            ) from e
        except (OSError, GatedRepoError) as e:
            # TEMP: When the model is gated then we cannot set cache dir, for some
            # reason (since transformers v4.38.2, still a problem in v4.48.0). This
@@ -1035,7 +1017,7 @@ def load_hf_model_config(
            raise InvalidModel(
                f"Couldn't load model config for {model_id!r}. The error was "
                f"{e!r}. Skipping"
-            )
+            ) from e
        except (TimeoutError, RequestError):
            logger.info(f"Couldn't load model config for {model_id!r}. Retrying.")
            sleep(5)
@@ -1045,17 +1027,17 @@ def load_hf_model_config(
                raise InvalidModel(
                    f"The model {model_id!r} is awaiting a review from the repository "
                    "authors. Please try again later."
-                )
+                ) from e
            if "trust_remote_code" in str(e):
                raise NeedsAdditionalArgument(
                    cli_argument="--trust-remote-code",
                    script_argument="trust_remote_code=True",
                    run_with_cli=run_with_cli,
-                )
+                ) from e
            raise InvalidModel(
                f"The config for the model {model_id!r} could not be loaded. The "
                f"error was {e!r}."
-            )
+            ) from e
 
 
 def setup_model_for_question_answering(model: "PreTrainedModel") -> "PreTrainedModel":
@@ -1140,33 +1122,33 @@ def get_children_of_module(
    return submodules
 
 
-def align_model_and_tokenizer(
+def align_model_and_tokeniser(
    model: "PreTrainedModel",
-    tokenizer: "PreTrainedTokenizer",
+    tokeniser: "PreTrainedTokenizer",
    model_max_length: int,
    raise_errors: bool = False,
 ) -> tuple["PreTrainedModel", "PreTrainedTokenizer"]:
-    """Aligns the model and the tokenizer.
+    """Aligns the model and the tokeniser.
 
    Args:
        model:
            The model to fix.
-        tokenizer:
-            The tokenizer to fix.
+        tokeniser:
+            The tokeniser to fix.
        model_max_length:
            The maximum length of the model.
        raise_errors:
            Whether to raise errors instead of trying to fix them silently.
 
    Returns:
-        The fixed model and tokenizer.
+        The fixed model and tokeniser.
    """
    model_max_length = min(model_max_length, MAX_CONTEXT_LENGTH)
 
    if model_max_length > 0:
-        tokenizer.model_max_length = model_max_length
+        tokeniser.model_max_length = model_max_length
    else:
-        tokenizer.model_max_length = 512
+        tokeniser.model_max_length = 512
 
    # Move the model to the CPU, since otherwise we can't catch the IndexErrors when
    # finding the maximum sequence length of the model
@@ -1175,9 +1157,9 @@ def align_model_and_tokenizer(
 
    # Manually check that this model max length is valid for the model, and adjust
    # otherwise
-    initial_max_length = tokenizer.model_max_length
+    initial_max_length = tokeniser.model_max_length
    for max_length in range(initial_max_length, 0, -1):
-        tokenizer.model_max_length = max_length
+        tokeniser.model_max_length = max_length
        dummy_inputs = torch.full(
            size=(1, max_length),
            fill_value=DUMMY_FILL_VALUE,
@@ -1204,24 +1186,24 @@ def align_model_and_tokenizer(
    # Move the model back to the original device
    model.to(model_device) # type: ignore[arg-type]
 
-    # If there is a mismatch between the vocab size according to the tokenizer and
+    # If there is a mismatch between the vocab size according to the tokeniser and
    # the vocab size according to the model, we raise an error
    if hasattr(model.config, "vocab_size"):
-        if model.config.vocab_size < len(tokenizer):
+        if model.config.vocab_size < len(tokeniser):
            if raise_errors:
                raise InvalidModel(
-                    "The vocab size of the tokenizer is larger than the vocab size of "
+                    "The vocab size of the tokeniser is larger than the vocab size of "
                    "the model. As the --raise-errors option was specified, the "
                    "embeddings of the model will not be automatically adjusted."
                )
            if hasattr(model, "resize_token_embeddings"):
-                model.resize_token_embeddings(new_num_tokens=tokenizer.vocab_size + 1)
+                model.resize_token_embeddings(new_num_tokens=tokeniser.vocab_size + 1)
 
-    if tokenizer.bos_token is None and tokenizer.eos_token is not None:
-        tokenizer.bos_token = tokenizer.eos_token
-        tokenizer.bos_token_id = tokenizer.eos_token_id
+    if tokeniser.bos_token is None and tokeniser.eos_token is not None:
+        tokeniser.bos_token = tokeniser.eos_token
+        tokeniser.bos_token_id = tokeniser.eos_token_id
 
-    return model, tokenizer
+    return model, tokeniser
 
 
 def task_group_to_class_name(task_group: TaskGroup) -> str:
|