EuroEval 15.15.0-py3-none-any.whl → 16.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (63)
  1. euroeval/__init__.py +3 -7
  2. euroeval/benchmark_config_factory.py +3 -7
  3. euroeval/benchmark_modules/base.py +35 -19
  4. euroeval/benchmark_modules/fresh.py +24 -19
  5. euroeval/benchmark_modules/hf.py +136 -154
  6. euroeval/benchmark_modules/litellm.py +323 -193
  7. euroeval/benchmark_modules/vllm.py +166 -112
  8. euroeval/benchmarker.py +59 -33
  9. euroeval/cli.py +3 -3
  10. euroeval/constants.py +13 -15
  11. euroeval/data_loading.py +33 -28
  12. euroeval/data_models.py +53 -7
  13. euroeval/dataset_configs/__init__.py +2 -0
  14. euroeval/dataset_configs/danish.py +38 -1
  15. euroeval/dataset_configs/dutch.py +38 -1
  16. euroeval/dataset_configs/english.py +38 -1
  17. euroeval/dataset_configs/estonian.py +95 -0
  18. euroeval/dataset_configs/faroese.py +38 -0
  19. euroeval/dataset_configs/finnish.py +39 -1
  20. euroeval/dataset_configs/french.py +38 -1
  21. euroeval/dataset_configs/german.py +38 -1
  22. euroeval/dataset_configs/icelandic.py +39 -1
  23. euroeval/dataset_configs/italian.py +38 -1
  24. euroeval/dataset_configs/latvian.py +81 -0
  25. euroeval/dataset_configs/norwegian.py +38 -1
  26. euroeval/dataset_configs/portuguese.py +38 -1
  27. euroeval/dataset_configs/spanish.py +38 -1
  28. euroeval/dataset_configs/swedish.py +38 -1
  29. euroeval/enums.py +0 -6
  30. euroeval/finetuning.py +8 -7
  31. euroeval/generation.py +25 -14
  32. euroeval/generation_utils.py +46 -14
  33. euroeval/languages.py +947 -187
  34. euroeval/metrics/__init__.py +6 -0
  35. euroeval/metrics/base.py +76 -0
  36. euroeval/metrics/huggingface.py +192 -0
  37. euroeval/metrics/llm_as_a_judge.py +257 -0
  38. euroeval/metrics/pipeline.py +234 -0
  39. euroeval/metrics/speed.py +51 -0
  40. euroeval/prompt_templates/linguistic_acceptability.py +40 -2
  41. euroeval/prompt_templates/multiple_choice.py +23 -2
  42. euroeval/prompt_templates/named_entity_recognition.py +65 -2
  43. euroeval/prompt_templates/reading_comprehension.py +42 -2
  44. euroeval/prompt_templates/sentiment_classification.py +46 -2
  45. euroeval/prompt_templates/summarization.py +24 -4
  46. euroeval/scores.py +7 -2
  47. euroeval/speed_benchmark.py +6 -6
  48. euroeval/task_group_utils/multiple_choice_classification.py +17 -6
  49. euroeval/task_group_utils/question_answering.py +35 -28
  50. euroeval/task_group_utils/sequence_classification.py +96 -23
  51. euroeval/task_group_utils/text_to_text.py +7 -3
  52. euroeval/task_group_utils/token_classification.py +47 -75
  53. euroeval/tasks.py +31 -6
  54. euroeval/tokenization_utils.py +295 -207
  55. euroeval/utils.py +118 -34
  56. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +12 -14
  57. euroeval-16.0.0.dist-info/RECORD +69 -0
  58. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
  59. euroeval/human_evaluation.py +0 -738
  60. euroeval/metrics.py +0 -468
  61. euroeval-15.15.0.dist-info/RECORD +0 -63
  62. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
  63. {euroeval-15.15.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,6 @@
 
 import collections.abc as c
 import logging
-import os
 import typing as t
 from functools import cached_property, partial
 from json import JSONDecodeError
@@ -68,6 +67,7 @@ from ..utils import (
     block_terminal_output,
     create_model_cache_dir,
     get_class_by_name,
+    get_hf_token,
     internet_connection_available,
     log_once,
 )
@@ -96,6 +96,7 @@ class HuggingFaceEncoderModel(BenchmarkModule):
         model_config: "ModelConfig",
         dataset_config: "DatasetConfig",
         benchmark_config: "BenchmarkConfig",
+        log_metadata: bool = True,
     ) -> None:
         """Initialise the model.
 
@@ -106,18 +107,20 @@
                 The dataset configuration.
             benchmark_config:
                 The benchmark configuration.
+            log_metadata:
+                Whether to log the model metadata.
         """
-        model, tokenizer = load_model_and_tokenizer(
+        model, tokeniser = load_model_and_tokeniser(
             model_config=model_config,
             dataset_config=dataset_config,
             benchmark_config=benchmark_config,
         )
         self._model: "PreTrainedModel" = model
-        self._tokenizer: "PreTrainedTokenizer" = tokenizer
+        self._tokeniser: "PreTrainedTokenizer" = tokeniser
 
-        self._model, self._tokenizer = align_model_and_tokenizer(
+        self._model, self._tokeniser = align_model_and_tokeniser(
             model=self._model,
-            tokenizer=self._tokenizer,
+            tokeniser=self._tokeniser,
             model_max_length=self.model_max_length,
             raise_errors=benchmark_config.raise_errors,
         )
@@ -126,6 +129,7 @@
             model_config=model_config,
             dataset_config=dataset_config,
             benchmark_config=benchmark_config,
+            log_metadata=log_metadata,
         )
 
     @cached_property
@@ -135,9 +139,7 @@
         Returns:
             The number of parameters in the model.
         """
-        token = (
-            self.benchmark_config.api_key or os.getenv("HUGGINGFACE_API_KEY") or True
-        )
+        token = get_hf_token(api_key=self.benchmark_config.api_key)
         hf_api = HfApi(token=token)
         try:
             repo_info = hf_api.model_info(
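The recurring inline expression `api_key or os.getenv("HUGGINGFACE_API_KEY") or True` is consolidated into the new `get_hf_token` helper imported from `euroeval/utils.py`. Judging purely from the inline logic it replaces, the helper presumably reduces to a sketch like this:

import os

def get_hf_token(api_key: str | None = None) -> str | bool:
    # Prefer an explicit API key, fall back to the HUGGINGFACE_API_KEY
    # environment variable, and finally return True so that huggingface_hub
    # uses locally cached credentials.
    return api_key or os.getenv("HUGGINGFACE_API_KEY") or True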
@@ -191,10 +193,10 @@
         ):
             vocab_size = self._model.config.vocab_size
         elif (
-            hasattr(self._tokenizer, "vocab_size")
-            and self._tokenizer.vocab_size is not None
+            hasattr(self._tokeniser, "vocab_size")
+            and self._tokeniser.vocab_size is not None
         ):
-            vocab_size = self._tokenizer.vocab_size
+            vocab_size = self._tokeniser.vocab_size
         else:
             vocab_size = -1
         return vocab_size
@@ -208,18 +210,18 @@
         """
         all_max_lengths: list[int] = list()
 
-        # Add the registered max length of the tokenizer
+        # Add the registered max length of the tokeniser
         if hasattr(
-            self._tokenizer, "model_max_length"
-        ) and self._tokenizer.model_max_length < int(1e30):
-            all_max_lengths.append(self._tokenizer.model_max_length)
+            self._tokeniser, "model_max_length"
+        ) and self._tokeniser.model_max_length < int(1e30):
+            all_max_lengths.append(self._tokeniser.model_max_length)
 
         # Add the max length derived from the model's input sizes
-        if hasattr(self._tokenizer, "max_model_input_sizes"):
+        if hasattr(self._tokeniser, "max_model_input_sizes"):
             all_max_lengths.extend(
                 [
                     size
-                    for size in self._tokenizer.max_model_input_sizes.values()
+                    for size in self._tokeniser.max_model_input_sizes.values()
                     if size is not None
                 ]
             )
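The `int(1e30)` guard filters out transformers' `VERY_LARGE_INTEGER` sentinel, which tokenisers report as `model_max_length` when no real limit is configured. A quick illustration (model id chosen arbitrarily):

from transformers import AutoTokenizer

tokeniser = AutoTokenizer.from_pretrained("bert-base-cased")
print(tokeniser.model_max_length)              # 512, a real limit
print(tokeniser.model_max_length < int(1e30))  # True, so it would be collected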
@@ -275,10 +277,10 @@
                 | TaskGroup.QUESTION_ANSWERING
                 | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
             ):
-                return DataCollatorWithPadding(self._tokenizer, padding="longest")
+                return DataCollatorWithPadding(self._tokeniser, padding="longest")
             case TaskGroup.TOKEN_CLASSIFICATION:
                 return DataCollatorForTokenClassification(
-                    tokenizer=self._tokenizer, label_pad_token_id=-100
+                    tokenizer=self._tokeniser, label_pad_token_id=-100
                 )
             case _:
                 raise NotImplementedError(
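For context, the collator returned for the classification task groups pads every batch to its longest member. A minimal usage sketch (model id arbitrary):

from transformers import AutoTokenizer, DataCollatorWithPadding

tokeniser = AutoTokenizer.from_pretrained("bert-base-cased")
collator = DataCollatorWithPadding(tokeniser, padding="longest")
batch = collator([tokeniser("short"), tokeniser("a somewhat longer sentence")])
print(batch["input_ids"].shape)  # both rows padded to the longest sequence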
@@ -357,16 +359,16 @@
                     self._model.config.label2id[lbl.lower()]
                     for lbl in examples["label"]
                 ]
-            except KeyError:
+            except KeyError as e:
                 raise InvalidBenchmark(
                     f"One of the labels in the dataset, "
                     f"{examples['label'].lower()}, does not occur in the "
                     f"label2id dictionary {self._model.config.label2id}."
-                )
+                ) from e
             return examples
 
         def tokenise(examples: dict) -> "BatchEncoding":
-            return self._tokenizer(text=examples["text"], truncation=True, padding=True)
+            return self._tokeniser(text=examples["text"], truncation=True, padding=True)
 
         match task.task_group:
             case TaskGroup.SEQUENCE_CLASSIFICATION:
@@ -376,39 +378,20 @@
 
             case TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION:
                 dataset = DatasetDict(
-                    train=dataset["train"].map(
-                        partial(
-                            multiple_choice_classification.prepare_examples,
-                            tokenizer=self._tokenizer,
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["train"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
-                    ),
-                    val=dataset["val"].map(
-                        partial(
-                            multiple_choice_classification.prepare_examples,
-                            tokenizer=self._tokenizer,
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["val"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
-                    ),
-                    test=dataset["test"].map(
-                        partial(
-                            multiple_choice_classification.prepare_examples,
-                            tokenizer=self._tokenizer,
-                        ),
-                        batched=True,
-                        batch_size=10,
-                        remove_columns=dataset["test"].column_names,
-                        load_from_cache_file=False,
-                        keep_in_memory=True,
-                    ),
+                    {
+                        split_name: split.map(
+                            partial(
+                                multiple_choice_classification.prepare_examples,
+                                tokeniser=self._tokeniser,
+                            ),
+                            batched=True,
+                            batch_size=10,
+                            remove_columns=split.column_names,
+                            load_from_cache_file=False,
+                            keep_in_memory=True,
+                        )
+                        for split_name, split in dataset.items()
+                    }
                 )
 
             case TaskGroup.TEXT_TO_TEXT:
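The refactor collapses three near-identical `train`/`val`/`test` blocks into one dict comprehension over whichever splits the `DatasetDict` actually contains. A toy version of the pattern:

from datasets import Dataset, DatasetDict

dataset = DatasetDict(
    train=Dataset.from_dict({"text": ["a"]}),
    val=Dataset.from_dict({"text": ["b"]}),
)
dataset = DatasetDict(
    {
        split_name: split.map(lambda example: {"text": example["text"].upper()})
        for split_name, split in dataset.items()
    }
)
print(dataset["train"][0])  # {'text': 'A'}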
@@ -423,7 +406,7 @@
                 dataset = dataset.map(
                     partial(
                         token_classification.tokenize_and_align_labels,
-                        tokenizer=self._tokenizer,
+                        tokeniser=self._tokeniser,
                         label2id=self._model.config.label2id,
                     ),
                     batched=True,
@@ -432,43 +415,44 @@
                 )
 
             case TaskGroup.QUESTION_ANSWERING:
-                dataset = DatasetDict(
-                    dict(
-                        train=dataset["train"].map(
-                            partial(
-                                question_answering.prepare_train_examples,
-                                tokenizer=self._tokenizer,
-                            ),
-                            batched=True,
-                            batch_size=10,
-                            remove_columns=dataset["test"].column_names,
-                            load_from_cache_file=False,
-                            keep_in_memory=True,
+                data_dict = dict()
+                if "train" in dataset:
+                    data_dict["train"] = dataset["train"].map(
+                        partial(
+                            question_answering.prepare_train_examples,
+                            tokeniser=self._tokeniser,
                         ),
-                        val=dataset["val"].map(
-                            partial(
-                                question_answering.prepare_train_examples,
-                                tokenizer=self._tokenizer,
-                            ),
-                            batched=True,
-                            batch_size=10,
-                            remove_columns=dataset["test"].column_names,
-                            load_from_cache_file=False,
-                            keep_in_memory=True,
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
+                    )
+                if "val" in dataset:
+                    data_dict["val"] = dataset["val"].map(
+                        partial(
+                            question_answering.prepare_train_examples,
+                            tokeniser=self._tokeniser,
                         ),
-                        test=dataset["test"].map(
-                            partial(
-                                question_answering.prepare_test_examples,
-                                tokenizer=self._tokenizer,
-                            ),
-                            batched=True,
-                            batch_size=10,
-                            remove_columns=dataset["test"].column_names,
-                            load_from_cache_file=False,
-                            keep_in_memory=True,
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
+                    )
+                if "test" in dataset:
+                    data_dict["test"] = dataset["test"].map(
+                        partial(
+                            question_answering.prepare_test_examples,
+                            tokeniser=self._tokeniser,
                         ),
+                        batched=True,
+                        batch_size=10,
+                        remove_columns=dataset["test"].column_names,
+                        load_from_cache_file=False,
+                        keep_in_memory=True,
                     )
-                )
+                dataset = DatasetDict(data_dict)
 
                 # The Trainer hides the columns that are not used by the model (here
                 # `id` and `offset_mapping` which we will need for our post-processing),
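The question-answering branch gains explicit membership guards, so a run where only some splits exist (for instance a test-only evaluation) no longer assumes `train` and `val` are present. The guard in miniature:

from datasets import Dataset, DatasetDict

dataset = DatasetDict(test=Dataset.from_dict({"question": ["Q?"], "context": ["C."]}))
data_dict = dict()
for split_name in ("train", "val", "test"):
    if split_name in dataset:
        data_dict[split_name] = dataset[split_name]
print(list(DatasetDict(data_dict)))  # ['test']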
@@ -559,12 +543,12 @@
     return model_config
 
 
-def load_model_and_tokenizer(
+def load_model_and_tokeniser(
     model_config: "ModelConfig",
     dataset_config: "DatasetConfig",
    benchmark_config: "BenchmarkConfig",
 ) -> tuple["PreTrainedModel", "PreTrainedTokenizer"]:
-    """Load the model and tokenizer.
+    """Load the model and tokeniser.
 
     Args:
         model_config:
@@ -575,7 +559,7 @@ def load_model_and_tokenizer(
             The benchmark configuration
 
     Returns:
-        The loaded model and tokenizer.
+        The loaded model and tokeniser.
     """
     config: "PretrainedConfig"
     block_terminal_output()
@@ -607,12 +591,12 @@
             config=config,
             ignore_mismatched_sizes=ignore_mismatched_sizes,
             revision=model_config.revision,
-            token=benchmark_config.api_key or os.getenv("HUGGINGFACE_API_KEY") or True,
+            token=get_hf_token(api_key=benchmark_config.api_key),
             cache_dir=model_config.model_cache_dir,
             trust_remote_code=benchmark_config.trust_remote_code,
-            torch_dtype=get_torch_dtype(
+            dtype=get_dtype(
                 device=benchmark_config.device,
-                torch_dtype_is_set=config.to_dict().get("torch_dtype") is not None,
+                dtype_is_set=config.to_dict().get("dtype") is not None,
                 bf16_available=(
                     torch.cuda.is_available() and torch.cuda.is_bf16_supported()
                 ),
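The `torch_dtype` keyword and config key give way to `dtype`, presumably tracking the rename in recent transformers releases, which deprecate `torch_dtype` in favour of `dtype`. An illustrative call with the new keyword (model id arbitrary):

import torch
from transformers import AutoModel

model = AutoModel.from_pretrained("bert-base-cased", dtype=torch.float32)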
@@ -658,11 +642,13 @@
                 model_kwargs["ignore_mismatched_sizes"] = True
                 continue
             else:
-                raise InvalidModel(str(e))
-        except (TimeoutError, RequestError):
+                raise InvalidModel(str(e)) from e
+        except (TimeoutError, RequestError) as e:
             attempts_left -= 1
             if attempts_left == 0:
-                raise InvalidModel("The model could not be loaded after 5 attempts.")
+                raise InvalidModel(
+                    "The model could not be loaded after 5 attempts."
+                ) from e
             logger.info(f"Couldn't load the model {model_id!r}. Retrying.")
             sleep(5)
             continue
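A pattern repeated throughout this release: raises inside `except` blocks gain `from e`, so the triggering error is preserved as `__cause__` rather than surfacing as an unrelated "during handling of the above exception" traceback. In miniature:

class InvalidModel(Exception):
    pass

try:
    try:
        raise TimeoutError("registry timed out")
    except TimeoutError as e:
        raise InvalidModel("The model could not be loaded after 5 attempts.") from e
except InvalidModel as err:
    print(type(err.__cause__).__name__)  # TimeoutError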
@@ -670,16 +656,16 @@
         if "checkpoint seems to be incorrect" in str(e):
             raise InvalidModel(
                 f"The model {model_id!r} has an incorrect checkpoint."
-            )
+            ) from e
         if "trust_remote_code" in str(e):
             raise InvalidModel(
                 f"Loading the model {model_id!r} needs to trust remote code. "
                 "If you trust the suppliers of this model, then you can enable "
                 "this by setting the `--trust-remote-code` flag."
-            )
+            ) from e
         raise InvalidModel(
             f"The model {model_id!r} could not be loaded. The error was {e!r}."
-        )
+        ) from e
 
     if isinstance(model_or_tuple, tuple):
         model = model_or_tuple[0]
@@ -697,13 +683,13 @@
     ):
         model = setup_model_for_question_answering(model=model)
 
-    tokenizer = load_tokenizer(
+    tokeniser = load_tokeniser(
         model=model,
         model_id=model_id,
         trust_remote_code=benchmark_config.trust_remote_code,
     )
 
-    return model, tokenizer
+    return model, tokeniser
 
 
 def get_model_repo_info(
@@ -722,7 +708,7 @@ def get_model_repo_info(
     Returns:
         The information about the model, or None if the model could not be found.
     """
-    token = benchmark_config.api_key or os.getenv("HUGGINGFACE_API_KEY") or True
+    token = get_hf_token(api_key=benchmark_config.api_key)
    hf_api = HfApi(token=token)
     model_id, revision = model_id.split("@") if "@" in model_id else (model_id, "main")
 
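The unchanged context line above also shows the `@`-revision convention for model ids, which behaves as follows (model id hypothetical):

model_id = "org/model@v1.0"
model_id, revision = model_id.split("@") if "@" in model_id else (model_id, "main")
print(model_id, revision)  # org/model v1.0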
@@ -800,12 +786,7 @@
                 level=logging.DEBUG,
             )
         if base_model_id is not None:
-            base_model_info = hf_api.model_info(
-                repo_id=base_model_id,
-                token=benchmark_config.api_key
-                or os.getenv("HUGGINGFACE_API_KEY")
-                or True,
-            )
+            base_model_info = hf_api.model_info(repo_id=base_model_id, token=token)
             tags += base_model_info.tags or list()
             tags = list(set(tags))
 
@@ -839,7 +820,7 @@
     else:
         pipeline_tag = "fill-mask"
 
-    if benchmark_config.only_allow_safetensors:
+    if benchmark_config.requires_safetensors:
         repo_files = hf_api.list_repo_files(repo_id=model_id, revision=revision)
         has_safetensors = any(f.endswith(".safetensors") for f in repo_files)
         if not has_safetensors:
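The renamed `requires_safetensors` gate keeps the same mechanics: list the repository files and look for a `.safetensors` weight file. Stand-alone sketch of the check (model id arbitrary; requires network access):

from huggingface_hub import HfApi

repo_files = HfApi().list_repo_files(repo_id="bert-base-cased")
print(any(f.endswith(".safetensors") for f in repo_files))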
@@ -848,7 +829,7 @@
                 msg += "Skipping since the `--only-allow-safetensors` flag is set."
             else:
                 msg += (
-                    "Skipping since the `only_allow_safetensors` argument is set "
+                    "Skipping since the `requires_safetensors` argument is set "
                     "to `True`."
                 )
             logger.warning(msg)
@@ -869,7 +850,7 @@
             msg += " Skipping since the `--only-allow-safetensors` flag is set."
         else:
             msg += (
-                " Skipping since the `only_allow_safetensors` argument is set "
+                " Skipping since the `requires_safetensors` argument is set "
                 "to `True`."
             )
         logging.warning(msg)
@@ -880,10 +861,10 @@
     )
 
 
-def load_tokenizer(
+def load_tokeniser(
     model: "PreTrainedModel | None", model_id: str, trust_remote_code: bool
 ) -> "PreTrainedTokenizer":
-    """Load the tokenizer.
+    """Load the tokeniser.
 
     Args:
         model:
@@ -895,7 +876,7 @@ def load_tokenizer(
         Whether to trust remote code.
 
     Returns:
-        The loaded tokenizer.
+        The loaded tokeniser.
     """
     loading_kwargs: dict[str, bool | str] = dict(
         use_fast=True,
@@ -918,45 +899,46 @@
     num_retries = 5
     for _ in range(num_retries):
         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **loading_kwargs)
+            tokeniser = AutoTokenizer.from_pretrained(model_id, **loading_kwargs)
             break
-        except (JSONDecodeError, OSError, TypeError):
-            raise InvalidModel(f"Could not load tokenizer for model {model_id!r}.")
+        except (JSONDecodeError, OSError, TypeError) as e:
+            raise InvalidModel(
+                f"Could not load tokeniser for model {model_id!r}."
+            ) from e
         except (TimeoutError, RequestError):
-            logger.info(f"Couldn't load tokenizer for {model_id!r}. Retrying.")
+            logger.info(f"Couldn't load tokeniser for {model_id!r}. Retrying.")
             sleep(5)
             continue
     else:
         raise InvalidModel(
-            f"Could not load tokenizer for model {model_id!r} after {num_retries} "
+            f"Could not load tokeniser for model {model_id!r} after {num_retries} "
             "attempts."
         )
 
     # Ensure that BOS, EOS and PAD tokens are set
-    tokenizer.bos_token, tokenizer.bos_token_id = get_bos_token(tokenizer=tokenizer)
-    tokenizer.eos_token, tokenizer.eos_token_id = get_eos_token(tokenizer=tokenizer)
+    tokeniser.bos_token, tokeniser.bos_token_id = get_bos_token(tokeniser=tokeniser)
+    tokeniser.eos_token, tokeniser.eos_token_id = get_eos_token(tokeniser=tokeniser)
 
-    return tokenizer
+    return tokeniser
 
 
-def get_torch_dtype(
-    device: torch.device, torch_dtype_is_set: bool, bf16_available: bool
+def get_dtype(
+    device: torch.device, dtype_is_set: bool, bf16_available: bool
 ) -> str | torch.dtype:
     """Get the torch dtype, used for loading the model.
 
     Args:
         device:
             The device to use.
-        torch_dtype_is_set:
-            Whether the torch data type is set in the model configuration.
+            Whether the data type is set in the model configuration.
         bf16_available:
             Whether bfloat16 is available.
 
     Returns:
-        The torch dtype.
+        The dtype.
     """
     using_cuda = device == torch.device("cuda")
-    if using_cuda and torch_dtype_is_set:
+    if using_cuda and dtype_is_set:
         return "auto"
     elif using_cuda and bf16_available:
         return torch.bfloat16
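Assembling the visible branches of the renamed helper gives the selection logic below; the non-CUDA fallback lies outside this hunk, so the final `torch.float32` return is an assumption:

import torch

def get_dtype(
    device: torch.device, dtype_is_set: bool, bf16_available: bool
) -> str | torch.dtype:
    using_cuda = device == torch.device("cuda")
    if using_cuda and dtype_is_set:
        return "auto"  # defer to the dtype recorded in the model config
    elif using_cuda and bf16_available:
        return torch.bfloat16
    return torch.float32  # assumed fallback, not shown in this hunk

print(get_dtype(torch.device("cpu"), dtype_is_set=False, bf16_available=False))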
@@ -1009,7 +991,7 @@ def load_hf_model_config(
             id2label=id2label,
             label2id=label2id,
             revision=revision,
-            token=api_key or os.getenv("HUGGINGFACE_API_KEY") or True,
+            token=get_hf_token(api_key=api_key),
             trust_remote_code=trust_remote_code,
             cache_dir=model_cache_dir,
         )
@@ -1024,7 +1006,7 @@
             raise InvalidModel(
                 f"The model config for the model {model_id!r} could not be "
                 f"loaded, as the key {key!r} was not found in the config."
-            )
+            ) from e
         except (OSError, GatedRepoError) as e:
             # TEMP: When the model is gated then we cannot set cache dir, for some
             # reason (since transformers v4.38.2, still a problem in v4.48.0). This
@@ -1035,7 +1017,7 @@
             raise InvalidModel(
                 f"Couldn't load model config for {model_id!r}. The error was "
                 f"{e!r}. Skipping"
-            )
+            ) from e
         except (TimeoutError, RequestError):
             logger.info(f"Couldn't load model config for {model_id!r}. Retrying.")
             sleep(5)
@@ -1045,17 +1027,17 @@
             raise InvalidModel(
                 f"The model {model_id!r} is awaiting a review from the repository "
                 "authors. Please try again later."
-            )
+            ) from e
         if "trust_remote_code" in str(e):
             raise NeedsAdditionalArgument(
                 cli_argument="--trust-remote-code",
                 script_argument="trust_remote_code=True",
                 run_with_cli=run_with_cli,
-            )
+            ) from e
         raise InvalidModel(
             f"The config for the model {model_id!r} could not be loaded. The "
             f"error was {e!r}."
-        )
+        ) from e
 
 
 def setup_model_for_question_answering(model: "PreTrainedModel") -> "PreTrainedModel":
@@ -1140,33 +1122,33 @@ def get_children_of_module(
     return submodules
 
 
-def align_model_and_tokenizer(
+def align_model_and_tokeniser(
     model: "PreTrainedModel",
-    tokenizer: "PreTrainedTokenizer",
+    tokeniser: "PreTrainedTokenizer",
     model_max_length: int,
     raise_errors: bool = False,
 ) -> tuple["PreTrainedModel", "PreTrainedTokenizer"]:
-    """Aligns the model and the tokenizer.
+    """Aligns the model and the tokeniser.
 
     Args:
         model:
             The model to fix.
-        tokenizer:
-            The tokenizer to fix.
+        tokeniser:
+            The tokeniser to fix.
         model_max_length:
             The maximum length of the model.
         raise_errors:
             Whether to raise errors instead of trying to fix them silently.
 
     Returns:
-        The fixed model and tokenizer.
+        The fixed model and tokeniser.
     """
     model_max_length = min(model_max_length, MAX_CONTEXT_LENGTH)
 
     if model_max_length > 0:
-        tokenizer.model_max_length = model_max_length
+        tokeniser.model_max_length = model_max_length
     else:
-        tokenizer.model_max_length = 512
+        tokeniser.model_max_length = 512
 
     # Move the model to the CPU, since otherwise we can't catch the IndexErrors when
     # finding the maximum sequence length of the model
@@ -1175,9 +1157,9 @@ def align_model_and_tokenizer(
 
     # Manually check that this model max length is valid for the model, and adjust
     # otherwise
-    initial_max_length = tokenizer.model_max_length
+    initial_max_length = tokeniser.model_max_length
     for max_length in range(initial_max_length, 0, -1):
-        tokenizer.model_max_length = max_length
+        tokeniser.model_max_length = max_length
         dummy_inputs = torch.full(
             size=(1, max_length),
             fill_value=DUMMY_FILL_VALUE,
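The surrounding loop probes ever-shorter dummy inputs until the model stops raising `IndexError`, which is how the usable maximum sequence length is discovered. A self-contained toy of the idea (the model below is purely illustrative):

import torch

class ToyModel(torch.nn.Module):
    def __init__(self, max_positions: int = 512) -> None:
        super().__init__()
        self.position_embed = torch.nn.Embedding(max_positions, 8)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        # Fails with IndexError past the position table, like a real encoder.
        return self.position_embed(torch.arange(input_ids.shape[1]))

model = ToyModel(max_positions=512)
for max_length in range(1024, 0, -1):
    try:
        model(torch.zeros(1, max_length, dtype=torch.long))
        break
    except IndexError:
        continue
print(max_length)  # 512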
@@ -1204,24 +1186,24 @@
     # Move the model back to the original device
     model.to(model_device)  # type: ignore[arg-type]
 
-    # If there is a mismatch between the vocab size according to the tokenizer and
+    # If there is a mismatch between the vocab size according to the tokeniser and
     # the vocab size according to the model, we raise an error
     if hasattr(model.config, "vocab_size"):
-        if model.config.vocab_size < len(tokenizer):
+        if model.config.vocab_size < len(tokeniser):
             if raise_errors:
                 raise InvalidModel(
-                    "The vocab size of the tokenizer is larger than the vocab size of "
+                    "The vocab size of the tokeniser is larger than the vocab size of "
                     "the model. As the --raise-errors option was specified, the "
                     "embeddings of the model will not be automatically adjusted."
                 )
             if hasattr(model, "resize_token_embeddings"):
-                model.resize_token_embeddings(new_num_tokens=tokenizer.vocab_size + 1)
+                model.resize_token_embeddings(new_num_tokens=tokeniser.vocab_size + 1)
 
-    if tokenizer.bos_token is None and tokenizer.eos_token is not None:
-        tokenizer.bos_token = tokenizer.eos_token
-        tokenizer.bos_token_id = tokenizer.eos_token_id
+    if tokeniser.bos_token is None and tokeniser.eos_token is not None:
+        tokeniser.bos_token = tokeniser.eos_token
+        tokeniser.bos_token_id = tokeniser.eos_token_id
 
-    return model, tokenizer
+    return model, tokeniser
 
 
 def task_group_to_class_name(task_group: TaskGroup) -> str: