EuroEval 16.3.0-py3-none-any.whl → 16.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (78)
  1. euroeval/__init__.py +9 -2
  2. euroeval/benchmark_config_factory.py +51 -50
  3. euroeval/benchmark_modules/base.py +9 -21
  4. euroeval/benchmark_modules/fresh.py +2 -1
  5. euroeval/benchmark_modules/hf.py +101 -71
  6. euroeval/benchmark_modules/litellm.py +115 -53
  7. euroeval/benchmark_modules/vllm.py +107 -92
  8. euroeval/benchmarker.py +144 -121
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/cli.py +86 -8
  12. euroeval/constants.py +9 -0
  13. euroeval/data_loading.py +80 -29
  14. euroeval/data_models.py +338 -330
  15. euroeval/dataset_configs/__init__.py +12 -3
  16. euroeval/dataset_configs/bulgarian.py +56 -0
  17. euroeval/dataset_configs/czech.py +75 -0
  18. euroeval/dataset_configs/danish.py +55 -93
  19. euroeval/dataset_configs/dutch.py +48 -87
  20. euroeval/dataset_configs/english.py +45 -77
  21. euroeval/dataset_configs/estonian.py +42 -34
  22. euroeval/dataset_configs/faroese.py +19 -60
  23. euroeval/dataset_configs/finnish.py +36 -69
  24. euroeval/dataset_configs/french.py +39 -75
  25. euroeval/dataset_configs/german.py +45 -82
  26. euroeval/dataset_configs/greek.py +64 -0
  27. euroeval/dataset_configs/icelandic.py +54 -91
  28. euroeval/dataset_configs/italian.py +42 -79
  29. euroeval/dataset_configs/latvian.py +28 -35
  30. euroeval/dataset_configs/lithuanian.py +28 -26
  31. euroeval/dataset_configs/norwegian.py +72 -115
  32. euroeval/dataset_configs/polish.py +33 -61
  33. euroeval/dataset_configs/portuguese.py +33 -66
  34. euroeval/dataset_configs/serbian.py +64 -0
  35. euroeval/dataset_configs/slovak.py +55 -0
  36. euroeval/dataset_configs/spanish.py +42 -77
  37. euroeval/dataset_configs/swedish.py +52 -90
  38. euroeval/dataset_configs/ukrainian.py +64 -0
  39. euroeval/exceptions.py +1 -1
  40. euroeval/finetuning.py +24 -17
  41. euroeval/generation.py +15 -14
  42. euroeval/generation_utils.py +8 -8
  43. euroeval/languages.py +395 -323
  44. euroeval/logging_utils.py +250 -0
  45. euroeval/metrics/base.py +0 -3
  46. euroeval/metrics/huggingface.py +21 -6
  47. euroeval/metrics/llm_as_a_judge.py +6 -4
  48. euroeval/metrics/pipeline.py +17 -9
  49. euroeval/metrics/speed.py +0 -3
  50. euroeval/model_cache.py +17 -19
  51. euroeval/model_config.py +4 -5
  52. euroeval/model_loading.py +3 -0
  53. euroeval/prompt_templates/__init__.py +2 -0
  54. euroeval/prompt_templates/classification.py +206 -0
  55. euroeval/prompt_templates/linguistic_acceptability.py +99 -42
  56. euroeval/prompt_templates/multiple_choice.py +102 -38
  57. euroeval/prompt_templates/named_entity_recognition.py +172 -51
  58. euroeval/prompt_templates/reading_comprehension.py +119 -42
  59. euroeval/prompt_templates/sentiment_classification.py +110 -40
  60. euroeval/prompt_templates/summarization.py +85 -40
  61. euroeval/prompt_templates/token_classification.py +279 -0
  62. euroeval/scores.py +11 -10
  63. euroeval/speed_benchmark.py +5 -6
  64. euroeval/task_group_utils/multiple_choice_classification.py +2 -4
  65. euroeval/task_group_utils/question_answering.py +24 -16
  66. euroeval/task_group_utils/sequence_classification.py +48 -35
  67. euroeval/task_group_utils/text_to_text.py +19 -9
  68. euroeval/task_group_utils/token_classification.py +21 -17
  69. euroeval/tasks.py +44 -1
  70. euroeval/tokenisation_utils.py +33 -22
  71. euroeval/types.py +10 -9
  72. euroeval/utils.py +35 -149
  73. {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +196 -39
  74. euroeval-16.5.0.dist-info/RECORD +81 -0
  75. euroeval-16.3.0.dist-info/RECORD +0 -71
  76. {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
  77. {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
  78. {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_modules/vllm.py
@@ -14,10 +14,9 @@ from time import sleep
 import torch
 from huggingface_hub import snapshot_download
 from pydantic import conlist, create_model
-from tqdm.auto import tqdm
-from transformers import MistralCommonTokenizer
 from transformers.models.auto.configuration_auto import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
+from transformers.tokenization_mistral_common import MistralCommonTokenizer
 from urllib3.exceptions import RequestError
 
 from ..constants import (
@@ -30,7 +29,7 @@ from ..constants import (
     REASONING_TOKENS,
     VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY,
 )
-from ..data_models import GenerativeModelOutput, ModelConfig
+from ..data_models import GenerativeModelOutput, HashableDict, ModelConfig
 from ..enums import (
     BatchingPreference,
     GenerativeType,
@@ -50,6 +49,7 @@ from ..generation_utils import (
     raise_if_wrong_params,
 )
 from ..languages import get_all_languages
+from ..logging_utils import get_pbar, log, log_once, no_terminal_output
 from ..task_group_utils import (
     question_answering,
     sequence_classification,
@@ -73,7 +73,6 @@ from ..utils import (
     get_hf_token,
     get_min_cuda_compute_capability,
     internet_connection_available,
-    log_once,
     resolve_model_path,
     split_model_id,
 )
@@ -86,7 +85,7 @@ if t.TYPE_CHECKING or importlib.util.find_spec("vllm") is not None:
         destroy_model_parallel,
     )
     from vllm.lora.request import LoRARequest
-    from vllm.sampling_params import GuidedDecodingParams
+    from vllm.sampling_params import StructuredOutputsParams
 
 if t.TYPE_CHECKING:
     from datasets import DatasetDict
@@ -95,8 +94,6 @@ if t.TYPE_CHECKING:
 
     from ..data_models import BenchmarkConfig, DatasetConfig, Task
 
-logger = logging.getLogger("euroeval")
-
 
 class VLLMModel(HuggingFaceEncoderModel):
     """A generative model using the vLLM inference framework."""
@@ -132,9 +129,10 @@ class VLLMModel(HuggingFaceEncoderModel):
             model_config=model_config, allowed_params=self.allowed_params
         )
 
-        model, tokeniser = load_model_and_tokeniser(
-            model_config=model_config, benchmark_config=benchmark_config
-        )
+        with no_terminal_output(disable=benchmark_config.verbose):
+            model, tokeniser = load_model_and_tokeniser(
+                model_config=model_config, benchmark_config=benchmark_config
+            )
         self._model: "LLM" = model
         self._tokeniser: "PreTrainedTokenizer" = tokeniser
 
@@ -245,6 +243,7 @@ class VLLMModel(HuggingFaceEncoderModel):
                 return partial(
                     sequence_classification.extract_labels_from_generation,
                     dataset_config=self.dataset_config,
+                    model_config=self.model_config,
                     first_label_token_mapping=self.buffer["first_label_token_mapping"],
                 )
             case TaskGroup.TEXT_TO_TEXT:
@@ -394,10 +393,11 @@ class VLLMModel(HuggingFaceEncoderModel):
             self.dataset_config.task.uses_structured_output
             or (self.dataset_config.task.uses_logprobs and self.dataset_config.labels)
         ) and self.generative_type == GenerativeType.REASONING:
-            guided_decoding = None
-            logger.debug(
+            structured_outputs = None
+            log(
                 "The dataset uses structured output, but we are not using it as the "
-                "model is a reasoning model."
+                "model is a reasoning model.",
+                level=logging.DEBUG,
             )
         elif self.dataset_config.task.uses_structured_output:
             ner_tag_names = list(self.dataset_config.prompt_label_mapping.values())
@@ -412,21 +412,29 @@ class VLLMModel(HuggingFaceEncoderModel):
                 f"{json.dumps(structured_generation_schema)}",
                 level=logging.DEBUG,
             )
-            guided_decoding = GuidedDecodingParams(json=structured_generation_schema)
+            structured_outputs = StructuredOutputsParams(
+                json=structured_generation_schema
+            )
         elif self.dataset_config.task.uses_logprobs and self.dataset_config.labels:
-            guided_decoding = GuidedDecodingParams(
-                choice=[
-                    self.dataset_config.prompt_label_mapping[label]
-                    for label in self.dataset_config.labels
+            choice_labels = [
+                self.dataset_config.prompt_label_mapping[label]
+                for label in self.dataset_config.labels
+            ]
+            if "first_label_token_mapping" in self.buffer and isinstance(
+                self.buffer["first_label_token_mapping"], dict
+            ):
+                choice_labels = [
+                    self.buffer["first_label_token_mapping"][label]
+                    for label in choice_labels
                 ]
-            )
+            structured_outputs = StructuredOutputsParams(choice=choice_labels)
             log_once(
                 "Using structured generation with the choices: "
-                f"{guided_decoding.choice!r}.",
+                f"{structured_outputs.choice!r}.",
                 level=logging.DEBUG,
             )
         else:
-            guided_decoding = None
+            structured_outputs = None
             log_once(
                 "Not using structured generation as the dataset does not require it.",
                 level=logging.DEBUG,
@@ -445,14 +453,14 @@ class VLLMModel(HuggingFaceEncoderModel):
             else None,
             temperature=0.0,
             stop=[stop_token for stop_token in stop_tokens if stop_token],
-            guided_decoding=guided_decoding,
+            structured_outputs=structured_outputs,
         )
 
         # If any of the prompts are empty then we need to replace them with a BOS token
         # so that the vLLM model can generate from them
-        prompts: list[str] = inputs["text"]
+        prompts: c.Sequence[str] = inputs["text"]
         if any(len(prompt) == 0 for prompt in prompts):
-            logger.debug("Found empty prompts, replacing with BOS token.")
+            log("Found empty prompts, replacing with BOS token.", level=logging.DEBUG)
             prompts = [
                 prompt if len(prompt) > 0 else str(self._tokeniser.bos_token)
                 for prompt in prompts
@@ -480,13 +488,14 @@ class VLLMModel(HuggingFaceEncoderModel):
                 raw_outputs = self._model.generate(
                     prompts=prompts,
                     sampling_params=sampling_params,
-                    use_tqdm=False if input_is_a_test else get_pbar_without_leave,
+                    use_tqdm=False if input_is_a_test else get_pbar,
                     lora_request=self.buffer.get("lora_request"),
                 )
                 break
             except TypeError as e:
-                logger.debug(
-                    f"Encountered error during vLLM generation: {str(e)}. Retrying..."
+                log(
+                    f"Encountered error during vLLM generation: {str(e)}. Retrying...",
+                    level=logging.DEBUG,
                 )
                 sleep(1)
             except ValueError as e:
@@ -498,10 +507,11 @@ class VLLMModel(HuggingFaceEncoderModel):
                     re.search(pattern, str(e), flags=re.IGNORECASE) is not None
                     for pattern in truncate_error_messages
                 ):
-                    logger.info(
-                        "Prompts are too long, so truncating them and trying again..."
+                    log(
+                        "Prompts are too long, so truncating them and trying again...",
+                        level=logging.WARNING,
                     )
-                    logger.debug(f"The error message was: {str(e)}")
+                    log(f"The error message was: {str(e)}", level=logging.DEBUG)
 
                     # If we have already tried truncating the prompts a few times, then
                     # we truncate a bit more aggressively
@@ -544,49 +554,50 @@ class VLLMModel(HuggingFaceEncoderModel):
                 f"{num_extra_outputs!r} extra outputs."
             )
         else:
-            logger.debug(
+            log(
                 f"Filtered out {num_extra_outputs:,} extra outputs from the model, "
                 "which occured as we interupted the generation when we truncated "
-                "the prompts."
+                "the prompts.",
+                level=logging.DEBUG,
             )
 
         # Parse the raw model outputs
-        completion_ids: list[list[int]] = [
-            output.outputs[0].token_ids for output in raw_outputs
+        completion_ids: c.Sequence[c.Sequence[int]] = [
+            list(output.outputs[0].token_ids) for output in raw_outputs
         ]
         completions = self._tokeniser.batch_decode(
             sequences=[
                 torch.LongTensor(completion_id) for completion_id in completion_ids
-            ]
+            ],
+            skip_special_tokens=True,
         )
         if (
             self.end_of_reasoning_token is not None
             and self.generative_type == GenerativeType.REASONING
         ):
+            num_samples_without_eor_token = 0
             for idx in range(len(completions)):
                 if self.end_of_reasoning_token in completions[idx]:
                     completions[idx] = completions[idx].split(
                         self.end_of_reasoning_token
                     )[-1]
-                elif self.benchmark_config.verbose:
-                    logger.warning(
-                        f"The model {self.model_config.model_id!r} is a reasoning "
-                        "model, but the generated output does not contain the end of "
-                        f"reasoning token ({self.end_of_reasoning_token!r}). Using "
-                        "an empty string as the prediction instead."
-                    )
-                    completions[idx] = ""
                 else:
-                    log_once(
-                        f"The model {self.model_config.model_id!r} is a reasoning "
-                        "model, but the generated output does not contain the end of "
-                        f"reasoning token ({self.end_of_reasoning_token!r}). Using "
-                        "an empty string as the prediction instead. Only showing "
-                        "this warning once - see all occurrences if you run with the "
-                        "`verbose` flag.",
-                        level=logging.WARNING,
-                    )
+                    num_samples_without_eor_token += 1
                     completions[idx] = ""
+            if num_samples_without_eor_token > 0:
+                log_once(
+                    f"The model {self.model_config.model_id!r} is a reasoning "
+                    "model, but the generated output did not contain the end of "
+                    f"reasoning token ({self.end_of_reasoning_token!r}) in "
+                    f"{num_samples_without_eor_token:,}/{len(completions):,} of "
+                    "the samples. Using an empty string for all these samples "
+                    "instead.",
+                    level=(
+                        logging.WARNING
+                        if num_samples_without_eor_token / len(completions) > 0.5
+                        else logging.DEBUG
+                    ),
+                )
         stop_token_pattern = re.compile(
             "|".join(re.escape(stop_token) for stop_token in stop_tokens)
         )
@@ -604,13 +615,13 @@ class VLLMModel(HuggingFaceEncoderModel):
 
         # Add logprobs scores to the output
        if self.buffer["first_label_token_mapping"]:
-            scores: list[list[list[tuple[str, float]]]] = [
+            scores: c.Sequence[c.Sequence[c.Sequence[tuple[str, float]]]] = [
                 [
                     [
-                        (obj.decoded_token, obj.logprob)
+                        (obj.decoded_token or "", obj.logprob)
                         for obj in token_logprobs_dict.values()
                     ]
-                    for token_logprobs_dict in raw_output.outputs[0].logprobs
+                    for token_logprobs_dict in raw_output.outputs[0].logprobs or list()
                 ]
                 for raw_output in raw_outputs
             ]
@@ -648,7 +659,13 @@ class VLLMModel(HuggingFaceEncoderModel):
         revision = model_id_components.revision
 
         model_info = get_model_repo_info(
-            model_id=model_id, revision=revision, benchmark_config=benchmark_config
+            model_id=model_id,
+            revision=revision,
+            api_key=benchmark_config.api_key,
+            cache_dir=benchmark_config.cache_dir,
+            trust_remote_code=benchmark_config.trust_remote_code,
+            requires_safetensors=benchmark_config.requires_safetensors,
+            run_with_cli=benchmark_config.run_with_cli,
         )
         return (
             model_info is not None
@@ -674,7 +691,11 @@ class VLLMModel(HuggingFaceEncoderModel):
         model_info = get_model_repo_info(
             model_id=model_id_components.model_id,
             revision=model_id_components.revision,
-            benchmark_config=benchmark_config,
+            api_key=benchmark_config.api_key,
+            cache_dir=benchmark_config.cache_dir,
+            trust_remote_code=benchmark_config.trust_remote_code,
+            requires_safetensors=benchmark_config.requires_safetensors,
+            run_with_cli=benchmark_config.run_with_cli,
         )
         if model_info is None:
             raise InvalidModel(f"The model {model_id!r} could not be found.")
@@ -705,7 +726,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         return model_config
 
     @property
-    def data_collator(self) -> c.Callable[[list[t.Any]], dict[str, t.Any]]:
+    def data_collator(self) -> c.Callable[[c.Sequence[t.Any]], dict[str, t.Any]]:
        """The data collator used to prepare samples during finetuning.
 
         Returns:
@@ -751,8 +772,8 @@ def load_model_and_tokeniser(
     hf_model_config = load_hf_model_config(
         model_id=model_id,
         num_labels=0,
-        id2label=dict(),
-        label2id=dict(),
+        id2label=HashableDict(),
+        label2id=HashableDict(),
         revision=revision,
         model_cache_dir=model_config.model_cache_dir,
         api_key=benchmark_config.api_key,
@@ -779,32 +800,36 @@ def load_model_and_tokeniser(
     # Choose bf16 over fp16 if the model is a fp32 model and the GPU supports it
     if hf_model_config.dtype == torch.float32:
         if torch.cuda.is_bf16_supported():
-            logger.info(
+            log(
                 "You are loading a model with dtype FP32, which we will convert to "
                 "BF16 as FP32 is not supported by vLLM and BF16 is supported by your "
-                "GPU."
+                "GPU.",
+                level=logging.WARNING,
             )
             dtype = torch.bfloat16
         else:
-            logger.info(
+            log(
                 "You are loading a model with dtype FP32, which we will convert to "
                 "FP16 as FP32 is not supported by vLLM and BF16 is not supported by "
-                "your GPU."
+                "your GPU.",
+                level=logging.WARNING,
             )
             dtype = torch.float16
 
     # If the model is a quantized model, we might need to change the dtype
     if quantization == "mxfp4" and hf_model_config.dtype is None:
         dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-        logger.debug(
+        log(
             "You are loading a quantized model where `dtype` has not been set. "
-            f"Setting dtype to {dtype!r}."
+            f"Setting dtype to {dtype!r}.",
+            level=logging.DEBUG,
         )
     elif quantization is not None and hf_model_config.dtype != torch.float16:
-        logger.info(
+        log(
             "You are loading a quantized model with dtype "
             f"{hf_model_config.dtype}, which vLLM does not support. Setting "
-            "dtype to float16 instead."
+            "dtype to float16 instead.",
+            level=logging.WARNING,
         )
         dtype = torch.float16
 
@@ -815,12 +840,13 @@
 
     if min_cuda_compute_capability is not None:
         if min_cuda_compute_capability < required_capability:
-            logger.info(
+            log(
                 f"You are loading a model with dtype {hf_model_config.dtype}, "
                 "which vLLM only supports for CUDA devices with CUDA compute "
                 f"capability >={required_capability}. You are using one or more "
                 f"devices with compute capability {min_cuda_compute_capability}. "
-                "Setting dtype to float16 instead."
+                "Setting dtype to float16 instead.",
+                level=logging.WARNING,
             )
             dtype = torch.float16
 
@@ -987,13 +1013,17 @@ def load_tokeniser(
                     f"Could not load tokeniser for model {model_id!r}. The error was "
                     f"{str(e)}."
                 ) from e
-            logger.debug(
+            log(
                 f"Could not load tokeniser for {model_id!r}. Falling back to "
-                f"{adapter_base_model_id!r}."
+                f"{adapter_base_model_id!r}.",
+                level=logging.DEBUG,
             )
             model_id = adapter_base_model_id
         except (TimeoutError, RequestError):
-            logger.info(f"Couldn't load tokeniser for {model_id!r}. Retrying.")
+            log(
+                f"Couldn't load tokeniser for {model_id!r}. Retrying.",
+                level=logging.WARNING,
+            )
             sleep(5)
             continue
         except (KeyError, ValueError) as e:
@@ -1192,32 +1222,17 @@ def get_custom_stop_tokens(
         if stop_token in prompt or stop_token in completion
     ]
     if stop_tokens:
-        logger.debug(
+        log(
             f"Found the following custom stop tokens for model {model_id!r}: "
-            f"{stop_tokens}."
+            f"{stop_tokens}.",
+            level=logging.DEBUG,
         )
     else:
-        logger.debug(f"Found no custom stop tokens for model {model_id!r}.")
+        log(f"Found no custom stop tokens for model {model_id!r}.", level=logging.DEBUG)
 
     return stop_tokens
 
 
-def get_pbar_without_leave(*tqdm_args, **tqdm_kwargs) -> tqdm:
-    """Get a progress bar for vLLM which disappears after completion.
-
-    Args:
-        *tqdm_args:
-            Positional arguments to pass to tqdm.
-        **tqdm_kwargs:
-            Additional keyword arguments to pass to tqdm.
-
-    Returns:
-        A tqdm progress bar.
-    """
-    tqdm_kwargs.pop("leave", None)  # Remove the 'leave' key if it exists
-    return tqdm(*tqdm_args, leave=False, **tqdm_kwargs)
-
-
 def get_vllm_tokenisation_params(
     tokeniser: "PreTrainedTokenizer", model_config: "ModelConfig"
 ) -> dict[str, t.Any]: