EuroEval 16.3.0-py3-none-any.whl → 16.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval has been flagged as possibly problematic.
- euroeval/__init__.py +9 -2
- euroeval/benchmark_config_factory.py +51 -50
- euroeval/benchmark_modules/base.py +9 -21
- euroeval/benchmark_modules/fresh.py +2 -1
- euroeval/benchmark_modules/hf.py +101 -71
- euroeval/benchmark_modules/litellm.py +115 -53
- euroeval/benchmark_modules/vllm.py +107 -92
- euroeval/benchmarker.py +144 -121
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +86 -8
- euroeval/constants.py +9 -0
- euroeval/data_loading.py +80 -29
- euroeval/data_models.py +338 -330
- euroeval/dataset_configs/__init__.py +12 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +55 -93
- euroeval/dataset_configs/dutch.py +48 -87
- euroeval/dataset_configs/english.py +45 -77
- euroeval/dataset_configs/estonian.py +42 -34
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -69
- euroeval/dataset_configs/french.py +39 -75
- euroeval/dataset_configs/german.py +45 -82
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -79
- euroeval/dataset_configs/latvian.py +28 -35
- euroeval/dataset_configs/lithuanian.py +28 -26
- euroeval/dataset_configs/norwegian.py +72 -115
- euroeval/dataset_configs/polish.py +33 -61
- euroeval/dataset_configs/portuguese.py +33 -66
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/spanish.py +42 -77
- euroeval/dataset_configs/swedish.py +52 -90
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +24 -17
- euroeval/generation.py +15 -14
- euroeval/generation_utils.py +8 -8
- euroeval/languages.py +395 -323
- euroeval/logging_utils.py +250 -0
- euroeval/metrics/base.py +0 -3
- euroeval/metrics/huggingface.py +21 -6
- euroeval/metrics/llm_as_a_judge.py +6 -4
- euroeval/metrics/pipeline.py +17 -9
- euroeval/metrics/speed.py +0 -3
- euroeval/model_cache.py +17 -19
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +99 -42
- euroeval/prompt_templates/multiple_choice.py +102 -38
- euroeval/prompt_templates/named_entity_recognition.py +172 -51
- euroeval/prompt_templates/reading_comprehension.py +119 -42
- euroeval/prompt_templates/sentiment_classification.py +110 -40
- euroeval/prompt_templates/summarization.py +85 -40
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +11 -10
- euroeval/speed_benchmark.py +5 -6
- euroeval/task_group_utils/multiple_choice_classification.py +2 -4
- euroeval/task_group_utils/question_answering.py +24 -16
- euroeval/task_group_utils/sequence_classification.py +48 -35
- euroeval/task_group_utils/text_to_text.py +19 -9
- euroeval/task_group_utils/token_classification.py +21 -17
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +33 -22
- euroeval/types.py +10 -9
- euroeval/utils.py +35 -149
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +196 -39
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.3.0.dist-info/RECORD +0 -71
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_modules/vllm.py

```diff
@@ -14,10 +14,9 @@ from time import sleep
 import torch
 from huggingface_hub import snapshot_download
 from pydantic import conlist, create_model
-from tqdm.auto import tqdm
-from transformers import MistralCommonTokenizer
 from transformers.models.auto.configuration_auto import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
+from transformers.tokenization_mistral_common import MistralCommonTokenizer
 from urllib3.exceptions import RequestError

 from ..constants import (
@@ -30,7 +29,7 @@ from ..constants import (
     REASONING_TOKENS,
     VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY,
 )
-from ..data_models import GenerativeModelOutput, ModelConfig
+from ..data_models import GenerativeModelOutput, HashableDict, ModelConfig
 from ..enums import (
     BatchingPreference,
     GenerativeType,
@@ -50,6 +49,7 @@ from ..generation_utils import (
     raise_if_wrong_params,
 )
 from ..languages import get_all_languages
+from ..logging_utils import get_pbar, log, log_once, no_terminal_output
 from ..task_group_utils import (
     question_answering,
     sequence_classification,
```
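The `from ..logging_utils import ...` line added above is the visible tip of the new `euroeval/logging_utils.py` module (+250 lines, not part of this file's diff). Judging from how the rest of this file uses it, `log` and `log_once` wrap the shared `euroeval` logger with an explicit `level` argument, `no_terminal_output` is a context manager that silences terminal output unless verbose mode is on, and `get_pbar` builds the progress bar handed to vLLM. A minimal sketch of helpers with those semantics, as an assumption about the interface rather than the actual EuroEval implementation:

```python
import contextlib
import logging
import os
import sys
import typing as t

from tqdm.auto import tqdm

logger = logging.getLogger("euroeval")
_seen_messages: set[str] = set()


def log(message: str, level: int = logging.INFO) -> None:
    """Log `message` at `level` using the shared `euroeval` logger."""
    logger.log(level, message)


def log_once(message: str, level: int = logging.INFO) -> None:
    """Log `message` only the first time it is seen."""
    if message not in _seen_messages:
        _seen_messages.add(message)
        log(message, level=level)


def get_pbar(*args: t.Any, **kwargs: t.Any) -> tqdm:
    """Progress bar that is cleared from the terminal after completion."""
    kwargs.pop("leave", None)
    return tqdm(*args, leave=False, **kwargs)


@contextlib.contextmanager
def no_terminal_output(disable: bool = False) -> t.Iterator[None]:
    """Silence stdout/stderr inside the block unless `disable` is True."""
    if disable:
        yield
        return
    with open(os.devnull, "w") as devnull:
        old_out, old_err = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = devnull, devnull
        try:
            yield
        finally:
            sys.stdout, sys.stderr = old_out, old_err
```

The `get_pbar` sketch mirrors the `get_pbar_without_leave` helper that is removed at the bottom of this diff, which had exactly this behaviour.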
```diff
@@ -73,7 +73,6 @@ from ..utils import (
     get_hf_token,
     get_min_cuda_compute_capability,
     internet_connection_available,
-    log_once,
     resolve_model_path,
     split_model_id,
 )
@@ -86,7 +85,7 @@ if t.TYPE_CHECKING or importlib.util.find_spec("vllm") is not None:
         destroy_model_parallel,
     )
     from vllm.lora.request import LoRARequest
-    from vllm.sampling_params import
+    from vllm.sampling_params import StructuredOutputsParams

 if t.TYPE_CHECKING:
     from datasets import DatasetDict
@@ -95,8 +94,6 @@ if t.TYPE_CHECKING:

     from ..data_models import BenchmarkConfig, DatasetConfig, Task

-logger = logging.getLogger("euroeval")
-

 class VLLMModel(HuggingFaceEncoderModel):
     """A generative model using the vLLM inference framework."""
@@ -132,9 +129,10 @@ class VLLMModel(HuggingFaceEncoderModel):
             model_config=model_config, allowed_params=self.allowed_params
         )

-
-
-
+        with no_terminal_output(disable=benchmark_config.verbose):
+            model, tokeniser = load_model_and_tokeniser(
+                model_config=model_config, benchmark_config=benchmark_config
+            )
         self._model: "LLM" = model
         self._tokeniser: "PreTrainedTokenizer" = tokeniser

@@ -245,6 +243,7 @@ class VLLMModel(HuggingFaceEncoderModel):
                 return partial(
                     sequence_classification.extract_labels_from_generation,
                     dataset_config=self.dataset_config,
+                    model_config=self.model_config,
                     first_label_token_mapping=self.buffer["first_label_token_mapping"],
                 )
             case TaskGroup.TEXT_TO_TEXT:
@@ -394,10 +393,11 @@ class VLLMModel(HuggingFaceEncoderModel):
             self.dataset_config.task.uses_structured_output
             or (self.dataset_config.task.uses_logprobs and self.dataset_config.labels)
         ) and self.generative_type == GenerativeType.REASONING:
-
-
+            structured_outputs = None
+            log(
                 "The dataset uses structured output, but we are not using it as the "
-                "model is a reasoning model."
+                "model is a reasoning model.",
+                level=logging.DEBUG,
             )
         elif self.dataset_config.task.uses_structured_output:
             ner_tag_names = list(self.dataset_config.prompt_label_mapping.values())
@@ -412,21 +412,29 @@ class VLLMModel(HuggingFaceEncoderModel):
                 f"{json.dumps(structured_generation_schema)}",
                 level=logging.DEBUG,
             )
-
+            structured_outputs = StructuredOutputsParams(
+                json=structured_generation_schema
+            )
         elif self.dataset_config.task.uses_logprobs and self.dataset_config.labels:
-
-
-
-
+            choice_labels = [
+                self.dataset_config.prompt_label_mapping[label]
+                for label in self.dataset_config.labels
+            ]
+            if "first_label_token_mapping" in self.buffer and isinstance(
+                self.buffer["first_label_token_mapping"], dict
+            ):
+                choice_labels = [
+                    self.buffer["first_label_token_mapping"][label]
+                    for label in choice_labels
                 ]
-            )
+            structured_outputs = StructuredOutputsParams(choice=choice_labels)
             log_once(
                 "Using structured generation with the choices: "
-                f"{
+                f"{structured_outputs.choice!r}.",
                 level=logging.DEBUG,
             )
         else:
-
+            structured_outputs = None
             log_once(
                 "Not using structured generation as the dataset does not require it.",
                 level=logging.DEBUG,
```
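The heart of this change is the move to vLLM's `StructuredOutputsParams`: the benchmark now builds either a JSON-schema constraint (for structured-output tasks such as NER) or a fixed-choice constraint (for logprobs-based classification), and passes it to `SamplingParams` through the `structured_outputs` argument seen in the next hunk. A standalone sketch of the same pattern on a recent vLLM release (older releases exposed this as `guided_decoding`/`GuidedDecodingParams`); the model name, labels and schema are placeholders, not taken from EuroEval:

```python
from vllm import LLM, SamplingParams
from vllm.sampling_params import StructuredOutputsParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model

# Constrain decoding to a fixed label set, as done for logprobs-based tasks.
choice_params = SamplingParams(
    temperature=0.0,
    max_tokens=8,
    structured_outputs=StructuredOutputsParams(choice=["positive", "negative"]),
)

# Or constrain decoding to a JSON schema, as done for structured-output tasks.
schema = {
    "type": "object",
    "properties": {"persons": {"type": "array", "items": {"type": "string"}}},
    "required": ["persons"],
}
json_params = SamplingParams(
    temperature=0.0,
    max_tokens=128,
    structured_outputs=StructuredOutputsParams(json=schema),
)

outputs = llm.generate(
    prompts=["The film was wonderful. Sentiment:"], sampling_params=choice_params
)
print(outputs[0].outputs[0].text)  # constrained to "positive" or "negative"
```

When a `first_label_token_mapping` is available, the code in the hunk above constrains generation to the mapped first tokens rather than the full label strings, presumably so the first generated token can be matched directly against the label set.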
```diff
@@ -445,14 +453,14 @@ class VLLMModel(HuggingFaceEncoderModel):
             else None,
             temperature=0.0,
             stop=[stop_token for stop_token in stop_tokens if stop_token],
-
+            structured_outputs=structured_outputs,
         )

         # If any of the prompts are empty then we need to replace them with a BOS token
         # so that the vLLM model can generate from them
-        prompts:
+        prompts: c.Sequence[str] = inputs["text"]
         if any(len(prompt) == 0 for prompt in prompts):
-
+            log("Found empty prompts, replacing with BOS token.", level=logging.DEBUG)
             prompts = [
                 prompt if len(prompt) > 0 else str(self._tokeniser.bos_token)
                 for prompt in prompts
@@ -480,13 +488,14 @@ class VLLMModel(HuggingFaceEncoderModel):
                 raw_outputs = self._model.generate(
                     prompts=prompts,
                     sampling_params=sampling_params,
-                    use_tqdm=False if input_is_a_test else
+                    use_tqdm=False if input_is_a_test else get_pbar,
                     lora_request=self.buffer.get("lora_request"),
                 )
                 break
             except TypeError as e:
-
-                    f"Encountered error during vLLM generation: {str(e)}. Retrying..."
+                log(
+                    f"Encountered error during vLLM generation: {str(e)}. Retrying...",
+                    level=logging.DEBUG,
                 )
                 sleep(1)
             except ValueError as e:
```
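`LLM.generate` now receives the shared `get_pbar` factory instead of a local tqdm wrapper: vLLM accepts either a boolean or a tqdm-style callable for `use_tqdm`, so passing a factory customises the progress bar (the removed `get_pbar_without_leave` helper at the bottom of this diff did the same thing locally). A small sketch of the mechanism, with a placeholder model:

```python
from tqdm.auto import tqdm
from vllm import LLM, SamplingParams


def get_pbar(*args, **kwargs) -> tqdm:
    """Progress bar that disappears from the terminal once generation finishes."""
    kwargs.pop("leave", None)
    return tqdm(*args, leave=False, **kwargs)


llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model
outputs = llm.generate(
    prompts=["Hello, world!"],
    sampling_params=SamplingParams(max_tokens=16),
    use_tqdm=get_pbar,  # pass False to disable the bar, or a callable to customise it
)
```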
```diff
@@ -498,10 +507,11 @@ class VLLMModel(HuggingFaceEncoderModel):
                     re.search(pattern, str(e), flags=re.IGNORECASE) is not None
                     for pattern in truncate_error_messages
                 ):
-
-                        "Prompts are too long, so truncating them and trying again..."
+                    log(
+                        "Prompts are too long, so truncating them and trying again...",
+                        level=logging.WARNING,
                     )
-
+                    log(f"The error message was: {str(e)}", level=logging.DEBUG)

                     # If we have already tried truncating the prompts a few times, then
                     # we truncate a bit more aggressively
@@ -544,49 +554,50 @@ class VLLMModel(HuggingFaceEncoderModel):
                 f"{num_extra_outputs!r} extra outputs."
             )
         else:
-
+            log(
                 f"Filtered out {num_extra_outputs:,} extra outputs from the model, "
                 "which occured as we interupted the generation when we truncated "
-                "the prompts."
+                "the prompts.",
+                level=logging.DEBUG,
             )

         # Parse the raw model outputs
-        completion_ids:
-            output.outputs[0].token_ids for output in raw_outputs
+        completion_ids: c.Sequence[c.Sequence[int]] = [
+            list(output.outputs[0].token_ids) for output in raw_outputs
         ]
         completions = self._tokeniser.batch_decode(
             sequences=[
                 torch.LongTensor(completion_id) for completion_id in completion_ids
-            ]
+            ],
+            skip_special_tokens=True,
         )
         if (
             self.end_of_reasoning_token is not None
             and self.generative_type == GenerativeType.REASONING
         ):
+            num_samples_without_eor_token = 0
             for idx in range(len(completions)):
                 if self.end_of_reasoning_token in completions[idx]:
                     completions[idx] = completions[idx].split(
                         self.end_of_reasoning_token
                     )[-1]
-                elif self.benchmark_config.verbose:
-                    logger.warning(
-                        f"The model {self.model_config.model_id!r} is a reasoning "
-                        "model, but the generated output does not contain the end of "
-                        f"reasoning token ({self.end_of_reasoning_token!r}). Using "
-                        "an empty string as the prediction instead."
-                    )
-                    completions[idx] = ""
                 else:
-
-                        f"The model {self.model_config.model_id!r} is a reasoning "
-                        "model, but the generated output does not contain the end of "
-                        f"reasoning token ({self.end_of_reasoning_token!r}). Using "
-                        "an empty string as the prediction instead. Only showing "
-                        "this warning once - see all occurrences if you run with the "
-                        "`verbose` flag.",
-                        level=logging.WARNING,
-                    )
+                    num_samples_without_eor_token += 1
                     completions[idx] = ""
+            if num_samples_without_eor_token > 0:
+                log_once(
+                    f"The model {self.model_config.model_id!r} is a reasoning "
+                    "model, but the generated output did not contain the end of "
+                    f"reasoning token ({self.end_of_reasoning_token!r}) in "
+                    f"{num_samples_without_eor_token:,}/{len(completions):,} of "
+                    "the samples. Using an empty string for all these samples "
+                    "instead.",
+                    level=(
+                        logging.WARNING
+                        if num_samples_without_eor_token / len(completions) > 0.5
+                        else logging.DEBUG
+                    ),
+                )
         stop_token_pattern = re.compile(
             "|".join(re.escape(stop_token) for stop_token in stop_tokens)
         )
```
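Reasoning-model output handling is now aggregated: rather than warning on every sample, the loop counts completions that lack the end-of-reasoning marker, blanks them, and logs once, at WARNING level only when more than half of the batch is affected. A compact sketch of the same splitting logic, assuming a `</think>`-style marker (the real token is model-specific in EuroEval):

```python
import logging

logger = logging.getLogger("example")


def strip_reasoning(completions: list[str], end_of_reasoning_token: str) -> list[str]:
    """Keep only the text after the marker, blanking samples that never emitted it."""
    stripped: list[str] = []
    num_missing = 0
    for completion in completions:
        if end_of_reasoning_token in completion:
            stripped.append(completion.split(end_of_reasoning_token)[-1])
        else:
            num_missing += 1
            stripped.append("")
    if num_missing > 0:
        level = (
            logging.WARNING
            if num_missing / len(completions) > 0.5
            else logging.DEBUG
        )
        logger.log(level, f"{num_missing:,}/{len(completions):,} samples had no marker.")
    return stripped


print(strip_reasoning(["<think>hmm</think> positive", "negative"], "</think>"))
# [' positive', '']
```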
```diff
@@ -604,13 +615,13 @@ class VLLMModel(HuggingFaceEncoderModel):

         # Add logprobs scores to the output
         if self.buffer["first_label_token_mapping"]:
-            scores:
+            scores: c.Sequence[c.Sequence[c.Sequence[tuple[str, float]]]] = [
                 [
                     [
-                        (obj.decoded_token, obj.logprob)
+                        (obj.decoded_token or "", obj.logprob)
                         for obj in token_logprobs_dict.values()
                     ]
-                    for token_logprobs_dict in raw_output.outputs[0].logprobs
+                    for token_logprobs_dict in raw_output.outputs[0].logprobs or list()
                 ]
                 for raw_output in raw_outputs
             ]
```
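The two `or` fallbacks added here guard against `None` values in vLLM's output: `outputs[0].logprobs` is only populated when `SamplingParams(logprobs=N)` was requested, and a `Logprob` object's `decoded_token` is optional. A self-contained sketch of reading the same structure, with a placeholder model:

```python
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model
outputs = llm.generate(
    prompts=["The sentiment of 'I loved it' is"],
    sampling_params=SamplingParams(max_tokens=1, temperature=0.0, logprobs=10),
)

completion = outputs[0].outputs[0]
# `logprobs` holds one dict per generated token, mapping token id -> Logprob.
for token_logprobs_dict in completion.logprobs or []:
    scores = [
        (obj.decoded_token or "", obj.logprob)  # decoded_token may be None
        for obj in token_logprobs_dict.values()
    ]
    print(scores)
```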
```diff
@@ -648,7 +659,13 @@ class VLLMModel(HuggingFaceEncoderModel):
         revision = model_id_components.revision

         model_info = get_model_repo_info(
-            model_id=model_id,
+            model_id=model_id,
+            revision=revision,
+            api_key=benchmark_config.api_key,
+            cache_dir=benchmark_config.cache_dir,
+            trust_remote_code=benchmark_config.trust_remote_code,
+            requires_safetensors=benchmark_config.requires_safetensors,
+            run_with_cli=benchmark_config.run_with_cli,
         )
         return (
             model_info is not None
@@ -674,7 +691,11 @@ class VLLMModel(HuggingFaceEncoderModel):
         model_info = get_model_repo_info(
             model_id=model_id_components.model_id,
             revision=model_id_components.revision,
-
+            api_key=benchmark_config.api_key,
+            cache_dir=benchmark_config.cache_dir,
+            trust_remote_code=benchmark_config.trust_remote_code,
+            requires_safetensors=benchmark_config.requires_safetensors,
+            run_with_cli=benchmark_config.run_with_cli,
         )
         if model_info is None:
             raise InvalidModel(f"The model {model_id!r} could not be found.")
@@ -705,7 +726,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         return model_config

     @property
-    def data_collator(self) -> c.Callable[[
+    def data_collator(self) -> c.Callable[[c.Sequence[t.Any]], dict[str, t.Any]]:
        """The data collator used to prepare samples during finetuning.

         Returns:
@@ -751,8 +772,8 @@ def load_model_and_tokeniser(
     hf_model_config = load_hf_model_config(
         model_id=model_id,
         num_labels=0,
-        id2label=
-        label2id=
+        id2label=HashableDict(),
+        label2id=HashableDict(),
         revision=revision,
         model_cache_dir=model_config.model_cache_dir,
         api_key=benchmark_config.api_key,
```
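`load_hf_model_config` is now called with `HashableDict` instances rather than plain dicts, which fits the new `euroeval/caching_utils.py` module: arguments have to be hashable before a call can be memoised with something like `functools.lru_cache`, and an ordinary `dict` is not. The actual EuroEval classes are not shown in this diff; a minimal sketch of the idea, with a hypothetical cached loader:

```python
import functools
import typing as t


class HashableDict(dict):
    """A dict usable as an argument to cached functions (treated as read-only)."""

    def __hash__(self) -> int:  # type: ignore[override]
        return hash(frozenset(self.items()))


@functools.lru_cache(maxsize=None)
def cached_config_lookup(model_id: str, id2label: HashableDict) -> dict[str, t.Any]:
    # A plain dict here would fail with "TypeError: unhashable type: 'dict'"
    # as soon as lru_cache tries to hash the call's arguments.
    return {"model_id": model_id, "id2label": dict(id2label)}


print(cached_config_lookup("some/model", HashableDict({0: "negative", 1: "positive"})))
```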
```diff
@@ -779,32 +800,36 @@ def load_model_and_tokeniser(
     # Choose bf16 over fp16 if the model is a fp32 model and the GPU supports it
     if hf_model_config.dtype == torch.float32:
         if torch.cuda.is_bf16_supported():
-
+            log(
                 "You are loading a model with dtype FP32, which we will convert to "
                 "BF16 as FP32 is not supported by vLLM and BF16 is supported by your "
-                "GPU."
+                "GPU.",
+                level=logging.WARNING,
             )
             dtype = torch.bfloat16
         else:
-
+            log(
                 "You are loading a model with dtype FP32, which we will convert to "
                 "FP16 as FP32 is not supported by vLLM and BF16 is not supported by "
-                "your GPU."
+                "your GPU.",
+                level=logging.WARNING,
             )
             dtype = torch.float16

     # If the model is a quantized model, we might need to change the dtype
     if quantization == "mxfp4" and hf_model_config.dtype is None:
         dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-
+        log(
             "You are loading a quantized model where `dtype` has not been set. "
-            f"Setting dtype to {dtype!r}."
+            f"Setting dtype to {dtype!r}.",
+            level=logging.DEBUG,
         )
     elif quantization is not None and hf_model_config.dtype != torch.float16:
-
+        log(
             "You are loading a quantized model with dtype "
             f"{hf_model_config.dtype}, which vLLM does not support. Setting "
-            "dtype to float16 instead."
+            "dtype to float16 instead.",
+            level=logging.WARNING,
         )
         dtype = torch.float16

```
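The dtype selection itself is unchanged here, only the messages now go through `log`: FP32 checkpoints are downcast to BF16 when the GPU supports it and to FP16 otherwise (vLLM does not serve FP32), and quantised checkpoints with an unsupported or unset dtype are coerced as well. A standalone sketch of that decision, assuming a CUDA device is available:

```python
import torch


def pick_vllm_dtype(
    config_dtype: torch.dtype | None, quantization: str | None
) -> torch.dtype | None:
    """Mirror of the decision above: avoid FP32 and prefer BF16 when supported."""
    dtype = config_dtype
    if config_dtype == torch.float32:
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    if quantization == "mxfp4" and config_dtype is None:
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    elif quantization is not None and config_dtype != torch.float16:
        dtype = torch.float16  # quantised weights are served in FP16 in this code path
    return dtype


print(pick_vllm_dtype(torch.float32, None))
```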
```diff
@@ -815,12 +840,13 @@ def load_model_and_tokeniser(

     if min_cuda_compute_capability is not None:
         if min_cuda_compute_capability < required_capability:
-
+            log(
                 f"You are loading a model with dtype {hf_model_config.dtype}, "
                 "which vLLM only supports for CUDA devices with CUDA compute "
                 f"capability >={required_capability}. You are using one or more "
                 f"devices with compute capability {min_cuda_compute_capability}. "
-                "Setting dtype to float16 instead."
+                "Setting dtype to float16 instead.",
+                level=logging.WARNING,
             )
             dtype = torch.float16

@@ -987,13 +1013,17 @@ def load_tokeniser(
                     f"Could not load tokeniser for model {model_id!r}. The error was "
                     f"{str(e)}."
                 ) from e
-
+            log(
                 f"Could not load tokeniser for {model_id!r}. Falling back to "
-                f"{adapter_base_model_id!r}."
+                f"{adapter_base_model_id!r}.",
+                level=logging.DEBUG,
             )
             model_id = adapter_base_model_id
         except (TimeoutError, RequestError):
-
+            log(
+                f"Couldn't load tokeniser for {model_id!r}. Retrying.",
+                level=logging.WARNING,
+            )
             sleep(5)
             continue
         except (KeyError, ValueError) as e:
@@ -1192,32 +1222,17 @@ def get_custom_stop_tokens(
         if stop_token in prompt or stop_token in completion
     ]
     if stop_tokens:
-
+        log(
             f"Found the following custom stop tokens for model {model_id!r}: "
-            f"{stop_tokens}."
+            f"{stop_tokens}.",
+            level=logging.DEBUG,
        )
     else:
-
+        log(f"Found no custom stop tokens for model {model_id!r}.", level=logging.DEBUG)

     return stop_tokens


-def get_pbar_without_leave(*tqdm_args, **tqdm_kwargs) -> tqdm:
-    """Get a progress bar for vLLM which disappears after completion.
-
-    Args:
-        *tqdm_args:
-            Positional arguments to pass to tqdm.
-        **tqdm_kwargs:
-            Additional keyword arguments to pass to tqdm.
-
-    Returns:
-        A tqdm progress bar.
-    """
-    tqdm_kwargs.pop("leave", None)  # Remove the 'leave' key if it exists
-    return tqdm(*tqdm_args, leave=False, **tqdm_kwargs)
-
-
 def get_vllm_tokenisation_params(
     tokeniser: "PreTrainedTokenizer", model_config: "ModelConfig"
 ) -> dict[str, t.Any]:
```