EuroEval 15.9.1-py3-none-any.whl → 15.9.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic.
- euroeval/benchmark_modules/vllm.py +151 -102
- euroeval/constants.py +13 -0
- euroeval/task_group_utils/sequence_classification.py +1 -1
- euroeval/tokenization_utils.py +18 -9
- {euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/METADATA +1 -1
- {euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/RECORD +9 -9
- {euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/WHEEL +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/entry_points.txt +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/licenses/LICENSE +0 -0
euroeval/benchmark_modules/vllm.py CHANGED

@@ -26,11 +26,13 @@ from transformers.trainer import Trainer
 from urllib3.exceptions import RequestError
 
 from ..constants import (
+    CUSTOM_STOP_TOKENS,
     GENERATIVE_PIPELINE_TAGS,
     MAX_CONTEXT_LENGTH,
     MAX_LOGPROBS,
     MERGE_TAGS,
     REASONING_MAX_TOKENS,
+    REASONING_TOKENS,
     TASKS_USING_JSON,
     VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY,
 )
@@ -135,9 +137,15 @@ class VLLMModel(HuggingFaceEncoderModel):
         )
         self._model: LLM = model
         self._tokenizer: PreTrainedTokenizer = tokenizer
-        self.end_of_reasoning_token_id = get_end_of_reasoning_token_id(
+        self.end_of_reasoning_token = get_end_of_reasoning_token(
             model=self._model, tokenizer=self._tokenizer, model_id=model_config.model_id
         )
+        self.custom_stop_tokens = get_custom_stop_tokens(
+            model=self._model,
+            tokenizer=self._tokenizer,
+            model_id=model_config.model_id,
+            is_reasoning_model=self.end_of_reasoning_token is not None,
+        )
 
         # We specify `HuggingFaceEncoderModel` here instead of `VLLMModel`, as we want
         # to call the `__init__` method of the `BenchmarkModule` class.
@@ -183,7 +191,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         """
         if not hasattr(self, "_tokenizer"):
             return None
-        elif self.end_of_reasoning_token_id is not None:
+        elif self.end_of_reasoning_token is not None:
             return GenerativeType.REASONING
         elif self._tokenizer.chat_template is not None:
             return GenerativeType.INSTRUCTION_TUNED
@@ -299,7 +307,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         # token, end-of-sentence token, and a double newline if the model isn't
         # instruction tuned (since these separate the few-shot examples in the input in
         # this case)
-        stop_tokens: list[str] =
+        stop_tokens: list[str] = self.custom_stop_tokens.copy()
         if self.buffer["instruction_model"] is False:
             stop_tokens.append("\n\n")
         if self._tokenizer.pad_token_id is not None:
@@ -497,30 +505,26 @@ class VLLMModel(HuggingFaceEncoderModel):
         completion_ids: list[list[int]] = [
             output.outputs[0].token_ids for output in raw_outputs
         ]
-        if self.end_of_reasoning_token_id in completion_ids[0]:
-            # Find the latest index of the end of reasoning token and slice
-            # the token IDs to only include the tokens after it
-            completion_ids = [
-                token_ids[
-                    max(
-                        [
-                            i
-                            for i, x in enumerate(token_ids)
-                            if x == self.end_of_reasoning_token_id
-                        ]
-                    )
-                    + 1 :
-                ]
-                if self.end_of_reasoning_token_id in token_ids
-                else token_ids
-                for token_ids in completion_ids
-            ]
         completions = self._tokenizer.batch_decode(
             sequences=[
                 torch.LongTensor(completion_id) for completion_id in completion_ids
-            ],
-            skip_special_tokens=True,
+            ]
         )
+        if self.end_of_reasoning_token is not None:
+            completions = [
+                completion.split(self.end_of_reasoning_token)[-1]
+                for completion in completions
+            ]
+        if self.custom_stop_tokens:
+            stop_token_pattern = re.compile(
+                "|".join(
+                    re.escape(stop_token) for stop_token in self.custom_stop_tokens
+                )
+            )
+            completions = [
+                re.split(pattern=stop_token_pattern, string=completion)[0]
+                for completion in completions
+            ]
         completions = [completion.strip() for completion in completions]
 
         # Sanity check
@@ -541,17 +545,6 @@ class VLLMModel(HuggingFaceEncoderModel):
                 ]
                 for raw_output in raw_outputs
             ]
-            scores = [
-                score_list[
-                    raw_output.outputs[0].token_ids.index(
-                        self.end_of_reasoning_token_id
-                    )
-                    + 2 :
-                ]
-                if self.end_of_reasoning_token_id in raw_output.outputs[0].token_ids
-                else score_list
-                for raw_output, score_list in zip(raw_outputs, scores)
-            ]
             output = GenerativeModelOutput(sequences=completions, scores=scores)
         else:
             output = GenerativeModelOutput(sequences=completions)
@@ -1016,14 +1009,10 @@ def clear_vllm() -> None:
     clear_memory()
 
 
-def get_end_of_reasoning_token_id(
+def get_end_of_reasoning_token(
     model: "LLM", tokenizer: "PreTrainedTokenizer", model_id: str
-) -> int | None:
-    """Get the end of reasoning token ID for a generative model.
-
-    This assumes that the reasoning token is of the form <X> and that the end of
-    reasoning token is </X> (for X being any string without spaces). We disallow the
-    reasoning token to be the same as the beginning-of-sentence token.
+) -> str | None:
+    """Get the end-of-reasoning token for a generative model.
 
     Args:
         model:
@@ -1034,86 +1023,146 @@ def get_end_of_reasoning_token_id(
             The model ID.
 
     Returns:
-        The end of reasoning token
+        The end of reasoning token, or None if it could not be found.
     """
+    # Create a prompt to check if the model uses the reasoning tokens
+    prompt = "What is your name?"
+    if tokenizer.chat_template is not None:
         templated_prompt = tokenizer.apply_chat_template(
-            conversation=[dict(role="user", content=
+            conversation=[dict(role="user", content=prompt)],
             add_generation_prompt=True,
             tokenize=False,
         )
         assert isinstance(templated_prompt, str)
         prompt = templated_prompt
 
+    # Check that the beginning-of-reasoning token is actually used by the model
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=10),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
     )
-    elif isinstance(tokenizer.bos_token, list):
-        for bos_token in tokenizer.bos_token:
-            prompt = prompt.replace(bos_token, "").strip()
-            completion = completion.replace(bos_token, "").strip()
-
-    # If it doesn't contain a reasoning token, we can't find the end of reasoning token
-    prompt_match = re.search(pattern=r"<\w+>", string=prompt)
-    completion_match = re.search(pattern=r"<\w+>", string=completion)
-    if completion_match is None and prompt_match is None:
+    bor_reasoning_matches = [
+        (bor_token, eor_token)
+        for bor_token, eor_token in REASONING_TOKENS
+        if bor_token in prompt or bor_token in completion
+    ]
+    if not bor_reasoning_matches:
         log_once(
-            "the model is not
+            f"The model {model_id!r} did not generate any beginning-of-reasoning "
+            "tokens in the prompt or the completion. Assuming the model is not "
+            "a reasoning model.",
+            level=logging.INFO,
         )
         return None
 
-    # Check that the
-            decoder_token.content
-            for decoder_token in tokenizer.added_tokens_decoder.values()
-        ]
-        special_tokens.extend(
-            [encoder_token for encoder_token in tokenizer.added_tokens_encoder.keys()]
+    # Check that the beginning-of-reasoning token is actually used by the model
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=REASONING_MAX_TOKENS),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
     )
+    eor_reasoning_matches = [
+        (bor_token, eor_token)
+        for bor_token, eor_token in bor_reasoning_matches
+        if eor_token in completion
+    ]
+    if not eor_reasoning_matches:
         log_once(
-            f"Detected reasoning token {
-            f"token {
+            f"The model {model_id!r} did not generate any end-of-reasoning "
+            "tokens in the prompt or the completion, even though it generated "
+            "the beginning-of-reasoning tokens "
+            f"{[bor_token for bor_token, _ in bor_reasoning_matches]!r}. "
+            "This is probably not correct, so please report this issue.",
+            level=logging.INFO,
         )
         return None
 
+    if len(eor_reasoning_matches) > 1:
+        log_once(
+            f"Found multiple reasoning tokens {eor_reasoning_matches} for "
+            f"model {model_id!r}. Using {eor_reasoning_matches[0]!r} as "
+            "the reasoning token. If this is not the correct reasoning token, "
+            "please report this issue.",
+            level=logging.INFO,
+        )
+
+    bor_token, eor_token = eor_reasoning_matches[0]
     log_once(
-        level=logging.
+        f"Detected beginning-of-reasoning token {bor_token!r} and end-of-reasoning "
+        f"token {eor_token!r} for model {model_id!r}.",
+        level=logging.INFO,
     )
 
+    return eor_token
+
+
+def get_custom_stop_tokens(
+    model: "LLM",
+    tokenizer: "PreTrainedTokenizer",
+    model_id: str,
+    is_reasoning_model: bool,
+) -> list[str]:
+    """Get the stop tokens for a generative model.
+
+    Args:
+        model:
+            The vLLM model.
+        tokenizer:
+            The tokenizer.
+        model_id:
+            The model ID.
+        is_reasoning_model:
+            Whether the model is a reasoning model. This is used to determine the
+            number of generated tokens to allow before stopping the generation.
+
+    Returns:
+        A list of stop tokens.
+    """
+    candidate_stop_tokens = CUSTOM_STOP_TOKENS
+
+    # Create a prompt to check if the model uses the reasoning tokens
+    prompt = "Hello"
+    if tokenizer.chat_template is not None:
+        templated_prompt = tokenizer.apply_chat_template(
+            conversation=[dict(role="user", content=prompt)],
+            add_generation_prompt=True,
+            tokenize=False,
+        )
+        assert isinstance(templated_prompt, str)
+        prompt = templated_prompt
+
+    # Check that the beginning-of-reasoning token is actually used by the model
+    max_tokens = REASONING_MAX_TOKENS if is_reasoning_model else 10
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=max_tokens, temperature=0.0),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
+    )
+
+    stop_tokens = [
+        stop_token
+        for stop_token in candidate_stop_tokens
+        if stop_token in prompt or stop_token in completion
+    ]
+    if stop_tokens:
+        logger.debug(
+            f"Found the following custom stop tokens for model {model_id!r}: "
+            f"{stop_tokens}."
+        )
+    else:
+        logger.debug(f"Found no custom stop tokens for model {model_id!r}.")
 
-    return
+    return stop_tokens
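For reference, the completion post-processing that the added lines perform can be sketched in isolation. This is a minimal, self-contained illustration; the completion string and the detected token values below are hypothetical stand-ins for what get_end_of_reasoning_token and get_custom_stop_tokens would return for a real model.

import re

# Hypothetical detected values (produced by get_end_of_reasoning_token and
# get_custom_stop_tokens in the real code)
end_of_reasoning_token = "</think>"
custom_stop_tokens = ["<sep>"]

# Hypothetical decoded completion
completions = ["<think>Reasoning about the task.</think>Positive<sep>trailing text"]

# Keep only the text after the last end-of-reasoning token
if end_of_reasoning_token is not None:
    completions = [c.split(end_of_reasoning_token)[-1] for c in completions]

# Cut each completion at the first custom stop token that appears in it
if custom_stop_tokens:
    stop_token_pattern = re.compile(
        "|".join(re.escape(token) for token in custom_stop_tokens)
    )
    completions = [
        re.split(pattern=stop_token_pattern, string=c)[0] for c in completions
    ]

completions = [c.strip() for c in completions]
print(completions)  # ['Positive']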
euroeval/constants.py CHANGED

@@ -64,3 +64,16 @@ MERGE_TAGS = ["merge", "mergekit"]
 
 # The minimum required CUDA compute capability for using bfloat16 in vLLM
 VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY = 8.0
+
+# Used to detect whether a model is a reasoning model
+REASONING_TOKENS = [
+    ("<think>", "</think>"),
+    ("<reason>", "</reason>"),
+    ("<reasoning>", "</reasoning>"),
+]
+
+# These tokens are sometimes used by models to indicate the end of a generated
+# response, but they do not use them as a proper EOS token, so we have to deal with them
+# manually. We only use them as stop tokens if they actually appear in the model's
+# output
+CUSTOM_STOP_TOKENS = ["<sep>"]
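As a rough sketch of how get_end_of_reasoning_token consumes these pairs, detection reduces to substring checks against a probe prompt and its completion; the prompt and completion strings below are made up for illustration.

REASONING_TOKENS = [
    ("<think>", "</think>"),
    ("<reason>", "</reason>"),
    ("<reasoning>", "</reasoning>"),
]

# Hypothetical templated prompt and model completion
prompt = "<|user|>What is your name?<|assistant|><think>"
completion = "The user asks for my name.</think>I am a language model."

# Pairs whose beginning-of-reasoning token appears in the prompt or completion
bor_matches = [
    (bor, eor) for bor, eor in REASONING_TOKENS if bor in prompt or bor in completion
]

# Of those, pairs whose end-of-reasoning token appears in the completion
eor_matches = [(bor, eor) for bor, eor in bor_matches if eor in completion]

end_of_reasoning_token = eor_matches[0][1] if eor_matches else None
print(end_of_reasoning_token)  # </think>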
euroeval/task_group_utils/sequence_classification.py CHANGED

@@ -135,7 +135,7 @@ def extract_labels_from_generation(
     if first_label_token_mapping is False:
         raise InvalidBenchmark(
             "The model outputted logprobs, but the first label token mapping is "
-            "not provided
+            "not provided, which is not supported."
         )
     labels = get_closest_logprobs_labels(
         generation_logprobs=model_output.scores,
euroeval/tokenization_utils.py CHANGED

@@ -8,7 +8,6 @@ import torch
 
 from .constants import TASK_GROUPS_USING_LOGPROBS
 from .enums import GenerativeType
-from .exceptions import InvalidModel
 from .utils import log_once
 
 if t.TYPE_CHECKING:

@@ -153,7 +152,9 @@ def should_prefix_space_be_added_to_labels(
     return add_prefix_space
 
 
-def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_bos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the beginning-of-sequence token from a tokenizer.
 
     Args:

@@ -162,7 +163,7 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the beginning-of-sequence token and its
-        token ID.
+        token ID, or (None, None) if no BOS token is found.
     """
     if isinstance(tokenizer.bos_token, str) and isinstance(tokenizer.bos_token_id, int):
         return tokenizer.bos_token, tokenizer.bos_token_id

@@ -176,15 +177,20 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             bos_token_id = vocab[bos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have a beginning-of-sequence token. Please ensure that "
-            "this has been set in the tokenizer's configuration."
+            "this has been set in the tokenizer's configuration. Using no BOS token."
+            " This may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
     return bos_token, bos_token_id
 
 
-def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_eos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the end-of-sequence token from a tokenizer.
 
     Args:

@@ -193,7 +199,7 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the end-of-sequence token and its token
-        ID.
+        ID, or (None, None) if no EOS token is found.
     """
     if isinstance(tokenizer.eos_token, str) and isinstance(tokenizer.eos_token_id, int):
         return tokenizer.eos_token, tokenizer.eos_token_id

@@ -207,10 +213,13 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             eos_token_id = vocab[eos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have an end-of-sequence token. Please ensure that this "
-            "has been set in the tokenizer's configuration."
+            "has been set in the tokenizer's configuration. Using no EOS token. This "
+            "may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
     return eos_token, eos_token_id
 
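With this change, get_bos_token and get_eos_token signal a missing token by returning (None, None) instead of raising InvalidModel, so callers can branch on None. A minimal sketch of that calling pattern, using a hypothetical helper rather than EuroEval's actual call sites:

def collect_stop_tokens(eos_token: str | None) -> list[str]:
    # Hypothetical caller: add the EOS token as a stop token only when the
    # tokenizer actually defines one (i.e. skip the new (None, None) case).
    stop_tokens = ["\n\n"]
    if eos_token is not None:
        stop_tokens.append(eos_token)
    return stop_tokens

print(collect_stop_tokens("</s>"))  # ['\n\n', '</s>']
print(collect_stop_tokens(None))    # ['\n\n']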
{euroeval-15.9.1.dist-info → euroeval-15.9.2.dist-info}/RECORD CHANGED

@@ -3,7 +3,7 @@ euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu
 euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
-euroeval/constants.py,sha256=
+euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
 euroeval/data_loading.py,sha256=L_REtxefte5Ke4xE_Cz01zkfCyKlOYhSqT5ZXXulHPc,3992
 euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195

@@ -19,7 +19,7 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
 euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=
+euroeval/tokenization_utils.py,sha256=_B4KN3ZcuvVr8y3LedtfxBJfmPKjfVMjpbtl8bbQAuc,14278
 euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
 euroeval/utils.py,sha256=e83OnWc0GJn0Tn_vP3tbqh1DAbLy2ky-LnIlTEOKzKU,11410
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236

@@ -27,7 +27,7 @@ euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-l
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
 euroeval/benchmark_modules/hf.py,sha256=CoiaNakjhg6gm_5IbUUeevXQZebg2VrRLuhzEi2Hhrk,44617
 euroeval/benchmark_modules/litellm.py,sha256=SxSr_0C6b_jVavR3y9QyhfkCOP5-va4zijGfghFTArY,48362
-euroeval/benchmark_modules/vllm.py,sha256=
+euroeval/benchmark_modules/vllm.py,sha256=SbQ_EYSwUFBVLsp9io1Q75A9S_H-iw6AzLOn3rlEhK0,43034
 euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185

@@ -51,11 +51,11 @@ euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5w
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 euroeval/task_group_utils/multiple_choice_classification.py,sha256=nB78TzOgd0HBvTclmjOYJid9ZVAgu8IHZsqB_n1SAZU,6178
 euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iYvfoGt0EUObSaXRCGmk,27700
-euroeval/task_group_utils/sequence_classification.py,sha256=
+euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.9.1.dist-info/METADATA,sha256=
-euroeval-15.9.1.dist-info/WHEEL,sha256=
-euroeval-15.9.1.dist-info/entry_points.txt,sha256=
-euroeval-15.9.1.dist-info/licenses/LICENSE,sha256=
-euroeval-15.9.1.dist-info/RECORD,,
+euroeval-15.9.2.dist-info/METADATA,sha256=LwHTlJ51OGVwcRTUPulH-gh8IFxu82CUFYHZ1uOUyT0,13555
+euroeval-15.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.9.2.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.9.2.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.9.2.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|