EuroEval 15.9.1-py3-none-any.whl → 15.9.2-py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.


euroeval/benchmark_modules/vllm.py CHANGED
@@ -26,11 +26,13 @@ from transformers.trainer import Trainer
 from urllib3.exceptions import RequestError
 
 from ..constants import (
+    CUSTOM_STOP_TOKENS,
     GENERATIVE_PIPELINE_TAGS,
     MAX_CONTEXT_LENGTH,
     MAX_LOGPROBS,
     MERGE_TAGS,
     REASONING_MAX_TOKENS,
+    REASONING_TOKENS,
     TASKS_USING_JSON,
     VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY,
 )
@@ -135,9 +137,15 @@ class VLLMModel(HuggingFaceEncoderModel):
         )
         self._model: LLM = model
         self._tokenizer: PreTrainedTokenizer = tokenizer
-        self.end_of_reasoning_token_id = get_end_of_reasoning_token_id(
+        self.end_of_reasoning_token = get_end_of_reasoning_token(
             model=self._model, tokenizer=self._tokenizer, model_id=model_config.model_id
         )
+        self.custom_stop_tokens = get_custom_stop_tokens(
+            model=self._model,
+            tokenizer=self._tokenizer,
+            model_id=model_config.model_id,
+            is_reasoning_model=self.end_of_reasoning_token is not None,
+        )
 
         # We specify `HuggingFaceEncoderModel` here instead of `VLLMModel`, as we want
         # to call the `__init__` method of the `BenchmarkModule` class.
@@ -183,7 +191,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         """
         if not hasattr(self, "_tokenizer"):
             return None
-        elif self.end_of_reasoning_token_id is not None:
+        elif self.end_of_reasoning_token is not None:
             return GenerativeType.REASONING
         elif self._tokenizer.chat_template is not None:
             return GenerativeType.INSTRUCTION_TUNED
@@ -299,7 +307,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         # token, end-of-sentence token, and a double newline if the model isn't
         # instruction tuned (since these separate the few-shot examples in the input in
         # this case)
-        stop_tokens: list[str] = list()
+        stop_tokens: list[str] = self.custom_stop_tokens.copy()
         if self.buffer["instruction_model"] is False:
             stop_tokens.append("\n\n")
         if self._tokenizer.pad_token_id is not None:
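For orientation, here is a minimal sketch of how the stop list is assembled after this change. The token values are made up; in EuroEval they come from the tokenizer and from the detection step added in this release.

# Hypothetical values standing in for the tokenizer attributes and the output
# of get_custom_stop_tokens; only the assembly order mirrors the change above.
custom_stop_tokens = ["<sep>"]
instruction_model = False  # a base model evaluated with few-shot examples
pad_token, eos_token = "<pad>", "</s>"

stop_tokens: list[str] = custom_stop_tokens.copy()  # new: seed with custom tokens
if instruction_model is False:
    stop_tokens.append("\n\n")  # double newlines separate few-shot examples
stop_tokens.extend([pad_token, eos_token])

print(stop_tokens)  # ['<sep>', '\n\n', '<pad>', '</s>']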
@@ -497,30 +505,26 @@ class VLLMModel(HuggingFaceEncoderModel):
         completion_ids: list[list[int]] = [
             output.outputs[0].token_ids for output in raw_outputs
         ]
-        if self.end_of_reasoning_token_id in completion_ids[0]:
-            # Find the latest index of the end of reasoning token and slice
-            # the token IDs to only include the tokens after it
-            completion_ids = [
-                token_ids[
-                    max(
-                        [
-                            i
-                            for i, x in enumerate(token_ids)
-                            if x == self.end_of_reasoning_token_id
-                        ]
-                    )
-                    + 1 :
-                ]
-                if self.end_of_reasoning_token_id in token_ids
-                else token_ids
-                for token_ids in completion_ids
-            ]
         completions = self._tokenizer.batch_decode(
             sequences=[
                 torch.LongTensor(completion_id) for completion_id in completion_ids
-            ],
-            skip_special_tokens=True,
+            ]
         )
+        if self.end_of_reasoning_token is not None:
+            completions = [
+                completion.split(self.end_of_reasoning_token)[-1]
+                for completion in completions
+            ]
+        if self.custom_stop_tokens:
+            stop_token_pattern = re.compile(
+                "|".join(
+                    re.escape(stop_token) for stop_token in self.custom_stop_tokens
+                )
+            )
+            completions = [
+                re.split(pattern=stop_token_pattern, string=completion)[0]
+                for completion in completions
+            ]
        completions = [completion.strip() for completion in completions]
 
         # Sanity check
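To see what the new decode-then-trim logic does, here is a toy, self-contained version of it, with hard-coded strings standing in for decoded model output. The token values are assumptions for illustration only.

import re

end_of_reasoning_token = "</think>"
custom_stop_tokens = ["<sep>"]

completions = [
    "<think>Let me think about this.</think> Positive <sep> extra text",
    "No reasoning here <sep> trailing",
]

# Keep only the text after the last end-of-reasoning token, if any
if end_of_reasoning_token is not None:
    completions = [c.split(end_of_reasoning_token)[-1] for c in completions]

# Cut each completion at the first occurrence of any custom stop token
if custom_stop_tokens:
    stop_token_pattern = re.compile(
        "|".join(re.escape(t) for t in custom_stop_tokens)
    )
    completions = [re.split(stop_token_pattern, c)[0] for c in completions]

completions = [c.strip() for c in completions]
print(completions)  # ['Positive', 'No reasoning here']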
@@ -541,17 +545,6 @@ class VLLMModel(HuggingFaceEncoderModel):
                 ]
                 for raw_output in raw_outputs
             ]
-            scores = [
-                score_list[
-                    raw_output.outputs[0].token_ids.index(
-                        self.end_of_reasoning_token_id
-                    )
-                    + 2 :
-                ]
-                if self.end_of_reasoning_token_id in raw_output.outputs[0].token_ids
-                else score_list
-                for raw_output, score_list in zip(raw_outputs, scores)
-            ]
             output = GenerativeModelOutput(sequences=completions, scores=scores)
         else:
             output = GenerativeModelOutput(sequences=completions)
@@ -1016,14 +1009,10 @@ def clear_vllm() -> None:
     clear_memory()
 
 
-def get_end_of_reasoning_token_id(
+def get_end_of_reasoning_token(
     model: "LLM", tokenizer: "PreTrainedTokenizer", model_id: str
-) -> int | None:
-    """Get the end of reasoning token ID for a generative model.
-
-    This assumes that the reasoning token is of the form <X> and that the end of
-    reasoning token is </X> (for X being any string without spaces). We disallow the
-    reasoning token to be the same as the beginning-of-sentence token.
+) -> str | None:
+    """Get the end-of-reasoning token for a generative model.
 
     Args:
         model:
@@ -1034,86 +1023,146 @@ def get_end_of_reasoning_token_id(
             The model ID.
 
     Returns:
-        The end of reasoning token ID, or None if it could not be found.
+        The end-of-reasoning token, or None if it could not be found.
     """
-    if tokenizer.chat_template is None:
-        prompt = "What is your name?"
-    else:
+    # Create a prompt to check whether the model uses any known reasoning tokens
+    prompt = "What is your name?"
+    if tokenizer.chat_template is not None:
         templated_prompt = tokenizer.apply_chat_template(
-            conversation=[dict(role="user", content="What is your name?")],
+            conversation=[dict(role="user", content=prompt)],
             add_generation_prompt=True,
             tokenize=False,
         )
         assert isinstance(templated_prompt, str)
         prompt = templated_prompt
 
-    # Generate a completion and remove the BOS token from it, to not confuse it with the
-    # potential reasoning token
-    model_output = model.generate(
-        prompts=[prompt],
-        sampling_params=SamplingParams(max_tokens=3, temperature=0.0),
-        use_tqdm=False,
+    # Check that a beginning-of-reasoning token is actually used by the model
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=10),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
     )
-    completion = model_output[0].outputs[0].text
-
-    if tokenizer.bos_token is not None:
-        if isinstance(tokenizer.bos_token, str):
-            prompt = prompt.replace(tokenizer.bos_token, "").strip()
-            completion = completion.replace(tokenizer.bos_token, "").strip()
-        elif isinstance(tokenizer.bos_token, list):
-            for bos_token in tokenizer.bos_token:
-                prompt = prompt.replace(bos_token, "").strip()
-                completion = completion.replace(bos_token, "").strip()
-
-    # If it doesn't contain a reasoning token, we can't find the end of reasoning token
-    prompt_match = re.search(pattern=r"<\w+>", string=prompt)
-    completion_match = re.search(pattern=r"<\w+>", string=completion)
-    if completion_match is None and prompt_match is None:
+    bor_reasoning_matches = [
+        (bor_token, eor_token)
+        for bor_token, eor_token in REASONING_TOKENS
+        if bor_token in prompt or bor_token in completion
+    ]
+    if not bor_reasoning_matches:
         log_once(
-            f"Could not find a reasoning token for model {model_id!r}, so assuming "
-            "the model is not a reasoning model.",
-            level=logging.DEBUG,
+            f"The model {model_id!r} did not generate any beginning-of-reasoning "
+            "tokens in the prompt or the completion. Assuming the model is not "
+            "a reasoning model.",
+            level=logging.INFO,
         )
         return None
 
-    # Check that the found reasoning token and its associated end-of-reasoning tokens
-    # are both special tokens
-    elif completion_match is not None:
-        reasoning_token = completion_match.group()
-    else:
-        assert prompt_match is not None
-        reasoning_token = prompt_match.group()
-    end_of_reasoning_token = f"</{reasoning_token[1:-1]}>"
-    special_tokens = [
-        decoder_token.content
-        for decoder_token in tokenizer.added_tokens_decoder.values()
-    ]
-    special_tokens.extend(
-        [encoder_token for encoder_token in tokenizer.added_tokens_encoder.keys()]
+    # Check that the matching end-of-reasoning token is actually used by the model
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=REASONING_MAX_TOKENS),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
     )
-    special_tokens.extend(tokenizer.all_special_tokens)
-    if (
-        reasoning_token not in special_tokens
-        or end_of_reasoning_token not in special_tokens
-    ):
+    eor_reasoning_matches = [
+        (bor_token, eor_token)
+        for bor_token, eor_token in bor_reasoning_matches
+        if eor_token in completion
+    ]
+    if not eor_reasoning_matches:
         log_once(
-            f"Detected reasoning token {reasoning_token!r} and end-of-reasoning "
-            f"token {end_of_reasoning_token!r} for model {model_id!r}, but one of "
-            "them is not registered as a special token, so assuming it is not a "
-            "real reasoning token.",
-            level=logging.DEBUG,
+            f"The model {model_id!r} did not generate any end-of-reasoning "
+            "tokens in the prompt or the completion, even though it generated "
+            "the beginning-of-reasoning tokens "
+            f"{[bor_token for bor_token, _ in bor_reasoning_matches]!r}. "
+            "This is probably not correct, so please report this issue.",
+            level=logging.INFO,
        )
         return None
 
+    if len(eor_reasoning_matches) > 1:
+        log_once(
+            f"Found multiple reasoning tokens {eor_reasoning_matches} for "
+            f"model {model_id!r}. Using {eor_reasoning_matches[0]!r} as "
+            "the reasoning token. If this is not the correct reasoning token, "
+            "please report this issue.",
+            level=logging.INFO,
+        )
+
+    bor_token, eor_token = eor_reasoning_matches[0]
     log_once(
-        f"Detected reasoning token {reasoning_token!r} and end-of-reasoning "
-        f"token {end_of_reasoning_token!r} for model {model_id!r}.",
-        level=logging.DEBUG,
+        f"Detected beginning-of-reasoning token {bor_token!r} and end-of-reasoning "
+        f"token {eor_token!r} for model {model_id!r}.",
+        level=logging.INFO,
     )
 
-    # Encode the end of reasoning token and return its ID
-    end_of_reasoning_token_id = tokenizer.encode(
-        text=end_of_reasoning_token, add_special_tokens=False
-    )[0]
+    return eor_token
+
+
+def get_custom_stop_tokens(
+    model: "LLM",
+    tokenizer: "PreTrainedTokenizer",
+    model_id: str,
+    is_reasoning_model: bool,
+) -> list[str]:
+    """Get the custom stop tokens for a generative model.
+
+    Args:
+        model:
+            The vLLM model.
+        tokenizer:
+            The tokenizer.
+        model_id:
+            The model ID.
+        is_reasoning_model:
+            Whether the model is a reasoning model. This is used to determine the number
+            of generated tokens to allow before stopping the generation.
+
+    Returns:
+        A list of stop tokens.
+    """
+    candidate_stop_tokens = CUSTOM_STOP_TOKENS
+
+    # Create a prompt to check whether the model uses any of the candidate stop tokens
+    prompt = "Hello"
+    if tokenizer.chat_template is not None:
+        templated_prompt = tokenizer.apply_chat_template(
+            conversation=[dict(role="user", content=prompt)],
+            add_generation_prompt=True,
+            tokenize=False,
+        )
+        assert isinstance(templated_prompt, str)
+        prompt = templated_prompt
+
+    # Generate a completion, allowing a larger token budget for reasoning models so
+    # that any stop tokens have a chance to appear after the reasoning section
+    max_tokens = REASONING_MAX_TOKENS if is_reasoning_model else 10
+    completion = (
+        model.generate(
+            prompts=[prompt],
+            sampling_params=SamplingParams(max_tokens=max_tokens, temperature=0.0),
+            use_tqdm=False,
+        )[0]
+        .outputs[0]
+        .text
+    )
+
+    stop_tokens = [
+        stop_token
+        for stop_token in candidate_stop_tokens
+        if stop_token in prompt or stop_token in completion
+    ]
+    if stop_tokens:
+        logger.debug(
+            f"Found the following custom stop tokens for model {model_id!r}: "
+            f"{stop_tokens}."
+        )
+    else:
+        logger.debug(f"Found no custom stop tokens for model {model_id!r}.")
 
-    return end_of_reasoning_token_id
+    return stop_tokens
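A pure-string sketch of the two-stage detection that get_end_of_reasoning_token now performs, with the two model.generate calls replaced by canned strings, so it runs without a vLLM model. The prompt and both "generations" are invented for illustration.

REASONING_TOKENS = [
    ("<think>", "</think>"),
    ("<reason>", "</reason>"),
    ("<reasoning>", "</reasoning>"),
]

prompt = "<|user|>What is your name?<|assistant|><think>"  # templated prompt
short_completion = "Okay, the user asks"  # stage 1: a handful of tokens
long_completion = "Okay, the user asks my name.</think>I am an assistant."

# Stage 1: which beginning-of-reasoning tokens appear in the prompt or completion?
bor_matches = [
    (bor, eor)
    for bor, eor in REASONING_TOKENS
    if bor in prompt or bor in short_completion
]
assert bor_matches == [("<think>", "</think>")]

# Stage 2: of those, which end-of-reasoning tokens show up once the model is
# allowed a full reasoning budget?
eor_matches = [(bor, eor) for bor, eor in bor_matches if eor in long_completion]
end_of_reasoning_token = eor_matches[0][1] if eor_matches else None
print(end_of_reasoning_token)  # prints: </think>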
euroeval/constants.py CHANGED
@@ -64,3 +64,16 @@ MERGE_TAGS = ["merge", "mergekit"]
 
 # The minimum required CUDA compute capability for using bfloat16 in vLLM
 VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY = 8.0
+
+# Used to detect whether a model is a reasoning model
+REASONING_TOKENS = [
+    ("<think>", "</think>"),
+    ("<reason>", "</reason>"),
+    ("<reasoning>", "</reasoning>"),
+]
+
+# These tokens are sometimes used by models to indicate the end of a generated
+# response, but they do not use them as a proper EOS token, so we have to deal with them
+# manually. We only use them as stop tokens if they actually appear in the model's
+# output
+CUSTOM_STOP_TOKENS = ["<sep>"]
euroeval/task_group_utils/sequence_classification.py CHANGED
@@ -135,7 +135,7 @@ def extract_labels_from_generation(
     if first_label_token_mapping is False:
         raise InvalidBenchmark(
             "The model outputted logprobs, but the first label token mapping is "
-            "not provided. This means that the model should not output logprobs."
+            "not provided, which is not supported."
         )
     labels = get_closest_logprobs_labels(
         generation_logprobs=model_output.scores,
euroeval/tokenization_utils.py CHANGED
@@ -8,7 +8,6 @@ import torch
 
 from .constants import TASK_GROUPS_USING_LOGPROBS
 from .enums import GenerativeType
-from .exceptions import InvalidModel
 from .utils import log_once
 
 if t.TYPE_CHECKING:
@@ -153,7 +152,9 @@ def should_prefix_space_be_added_to_labels(
     return add_prefix_space
 
 
-def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_bos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the beginning-of-sequence token from a tokenizer.
 
     Args:
@@ -162,7 +163,7 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the beginning-of-sequence token and its
-        token ID.
+        token ID, or (None, None) if no BOS token is found.
     """
     if isinstance(tokenizer.bos_token, str) and isinstance(tokenizer.bos_token_id, int):
         return tokenizer.bos_token, tokenizer.bos_token_id
@@ -176,15 +177,20 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             bos_token_id = vocab[bos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have a beginning-of-sequence token. Please ensure that "
-            "this has been set in the tokenizer's configuration."
+            "this has been set in the tokenizer's configuration. Using no BOS token."
+            " This may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
     return bos_token, bos_token_id
 
 
-def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
+def get_eos_token(
+    tokenizer: "PreTrainedTokenizer",
+) -> tuple[str, int] | tuple[None, None]:
     """Get the end-of-sequence token from a tokenizer.
 
     Args:
@@ -193,7 +199,7 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
 
     Returns:
         A pair (token, token_id) representing the end-of-sequence token and its token
-        ID.
+        ID, or (None, None) if no EOS token is found.
     """
     if isinstance(tokenizer.eos_token, str) and isinstance(tokenizer.eos_token_id, int):
         return tokenizer.eos_token, tokenizer.eos_token_id
@@ -207,10 +213,13 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
             eos_token_id = vocab[eos_token]
             break
     else:
-        raise InvalidModel(
+        log_once(
             "The model does not have an end-of-sequence token. Please ensure that this "
-            "has been set in the tokenizer's configuration."
+            "has been set in the tokenizer's configuration. Using no EOS token. This "
+            "may lead to unexpected behavior in the model.",
+            level=logging.INFO,
         )
+        return None, None
 
     return eos_token, eos_token_id
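Since both helpers can now return (None, None) instead of raising InvalidModel, callers have to guard the results before use. A minimal caller-side sketch, assuming the helpers are importable from euroeval.tokenization_utils as shown and that any Hugging Face tokenizer is used:

from euroeval.tokenization_utils import get_bos_token, get_eos_token
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # any model ID works here

bos_token, bos_token_id = get_bos_token(tokenizer)
eos_token, eos_token_id = get_eos_token(tokenizer)

# Only add the tokens that actually exist, e.g. when building a stop-token list
stop_tokens = [token for token in (bos_token, eos_token) if token is not None]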
euroeval-15.9.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.9.1
+Version: 15.9.2
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
euroeval-15.9.2.dist-info/RECORD CHANGED
@@ -3,7 +3,7 @@ euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu
 euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
-euroeval/constants.py,sha256=p6kp_R6-Tq5LBvyXyT6Sa6N3SkjEElGS2LSZRBoQaYs,1985
+euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
 euroeval/data_loading.py,sha256=L_REtxefte5Ke4xE_Cz01zkfCyKlOYhSqT5ZXXulHPc,3992
 euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
@@ -19,7 +19,7 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
 euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=kghOIZMM3H0P9YDv0VBSNI7drzgJXlkRtMwt3Cgeev8,13907
+euroeval/tokenization_utils.py,sha256=_B4KN3ZcuvVr8y3LedtfxBJfmPKjfVMjpbtl8bbQAuc,14278
 euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
 euroeval/utils.py,sha256=e83OnWc0GJn0Tn_vP3tbqh1DAbLy2ky-LnIlTEOKzKU,11410
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
@@ -27,7 +27,7 @@ euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-l
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
 euroeval/benchmark_modules/hf.py,sha256=CoiaNakjhg6gm_5IbUUeevXQZebg2VrRLuhzEi2Hhrk,44617
 euroeval/benchmark_modules/litellm.py,sha256=SxSr_0C6b_jVavR3y9QyhfkCOP5-va4zijGfghFTArY,48362
-euroeval/benchmark_modules/vllm.py,sha256=rz_Xau5TGiFeb2VkdVpW_fYOfRCCvYrH0q9BGzCwZlo,42156
+euroeval/benchmark_modules/vllm.py,sha256=SbQ_EYSwUFBVLsp9io1Q75A9S_H-iw6AzLOn3rlEhK0,43034
 euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
@@ -51,11 +51,11 @@ euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5w
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 euroeval/task_group_utils/multiple_choice_classification.py,sha256=nB78TzOgd0HBvTclmjOYJid9ZVAgu8IHZsqB_n1SAZU,6178
 euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iYvfoGt0EUObSaXRCGmk,27700
-euroeval/task_group_utils/sequence_classification.py,sha256=Yqx0pUhuHYmSkv1ZUfOndSLTvpr0lWCk19oYITfSjV4,13555
+euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.9.1.dist-info/METADATA,sha256=UkGmFcnarstFwD1J1eS6h3gbyxnucnaAVLnB5QhkdSo,13555
-euroeval-15.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-15.9.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
-euroeval-15.9.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
-euroeval-15.9.1.dist-info/RECORD,,
+euroeval-15.9.2.dist-info/METADATA,sha256=LwHTlJ51OGVwcRTUPulH-gh8IFxu82CUFYHZ1uOUyT0,13555
+euroeval-15.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.9.2.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.9.2.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.9.2.dist-info/RECORD,,