EuroEval 16.1.0__py3-none-any.whl → 16.1.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

@@ -202,7 +202,7 @@ def apply_prompt(
202
202
  """
203
203
  # Sanity check
204
204
  if (
205
- generative_type == GenerativeType.INSTRUCTION_TUNED
205
+ generative_type in {GenerativeType.INSTRUCTION_TUNED, GenerativeType.REASONING}
206
206
  and always_populate_text_field
207
207
  and tokeniser is None
208
208
  ):
@@ -229,7 +229,10 @@ def apply_prompt(
229
229
  )
230
230
  label_mapping = dataset_config.prompt_label_mapping
231
231
  label = label_mapping.get(label, label)
232
- if generative_type == GenerativeType.INSTRUCTION_TUNED:
232
+ if generative_type in {
233
+ GenerativeType.INSTRUCTION_TUNED,
234
+ GenerativeType.REASONING,
235
+ }:
233
236
  prompt = dataset_config.instruction_prompt.format(**kwargs)
234
237
  return prompt, label
235
238
  else:
@@ -355,7 +358,7 @@ def apply_prompt(
355
358
  f"Unsupported task group: {dataset_config.task.task_group}."
356
359
  )
357
360
 
358
- if generative_type == GenerativeType.INSTRUCTION_TUNED:
361
+ if generative_type in {GenerativeType.INSTRUCTION_TUNED, GenerativeType.REASONING}:
359
362
  few_shot_messages = [
360
363
  dict(role=role, content=content)
361
364
  for prompt, label in few_shot_sections
@@ -408,7 +411,10 @@ def apply_prompt(
408
411
  else:
409
412
  prompt_prefix = ""
410
413
  if dataset_config.prompt_prefix:
411
- prompt_prefix = dataset_config.prompt_prefix + "\n\n"
414
+ labels_str = dataset_config.get_labels_str()
415
+ prompt_prefix = (
416
+ dataset_config.prompt_prefix.format(labels_str=labels_str) + "\n\n"
417
+ )
412
418
 
413
419
  few_shot_prompt = "\n\n".join([prompt for prompt, _ in few_shot_sections])
414
420
  if few_shot_prompt:
@@ -198,7 +198,7 @@ def extract_labels_from_generation(
198
198
  # If no candidate labels were found, we either pick the label with the smallest
199
199
  # word edit distance to the predicted label (if invalid model outputs are
200
200
  # allowed), or we raise an error
201
- if min(edit_distances) > 100:
201
+ if min(edit_distances) >= 1000:
202
202
  if dataset_config.allow_invalid_model_outputs:
203
203
  logger.warning(
204
204
  "No candidate labels found for the predicted label "
@@ -339,13 +339,18 @@ def get_end_of_chat_token_ids(
339
339
  return None
340
340
 
341
341
  user_message: dict[str, str] = dict(role="user", content="X")
342
- token_ids = apply_chat_template(
343
- conversation=[user_message],
344
- tokeniser=tokeniser,
345
- tokenise=True,
346
- add_generation_prompt=False,
347
- enable_thinking=generative_type == GenerativeType.REASONING,
348
- )
342
+ try:
343
+ token_ids = apply_chat_template(
344
+ conversation=[user_message],
345
+ tokeniser=tokeniser,
346
+ tokenise=True,
347
+ add_generation_prompt=False,
348
+ enable_thinking=generative_type == GenerativeType.REASONING,
349
+ )
350
+ except InvalidModel as e:
351
+ if "does not have a chat template" in str(e):
352
+ return None
353
+ raise e
349
354
  assert isinstance(token_ids, list)
350
355
 
351
356
  for idx, token in enumerate(tokeniser.convert_ids_to_tokens(token_ids)):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 16.1.0
3
+ Version: 16.1.1
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -10,7 +10,7 @@ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
10
10
  euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
11
  euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
12
  euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
13
- euroeval/generation_utils.py,sha256=OtEXLhI6L1vlbC768dH3xzj0qkokz43m0vswGKrRmBA,18061
13
+ euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
14
14
  euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
15
  euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
16
  euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
@@ -18,7 +18,7 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
18
18
  euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
19
19
  euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
20
  euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
21
- euroeval/tokenisation_utils.py,sha256=jRIi9m8XmGh3LeZna47AWmJI9U9m4ojXQynQTe7kzWc,21344
21
+ euroeval/tokenisation_utils.py,sha256=e2H86vhSVfz5gx6GmzoBJwLZLG6sf3GEcoCGmvJBQLc,21505
22
22
  euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
23
  euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
24
24
  euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
@@ -60,11 +60,11 @@ euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13a
60
60
  euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
61
61
  euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
62
62
  euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
63
- euroeval/task_group_utils/sequence_classification.py,sha256=qWUUrh4X4jK2XfUzP4aoPDoJhVJifrnDEaaw_F48hig,16080
63
+ euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
64
64
  euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
65
65
  euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
66
- euroeval-16.1.0.dist-info/METADATA,sha256=pYdW0IZwY8vatTA55EERxBK1kMaQuGhqzNys5xiSqsM,13729
67
- euroeval-16.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
- euroeval-16.1.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
69
- euroeval-16.1.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
70
- euroeval-16.1.0.dist-info/RECORD,,
66
+ euroeval-16.1.1.dist-info/METADATA,sha256=gyqd2PPeT0vv_ye9nnfqv-0DlpejquzqcftBwpwnH7Y,13729
67
+ euroeval-16.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
+ euroeval-16.1.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
69
+ euroeval-16.1.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
70
+ euroeval-16.1.1.dist-info/RECORD,,