EuroEval 16.1.0__py3-none-any.whl → 16.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/generation_utils.py +10 -4
- euroeval/task_group_utils/sequence_classification.py +1 -1
- euroeval/tokenisation_utils.py +12 -7
- {euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/METADATA +1 -1
- {euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/RECORD +8 -8
- {euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/WHEEL +0 -0
- {euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/entry_points.txt +0 -0
- {euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/licenses/LICENSE +0 -0
euroeval/generation_utils.py
CHANGED
|
@@ -202,7 +202,7 @@ def apply_prompt(
|
|
|
202
202
|
"""
|
|
203
203
|
# Sanity check
|
|
204
204
|
if (
|
|
205
|
-
generative_type
|
|
205
|
+
generative_type in {GenerativeType.INSTRUCTION_TUNED, GenerativeType.REASONING}
|
|
206
206
|
and always_populate_text_field
|
|
207
207
|
and tokeniser is None
|
|
208
208
|
):
|
|
@@ -229,7 +229,10 @@ def apply_prompt(
|
|
|
229
229
|
)
|
|
230
230
|
label_mapping = dataset_config.prompt_label_mapping
|
|
231
231
|
label = label_mapping.get(label, label)
|
|
232
|
-
if generative_type
|
|
232
|
+
if generative_type in {
|
|
233
|
+
GenerativeType.INSTRUCTION_TUNED,
|
|
234
|
+
GenerativeType.REASONING,
|
|
235
|
+
}:
|
|
233
236
|
prompt = dataset_config.instruction_prompt.format(**kwargs)
|
|
234
237
|
return prompt, label
|
|
235
238
|
else:
|
|
@@ -355,7 +358,7 @@ def apply_prompt(
|
|
|
355
358
|
f"Unsupported task group: {dataset_config.task.task_group}."
|
|
356
359
|
)
|
|
357
360
|
|
|
358
|
-
if generative_type
|
|
361
|
+
if generative_type in {GenerativeType.INSTRUCTION_TUNED, GenerativeType.REASONING}:
|
|
359
362
|
few_shot_messages = [
|
|
360
363
|
dict(role=role, content=content)
|
|
361
364
|
for prompt, label in few_shot_sections
|
|
@@ -408,7 +411,10 @@ def apply_prompt(
|
|
|
408
411
|
else:
|
|
409
412
|
prompt_prefix = ""
|
|
410
413
|
if dataset_config.prompt_prefix:
|
|
411
|
-
|
|
414
|
+
labels_str = dataset_config.get_labels_str()
|
|
415
|
+
prompt_prefix = (
|
|
416
|
+
dataset_config.prompt_prefix.format(labels_str=labels_str) + "\n\n"
|
|
417
|
+
)
|
|
412
418
|
|
|
413
419
|
few_shot_prompt = "\n\n".join([prompt for prompt, _ in few_shot_sections])
|
|
414
420
|
if few_shot_prompt:
|
|
euroeval/task_group_utils/sequence_classification.py
CHANGED
|
@@ -198,7 +198,7 @@ def extract_labels_from_generation(
|
|
|
198
198
|
# If no candidate labels were found, we either pick the label with the smallest
|
|
199
199
|
# word edit distance to the predicted label (if invalid model outputs are
|
|
200
200
|
# allowed), or we raise an error
|
|
201
|
-
if min(edit_distances)
|
|
201
|
+
if min(edit_distances) >= 1000:
|
|
202
202
|
if dataset_config.allow_invalid_model_outputs:
|
|
203
203
|
logger.warning(
|
|
204
204
|
"No candidate labels found for the predicted label "
|
euroeval/tokenisation_utils.py
CHANGED
|
@@ -339,13 +339,18 @@ def get_end_of_chat_token_ids(
|
|
|
339
339
|
return None
|
|
340
340
|
|
|
341
341
|
user_message: dict[str, str] = dict(role="user", content="X")
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
342
|
+
try:
|
|
343
|
+
token_ids = apply_chat_template(
|
|
344
|
+
conversation=[user_message],
|
|
345
|
+
tokeniser=tokeniser,
|
|
346
|
+
tokenise=True,
|
|
347
|
+
add_generation_prompt=False,
|
|
348
|
+
enable_thinking=generative_type == GenerativeType.REASONING,
|
|
349
|
+
)
|
|
350
|
+
except InvalidModel as e:
|
|
351
|
+
if "does not have a chat template" in str(e):
|
|
352
|
+
return None
|
|
353
|
+
raise e
|
|
349
354
|
assert isinstance(token_ids, list)
|
|
350
355
|
|
|
351
356
|
for idx, token in enumerate(tokeniser.convert_ids_to_tokens(token_ids)):
|
|
{euroeval-16.1.0.dist-info → euroeval-16.1.1.dist-info}/RECORD
CHANGED
|
@@ -10,7 +10,7 @@ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
|
|
|
10
10
|
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
11
|
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
12
|
euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
|
|
13
|
-
euroeval/generation_utils.py,sha256=
|
|
13
|
+
euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
|
|
14
14
|
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
15
|
euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
|
|
16
16
|
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
@@ -18,7 +18,7 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
|
|
|
18
18
|
euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
|
|
19
19
|
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
20
|
euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
|
|
21
|
-
euroeval/tokenisation_utils.py,sha256=
|
|
21
|
+
euroeval/tokenisation_utils.py,sha256=e2H86vhSVfz5gx6GmzoBJwLZLG6sf3GEcoCGmvJBQLc,21505
|
|
22
22
|
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
23
|
euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
|
|
24
24
|
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
@@ -60,11 +60,11 @@ euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13a
|
|
|
60
60
|
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
61
61
|
euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
|
|
62
62
|
euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
|
|
63
|
-
euroeval/task_group_utils/sequence_classification.py,sha256=
|
|
63
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
|
|
64
64
|
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
65
65
|
euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
|
|
66
|
-
euroeval-16.1.
|
|
67
|
-
euroeval-16.1.
|
|
68
|
-
euroeval-16.1.
|
|
69
|
-
euroeval-16.1.
|
|
70
|
-
euroeval-16.1.
|
|
66
|
+
euroeval-16.1.1.dist-info/METADATA,sha256=gyqd2PPeT0vv_ye9nnfqv-0DlpejquzqcftBwpwnH7Y,13729
|
|
67
|
+
euroeval-16.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
68
|
+
euroeval-16.1.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
69
|
+
euroeval-16.1.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
70
|
+
euroeval-16.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|