EuroEval 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/benchmark_config_factory.py +6 -1
- euroeval/benchmark_modules/base.py +2 -0
- euroeval/benchmark_modules/fresh.py +7 -1
- euroeval/benchmark_modules/hf.py +26 -21
- euroeval/benchmark_modules/litellm.py +258 -131
- euroeval/benchmark_modules/vllm.py +79 -40
- euroeval/benchmarker.py +11 -2
- euroeval/cli.py +14 -1
- euroeval/constants.py +1 -1
- euroeval/data_models.py +77 -6
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/danish.py +14 -0
- euroeval/dataset_configs/dutch.py +14 -0
- euroeval/dataset_configs/english.py +22 -0
- euroeval/dataset_configs/estonian.py +15 -7
- euroeval/dataset_configs/finnish.py +14 -0
- euroeval/dataset_configs/french.py +14 -0
- euroeval/dataset_configs/german.py +23 -0
- euroeval/dataset_configs/italian.py +14 -0
- euroeval/dataset_configs/latvian.py +14 -0
- euroeval/dataset_configs/norwegian.py +14 -0
- euroeval/dataset_configs/polish.py +126 -0
- euroeval/dataset_configs/portuguese.py +14 -0
- euroeval/dataset_configs/spanish.py +14 -0
- euroeval/dataset_configs/swedish.py +25 -0
- euroeval/enums.py +12 -0
- euroeval/generation.py +17 -8
- euroeval/generation_utils.py +58 -10
- euroeval/metrics/pipeline.py +1 -1
- euroeval/prompt_templates/linguistic_acceptability.py +9 -0
- euroeval/prompt_templates/multiple_choice.py +27 -1
- euroeval/prompt_templates/named_entity_recognition.py +20 -0
- euroeval/prompt_templates/reading_comprehension.py +11 -0
- euroeval/prompt_templates/sentiment_classification.py +15 -0
- euroeval/prompt_templates/summarization.py +27 -1
- euroeval/scores.py +5 -0
- euroeval/task_group_utils/question_answering.py +29 -29
- euroeval/task_group_utils/sequence_classification.py +10 -33
- euroeval/task_group_utils/token_classification.py +3 -3
- euroeval/tasks.py +4 -4
- euroeval/{tokenization_utils.py → tokenisation_utils.py} +40 -23
- euroeval/utils.py +36 -3
- {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/METADATA +1 -1
- euroeval-16.1.0.dist-info/RECORD +70 -0
- euroeval-16.0.1.dist-info/RECORD +0 -69
- {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/WHEEL +0 -0
- {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Utility functions related to tokenization."""
|
|
1
|
+
"""Utility functions related to tokenisation."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import re
|
|
@@ -79,8 +79,8 @@ def should_prompts_be_stripped(
|
|
|
79
79
|
"""Determine if we should strip the prompts for few-shot evaluation.
|
|
80
80
|
|
|
81
81
|
This is the case if the tokeniser needs to include the space as part of the label
|
|
82
|
-
token. The strategy is thus to tokenize a label with a preceeding colon (as in the
|
|
83
|
-
prompts), i.e., ": positive", and check if the tokenization starts with the tokens
|
|
82
|
+
token. The strategy is thus to tokenise a label with a preceeding colon (as in the
|
|
83
|
+
prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
|
|
84
84
|
of ": ". If this is the case, then we should not strip the prompts, since the
|
|
85
85
|
tokeniser produces the whitespace token separately.
|
|
86
86
|
|
|
@@ -88,7 +88,7 @@ def should_prompts_be_stripped(
|
|
|
88
88
|
labels_to_be_generated:
|
|
89
89
|
The labels that are to be generated.
|
|
90
90
|
tokeniser:
|
|
91
|
-
The tokeniser used to tokenize the labels.
|
|
91
|
+
The tokeniser used to tokenise the labels.
|
|
92
92
|
|
|
93
93
|
Returns:
|
|
94
94
|
Whether we should strip the prompts.
|
|
@@ -124,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
|
|
|
124
124
|
labels_to_be_generated:
|
|
125
125
|
The labels that are to be generated.
|
|
126
126
|
tokeniser:
|
|
127
|
-
The tokeniser used to tokenize the labels.
|
|
127
|
+
The tokeniser used to tokenise the labels.
|
|
128
128
|
|
|
129
129
|
Returns:
|
|
130
130
|
Whether we should add a prefix space to the labels.
|
|
@@ -318,7 +318,9 @@ def get_pad_token(
|
|
|
318
318
|
return pad_token, pad_token_id
|
|
319
319
|
|
|
320
320
|
|
|
321
|
-
def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | None:
|
|
321
|
+
def get_end_of_chat_token_ids(
|
|
322
|
+
tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
|
|
323
|
+
) -> list[int] | None:
|
|
322
324
|
"""Get the end token ID for chat models.
|
|
323
325
|
|
|
324
326
|
This is only relevant for tokenisers with a chat template.
|
|
@@ -326,20 +328,23 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
|
|
|
326
328
|
Args:
|
|
327
329
|
tokeniser:
|
|
328
330
|
The tokeniser.
|
|
331
|
+
generative_type:
|
|
332
|
+
The generative type, or None if not available.
|
|
329
333
|
|
|
330
334
|
Returns:
|
|
331
335
|
The token IDs used to end chats, or None if the tokeniser does not have a chat
|
|
332
336
|
template or if no end-of-chat token could be found.
|
|
333
337
|
"""
|
|
334
|
-
if
|
|
338
|
+
if generative_type == GenerativeType.BASE:
|
|
335
339
|
return None
|
|
336
340
|
|
|
337
341
|
user_message: dict[str, str] = dict(role="user", content="X")
|
|
338
342
|
token_ids = apply_chat_template(
|
|
339
343
|
conversation=[user_message],
|
|
340
344
|
tokeniser=tokeniser,
|
|
341
|
-
|
|
345
|
+
tokenise=True,
|
|
342
346
|
add_generation_prompt=False,
|
|
347
|
+
enable_thinking=generative_type == GenerativeType.REASONING,
|
|
343
348
|
)
|
|
344
349
|
assert isinstance(token_ids, list)
|
|
345
350
|
|
|
@@ -420,7 +425,7 @@ def get_first_label_token_mapping(
|
|
|
420
425
|
for label in dataset_config.labels
|
|
421
426
|
]
|
|
422
427
|
|
|
423
|
-
# Tokenize some text containing each label, which we will use to extract the
|
|
428
|
+
# Tokenise some text containing each label, which we will use to extract the
|
|
424
429
|
# first token of each label
|
|
425
430
|
all_tokens: list[list[str]]
|
|
426
431
|
if not has_chat_template(tokeniser=tokeniser):
|
|
@@ -439,11 +444,13 @@ def get_first_label_token_mapping(
|
|
|
439
444
|
dict(role="user", content=""),
|
|
440
445
|
dict(role="assistant", content=label),
|
|
441
446
|
# Adding extra user message as Mistral tokenisers require
|
|
442
|
-
#
|
|
447
|
+
# conversations to end with a user message
|
|
443
448
|
dict(role="user", content=""),
|
|
444
449
|
],
|
|
445
450
|
tokeniser=tokeniser,
|
|
446
|
-
|
|
451
|
+
tokenise=True,
|
|
452
|
+
add_generation_prompt=True,
|
|
453
|
+
enable_thinking=generative_type == GenerativeType.REASONING,
|
|
447
454
|
)
|
|
448
455
|
)
|
|
449
456
|
for label in local_labels
|
|
@@ -537,9 +544,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
|
|
|
537
544
|
def apply_chat_template(
|
|
538
545
|
conversation: list[dict[str, str]],
|
|
539
546
|
tokeniser: "PreTrainedTokenizer",
|
|
540
|
-
|
|
541
|
-
add_generation_prompt: bool
|
|
542
|
-
|
|
547
|
+
tokenise: bool,
|
|
548
|
+
add_generation_prompt: bool,
|
|
549
|
+
enable_thinking: bool,
|
|
550
|
+
**extra_kwargs,
|
|
543
551
|
) -> str | list[int]:
|
|
544
552
|
"""Apply the chat template to a prompt.
|
|
545
553
|
|
|
@@ -548,38 +556,47 @@ def apply_chat_template(
|
|
|
548
556
|
The conversation to apply the chat template to.
|
|
549
557
|
tokeniser:
|
|
550
558
|
The tokeniser.
|
|
551
|
-
|
|
552
|
-
Whether to tokenize the resulting prompt, returning a list of token IDs
|
|
559
|
+
tokenise:
|
|
560
|
+
Whether to tokenise the resulting prompt, returning a list of token IDs
|
|
553
561
|
instead of a string.
|
|
554
562
|
add_generation_prompt:
|
|
555
563
|
Whether to add a generation prompt at the end of the conversation. This is
|
|
556
564
|
only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
|
|
557
565
|
always add a generation prompt.
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
is
|
|
566
|
+
enable_thinking:
|
|
567
|
+
Whether to enable special handling for reasoning models, such as adding
|
|
568
|
+
special tokens for thinking. This is only relevant for regular Hugging
|
|
569
|
+
Face tokenisers, as Mistral tokenisers always handle reasoning models.
|
|
570
|
+
**extra_kwargs:
|
|
571
|
+
Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
|
|
572
|
+
method. Only relevant for regular Hugging Face tokenisers.
|
|
561
573
|
|
|
562
574
|
Returns:
|
|
563
575
|
The prompt with the chat template applied, either as a string or a list of
|
|
564
|
-
token IDs, depending on the value of `tokenize`.
|
|
576
|
+
token IDs, depending on the value of `tokenise`.
|
|
565
577
|
|
|
566
578
|
Raises:
|
|
567
579
|
InvalidModel:
|
|
568
580
|
If the tokeniser does not have a chat template.
|
|
569
581
|
"""
|
|
582
|
+
# Ensure that the first user message is not empty, as this can cause issues with
|
|
583
|
+
# Jinja2
|
|
584
|
+
conversation[0]["content"] = conversation[0]["content"] or " "
|
|
585
|
+
|
|
570
586
|
if not has_chat_template(tokeniser=tokeniser):
|
|
571
587
|
raise InvalidModel(
|
|
572
588
|
"The tokeniser does not have a chat template, so cannot apply it."
|
|
573
589
|
)
|
|
574
590
|
elif isinstance(tokeniser, MistralCommonTokenizer):
|
|
575
591
|
templated_prompt = tokeniser.apply_chat_template(
|
|
576
|
-
conversation=conversation, tokenize=tokenize
|
|
592
|
+
conversation=conversation, tokenize=tokenise
|
|
577
593
|
)
|
|
578
594
|
else:
|
|
579
595
|
templated_prompt = tokeniser.apply_chat_template(
|
|
580
596
|
conversation=conversation,
|
|
581
597
|
add_generation_prompt=add_generation_prompt,
|
|
582
|
-
tokenize=tokenize,
|
|
583
|
-
|
|
598
|
+
tokenize=tokenise,
|
|
599
|
+
enable_thinking=enable_thinking,
|
|
600
|
+
**extra_kwargs,
|
|
584
601
|
)
|
|
585
602
|
return templated_prompt
|
euroeval/utils.py
CHANGED
|
@@ -4,7 +4,6 @@ import asyncio
|
|
|
4
4
|
import gc
|
|
5
5
|
import importlib
|
|
6
6
|
import importlib.metadata
|
|
7
|
-
import importlib.util
|
|
8
7
|
import logging
|
|
9
8
|
import os
|
|
10
9
|
import random
|
|
@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
|
|
|
25
24
|
from requests.exceptions import RequestException
|
|
26
25
|
from transformers import logging as tf_logging
|
|
27
26
|
|
|
28
|
-
from .exceptions import InvalidBenchmark, NaNValueInModelOutput
|
|
27
|
+
from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
|
|
29
28
|
|
|
30
29
|
if t.TYPE_CHECKING:
|
|
31
30
|
from types import TracebackType
|
|
32
31
|
|
|
32
|
+
from .data_models import ModelIdComponents
|
|
33
33
|
from .types import Predictions
|
|
34
34
|
|
|
35
35
|
|
|
@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
|
|
|
347
347
|
loop = asyncio.new_event_loop()
|
|
348
348
|
try:
|
|
349
349
|
asyncio.set_event_loop(loop)
|
|
350
|
-
|
|
350
|
+
response = loop.run_until_complete(coroutine)
|
|
351
|
+
return response
|
|
351
352
|
finally:
|
|
352
353
|
loop.close()
|
|
353
354
|
asyncio.set_event_loop(None)
|
|
@@ -488,3 +489,35 @@ def extract_multiple_choice_labels(
|
|
|
488
489
|
f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
|
|
489
490
|
)
|
|
490
491
|
return sample_candidate_labels
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def split_model_id(model_id: str) -> "ModelIdComponents":
|
|
495
|
+
"""Split a model ID into its components.
|
|
496
|
+
|
|
497
|
+
Args:
|
|
498
|
+
model_id:
|
|
499
|
+
The model ID to split.
|
|
500
|
+
|
|
501
|
+
Returns:
|
|
502
|
+
The split model ID.
|
|
503
|
+
|
|
504
|
+
Raises:
|
|
505
|
+
If the model ID is not valid.
|
|
506
|
+
"""
|
|
507
|
+
# Importing here to avoid circular imports
|
|
508
|
+
from .data_models import ModelIdComponents
|
|
509
|
+
|
|
510
|
+
# Attempt to extract the model ID, revision, and param using regex
|
|
511
|
+
model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
|
|
512
|
+
revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
|
|
513
|
+
param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
|
|
514
|
+
|
|
515
|
+
# If we cannot extract the model ID, raise an error
|
|
516
|
+
if model_id_match is None:
|
|
517
|
+
raise InvalidModel(f"The model ID {model_id!r} is not valid.")
|
|
518
|
+
model_id = model_id_match.group()
|
|
519
|
+
|
|
520
|
+
# Extract the revision and param and return the result
|
|
521
|
+
revision = revision_match.group(1) if revision_match is not None else "main"
|
|
522
|
+
param = param_match.group(1) if param_match is not None else None
|
|
523
|
+
return ModelIdComponents(model_id=model_id, revision=revision, param=param)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
|
|
2
|
+
euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
|
|
3
|
+
euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
|
|
4
|
+
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
+
euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
|
|
6
|
+
euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
|
|
7
|
+
euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
|
|
8
|
+
euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
|
|
9
|
+
euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
|
|
10
|
+
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
+
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
|
+
euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
|
|
13
|
+
euroeval/generation_utils.py,sha256=OtEXLhI6L1vlbC768dH3xzj0qkokz43m0vswGKrRmBA,18061
|
|
14
|
+
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
|
+
euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
|
|
16
|
+
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
17
|
+
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
18
|
+
euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
|
|
19
|
+
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
|
+
euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
|
|
21
|
+
euroeval/tokenisation_utils.py,sha256=jRIi9m8XmGh3LeZna47AWmJI9U9m4ojXQynQTe7kzWc,21344
|
|
22
|
+
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
|
+
euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
|
|
24
|
+
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
25
|
+
euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
|
|
26
|
+
euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
|
|
27
|
+
euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
|
|
28
|
+
euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
|
|
29
|
+
euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
|
|
30
|
+
euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
|
|
31
|
+
euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
|
|
32
|
+
euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
|
|
33
|
+
euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
|
|
34
|
+
euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
|
|
35
|
+
euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
|
|
36
|
+
euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
|
|
37
|
+
euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
|
|
38
|
+
euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
|
|
39
|
+
euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
|
|
40
|
+
euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
|
|
41
|
+
euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
|
|
42
|
+
euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
|
|
43
|
+
euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
|
|
44
|
+
euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
|
|
45
|
+
euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
|
|
46
|
+
euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
|
|
47
|
+
euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
|
|
48
|
+
euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
|
|
49
|
+
euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
|
|
50
|
+
euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
|
|
51
|
+
euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
|
|
52
|
+
euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
|
|
53
|
+
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
54
|
+
euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
|
|
55
|
+
euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
|
|
56
|
+
euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
|
|
57
|
+
euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
|
|
58
|
+
euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
|
|
59
|
+
euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
|
|
60
|
+
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
61
|
+
euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
|
|
62
|
+
euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
|
|
63
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=qWUUrh4X4jK2XfUzP4aoPDoJhVJifrnDEaaw_F48hig,16080
|
|
64
|
+
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
65
|
+
euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
|
|
66
|
+
euroeval-16.1.0.dist-info/METADATA,sha256=pYdW0IZwY8vatTA55EERxBK1kMaQuGhqzNys5xiSqsM,13729
|
|
67
|
+
euroeval-16.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
68
|
+
euroeval-16.1.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
69
|
+
euroeval-16.1.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
70
|
+
euroeval-16.1.0.dist-info/RECORD,,
|
euroeval-16.0.1.dist-info/RECORD
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
|
|
2
|
-
euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
|
|
3
|
-
euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
|
|
4
|
-
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
-
euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
|
|
6
|
-
euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
|
|
7
|
-
euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
|
|
8
|
-
euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
|
|
9
|
-
euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
|
|
10
|
-
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
-
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
|
-
euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
|
|
13
|
-
euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
|
|
14
|
-
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
|
-
euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
|
|
16
|
-
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
17
|
-
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
18
|
-
euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
|
|
19
|
-
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
|
-
euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
|
|
21
|
-
euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
|
|
22
|
-
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
|
-
euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
|
|
24
|
-
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
25
|
-
euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
|
|
26
|
-
euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
|
|
27
|
-
euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
|
|
28
|
-
euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
|
|
29
|
-
euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
|
|
30
|
-
euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
|
|
31
|
-
euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
|
|
32
|
-
euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
|
|
33
|
-
euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
|
|
34
|
-
euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
|
|
35
|
-
euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
|
|
36
|
-
euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
|
|
37
|
-
euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
|
|
38
|
-
euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
|
|
39
|
-
euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
|
|
40
|
-
euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
|
|
41
|
-
euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
|
|
42
|
-
euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
|
|
43
|
-
euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
|
|
44
|
-
euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
|
|
45
|
-
euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
|
|
46
|
-
euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
|
|
47
|
-
euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
|
|
48
|
-
euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
|
|
49
|
-
euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
|
|
50
|
-
euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
|
|
51
|
-
euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
|
|
52
|
-
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
53
|
-
euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
|
|
54
|
-
euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
|
|
55
|
-
euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
|
|
56
|
-
euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
|
|
57
|
-
euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
|
|
58
|
-
euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
|
|
59
|
-
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
60
|
-
euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
|
|
61
|
-
euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
|
|
62
|
-
euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
|
|
63
|
-
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
64
|
-
euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
|
|
65
|
-
euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
|
|
66
|
-
euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
67
|
-
euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
68
|
-
euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
69
|
-
euroeval-16.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|