EuroEval 16.0.1__py3-none-any.whl → 16.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/benchmark_config_factory.py +6 -1
- euroeval/benchmark_modules/base.py +2 -0
- euroeval/benchmark_modules/fresh.py +7 -1
- euroeval/benchmark_modules/hf.py +26 -21
- euroeval/benchmark_modules/litellm.py +258 -131
- euroeval/benchmark_modules/vllm.py +79 -40
- euroeval/benchmarker.py +11 -2
- euroeval/cli.py +14 -1
- euroeval/constants.py +1 -1
- euroeval/data_models.py +77 -6
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/danish.py +14 -0
- euroeval/dataset_configs/dutch.py +14 -0
- euroeval/dataset_configs/english.py +22 -0
- euroeval/dataset_configs/estonian.py +15 -7
- euroeval/dataset_configs/finnish.py +14 -0
- euroeval/dataset_configs/french.py +14 -0
- euroeval/dataset_configs/german.py +23 -0
- euroeval/dataset_configs/italian.py +14 -0
- euroeval/dataset_configs/latvian.py +14 -0
- euroeval/dataset_configs/norwegian.py +14 -0
- euroeval/dataset_configs/polish.py +126 -0
- euroeval/dataset_configs/portuguese.py +14 -0
- euroeval/dataset_configs/spanish.py +14 -0
- euroeval/dataset_configs/swedish.py +25 -0
- euroeval/enums.py +12 -0
- euroeval/generation.py +17 -8
- euroeval/generation_utils.py +65 -11
- euroeval/metrics/pipeline.py +1 -1
- euroeval/prompt_templates/linguistic_acceptability.py +9 -0
- euroeval/prompt_templates/multiple_choice.py +27 -1
- euroeval/prompt_templates/named_entity_recognition.py +20 -0
- euroeval/prompt_templates/reading_comprehension.py +11 -0
- euroeval/prompt_templates/sentiment_classification.py +15 -0
- euroeval/prompt_templates/summarization.py +27 -1
- euroeval/scores.py +5 -0
- euroeval/task_group_utils/question_answering.py +29 -29
- euroeval/task_group_utils/sequence_classification.py +11 -34
- euroeval/task_group_utils/token_classification.py +3 -3
- euroeval/tasks.py +4 -4
- euroeval/{tokenization_utils.py → tokenisation_utils.py} +50 -28
- euroeval/utils.py +36 -3
- {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/METADATA +1 -1
- euroeval-16.1.1.dist-info/RECORD +70 -0
- euroeval-16.0.1.dist-info/RECORD +0 -69
- {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/WHEEL +0 -0
- {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/entry_points.txt +0 -0
- {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Utility functions related to
|
|
1
|
+
"""Utility functions related to tokenisation."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import re
|
|
@@ -79,8 +79,8 @@ def should_prompts_be_stripped(
|
|
|
79
79
|
"""Determine if we should strip the prompts for few-shot evaluation.
|
|
80
80
|
|
|
81
81
|
This is the case if the tokeniser needs to include the space as part of the label
|
|
82
|
-
token. The strategy is thus to
|
|
83
|
-
prompts), i.e., ": positive", and check if the
|
|
82
|
+
token. The strategy is thus to tokenise a label with a preceding colon (as in the
|
|
83
|
+
prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
|
|
84
84
|
of ": ". If this is the case, then we should not strip the prompts, since the
|
|
85
85
|
tokeniser produces the whitespace token separately.
|
|
86
86
|
|
|
@@ -88,7 +88,7 @@ def should_prompts_be_stripped(
|
|
|
88
88
|
labels_to_be_generated:
|
|
89
89
|
The labels that are to be generated.
|
|
90
90
|
tokeniser:
|
|
91
|
-
The tokeniser used to
|
|
91
|
+
The tokeniser used to tokenise the labels.
|
|
92
92
|
|
|
93
93
|
Returns:
|
|
94
94
|
Whether we should strip the prompts.
|
|
@@ -124,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
|
|
|
124
124
|
labels_to_be_generated:
|
|
125
125
|
The labels that are to be generated.
|
|
126
126
|
tokeniser:
|
|
127
|
-
The tokeniser used to
|
|
127
|
+
The tokeniser used to tokenise the labels.
|
|
128
128
|
|
|
129
129
|
Returns:
|
|
130
130
|
Whether we should add a prefix space to the labels.
|
|
@@ -318,7 +318,9 @@ def get_pad_token(
|
|
|
318
318
|
return pad_token, pad_token_id
|
|
319
319
|
|
|
320
320
|
|
|
321
|
-
def get_end_of_chat_token_ids(
|
|
321
|
+
def get_end_of_chat_token_ids(
|
|
322
|
+
tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
|
|
323
|
+
) -> list[int] | None:
|
|
322
324
|
"""Get the end token ID for chat models.
|
|
323
325
|
|
|
324
326
|
This is only relevant for tokenisers with a chat template.
|
|
@@ -326,21 +328,29 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
|
|
|
326
328
|
Args:
|
|
327
329
|
tokeniser:
|
|
328
330
|
The tokeniser.
|
|
331
|
+
generative_type:
|
|
332
|
+
The generative type, or None if not available.
|
|
329
333
|
|
|
330
334
|
Returns:
|
|
331
335
|
The token IDs used to end chats, or None if the tokeniser does not have a chat
|
|
332
336
|
template or if no end-of-chat token could be found.
|
|
333
337
|
"""
|
|
334
|
-
if
|
|
338
|
+
if generative_type == GenerativeType.BASE:
|
|
335
339
|
return None
|
|
336
340
|
|
|
337
341
|
user_message: dict[str, str] = dict(role="user", content="X")
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
342
|
+
try:
|
|
343
|
+
token_ids = apply_chat_template(
|
|
344
|
+
conversation=[user_message],
|
|
345
|
+
tokeniser=tokeniser,
|
|
346
|
+
tokenise=True,
|
|
347
|
+
add_generation_prompt=False,
|
|
348
|
+
enable_thinking=generative_type == GenerativeType.REASONING,
|
|
349
|
+
)
|
|
350
|
+
except InvalidModel as e:
|
|
351
|
+
if "does not have a chat template" in str(e):
|
|
352
|
+
return None
|
|
353
|
+
raise e
|
|
344
354
|
assert isinstance(token_ids, list)
|
|
345
355
|
|
|
346
356
|
for idx, token in enumerate(tokeniser.convert_ids_to_tokens(token_ids)):
|
|
@@ -420,7 +430,7 @@ def get_first_label_token_mapping(
|
|
|
420
430
|
for label in dataset_config.labels
|
|
421
431
|
]
|
|
422
432
|
|
|
423
|
-
#
|
|
433
|
+
# Tokenise some text containing each label, which we will use to extract the
|
|
424
434
|
# first token of each label
|
|
425
435
|
all_tokens: list[list[str]]
|
|
426
436
|
if not has_chat_template(tokeniser=tokeniser):
|
|
@@ -439,11 +449,13 @@ def get_first_label_token_mapping(
|
|
|
439
449
|
dict(role="user", content=""),
|
|
440
450
|
dict(role="assistant", content=label),
|
|
441
451
|
# Adding extra user message as Mistral tokenisers require
|
|
442
|
-
#
|
|
452
|
+
# conversations to end with a user message
|
|
443
453
|
dict(role="user", content=""),
|
|
444
454
|
],
|
|
445
455
|
tokeniser=tokeniser,
|
|
446
|
-
|
|
456
|
+
tokenise=True,
|
|
457
|
+
add_generation_prompt=True,
|
|
458
|
+
enable_thinking=generative_type == GenerativeType.REASONING,
|
|
447
459
|
)
|
|
448
460
|
)
|
|
449
461
|
for label in local_labels
|
|
@@ -537,9 +549,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
|
|
|
537
549
|
def apply_chat_template(
|
|
538
550
|
conversation: list[dict[str, str]],
|
|
539
551
|
tokeniser: "PreTrainedTokenizer",
|
|
540
|
-
|
|
541
|
-
add_generation_prompt: bool
|
|
542
|
-
|
|
552
|
+
tokenise: bool,
|
|
553
|
+
add_generation_prompt: bool,
|
|
554
|
+
enable_thinking: bool,
|
|
555
|
+
**extra_kwargs,
|
|
543
556
|
) -> str | list[int]:
|
|
544
557
|
"""Apply the chat template to a prompt.
|
|
545
558
|
|
|
@@ -548,38 +561,47 @@ def apply_chat_template(
|
|
|
548
561
|
The conversation to apply the chat template to.
|
|
549
562
|
tokeniser:
|
|
550
563
|
The tokeniser.
|
|
551
|
-
|
|
552
|
-
Whether to
|
|
564
|
+
tokenise:
|
|
565
|
+
Whether to tokenise the resulting prompt, returning a list of token IDs
|
|
553
566
|
instead of a string.
|
|
554
567
|
add_generation_prompt:
|
|
555
568
|
Whether to add a generation prompt at the end of the conversation. This is
|
|
556
569
|
only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
|
|
557
570
|
always add a generation prompt.
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
is
|
|
571
|
+
enable_thinking:
|
|
572
|
+
Whether to enable special handling for reasoning models, such as adding
|
|
573
|
+
special tokens for thinking. This is only relevant for regular Hugging
|
|
574
|
+
Face tokenisers, as Mistral tokenisers always handle reasoning models.
|
|
575
|
+
**extra_kwargs:
|
|
576
|
+
Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
|
|
577
|
+
method. Only relevant for regular Hugging Face tokenisers.
|
|
561
578
|
|
|
562
579
|
Returns:
|
|
563
580
|
The prompt with the chat template applied, either as a string or a list of
|
|
564
|
-
token IDs, depending on the value of `
|
|
581
|
+
token IDs, depending on the value of `tokenise`.
|
|
565
582
|
|
|
566
583
|
Raises:
|
|
567
584
|
InvalidModel:
|
|
568
585
|
If the tokeniser does not have a chat template.
|
|
569
586
|
"""
|
|
587
|
+
# Ensure that the first user message is not empty, as this can cause issues with
|
|
588
|
+
# Jinja2
|
|
589
|
+
conversation[0]["content"] = conversation[0]["content"] or " "
|
|
590
|
+
|
|
570
591
|
if not has_chat_template(tokeniser=tokeniser):
|
|
571
592
|
raise InvalidModel(
|
|
572
593
|
"The tokeniser does not have a chat template, so cannot apply it."
|
|
573
594
|
)
|
|
574
595
|
elif isinstance(tokeniser, MistralCommonTokenizer):
|
|
575
596
|
templated_prompt = tokeniser.apply_chat_template(
|
|
576
|
-
conversation=conversation, tokenize=
|
|
597
|
+
conversation=conversation, tokenize=tokenise
|
|
577
598
|
)
|
|
578
599
|
else:
|
|
579
600
|
templated_prompt = tokeniser.apply_chat_template(
|
|
580
601
|
conversation=conversation,
|
|
581
602
|
add_generation_prompt=add_generation_prompt,
|
|
582
|
-
tokenize=
|
|
583
|
-
|
|
603
|
+
tokenize=tokenise,
|
|
604
|
+
enable_thinking=enable_thinking,
|
|
605
|
+
**extra_kwargs,
|
|
584
606
|
)
|
|
585
607
|
return templated_prompt
|
euroeval/utils.py
CHANGED
|
@@ -4,7 +4,6 @@ import asyncio
|
|
|
4
4
|
import gc
|
|
5
5
|
import importlib
|
|
6
6
|
import importlib.metadata
|
|
7
|
-
import importlib.util
|
|
8
7
|
import logging
|
|
9
8
|
import os
|
|
10
9
|
import random
|
|
@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
|
|
|
25
24
|
from requests.exceptions import RequestException
|
|
26
25
|
from transformers import logging as tf_logging
|
|
27
26
|
|
|
28
|
-
from .exceptions import InvalidBenchmark, NaNValueInModelOutput
|
|
27
|
+
from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
|
|
29
28
|
|
|
30
29
|
if t.TYPE_CHECKING:
|
|
31
30
|
from types import TracebackType
|
|
32
31
|
|
|
32
|
+
from .data_models import ModelIdComponents
|
|
33
33
|
from .types import Predictions
|
|
34
34
|
|
|
35
35
|
|
|
@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
|
|
|
347
347
|
loop = asyncio.new_event_loop()
|
|
348
348
|
try:
|
|
349
349
|
asyncio.set_event_loop(loop)
|
|
350
|
-
|
|
350
|
+
response = loop.run_until_complete(coroutine)
|
|
351
|
+
return response
|
|
351
352
|
finally:
|
|
352
353
|
loop.close()
|
|
353
354
|
asyncio.set_event_loop(None)
|
|
@@ -488,3 +489,35 @@ def extract_multiple_choice_labels(
|
|
|
488
489
|
f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
|
|
489
490
|
)
|
|
490
491
|
return sample_candidate_labels
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def split_model_id(model_id: str) -> "ModelIdComponents":
|
|
495
|
+
"""Split a model ID into its components.
|
|
496
|
+
|
|
497
|
+
Args:
|
|
498
|
+
model_id:
|
|
499
|
+
The model ID to split.
|
|
500
|
+
|
|
501
|
+
Returns:
|
|
502
|
+
The split model ID.
|
|
503
|
+
|
|
504
|
+
Raises:
|
|
505
|
+
InvalidModel: If the model ID is not valid.
|
|
506
|
+
"""
|
|
507
|
+
# Importing here to avoid circular imports
|
|
508
|
+
from .data_models import ModelIdComponents
|
|
509
|
+
|
|
510
|
+
# Attempt to extract the model ID, revision, and param using regex
|
|
511
|
+
model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
|
|
512
|
+
revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
|
|
513
|
+
param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
|
|
514
|
+
|
|
515
|
+
# If we cannot extract the model ID, raise an error
|
|
516
|
+
if model_id_match is None:
|
|
517
|
+
raise InvalidModel(f"The model ID {model_id!r} is not valid.")
|
|
518
|
+
model_id = model_id_match.group()
|
|
519
|
+
|
|
520
|
+
# Extract the revision and param and return the result
|
|
521
|
+
revision = revision_match.group(1) if revision_match is not None else "main"
|
|
522
|
+
param = param_match.group(1) if param_match is not None else None
|
|
523
|
+
return ModelIdComponents(model_id=model_id, revision=revision, param=param)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
|
|
2
|
+
euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
|
|
3
|
+
euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
|
|
4
|
+
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
+
euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
|
|
6
|
+
euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
|
|
7
|
+
euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
|
|
8
|
+
euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
|
|
9
|
+
euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
|
|
10
|
+
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
+
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
|
+
euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
|
|
13
|
+
euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
|
|
14
|
+
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
|
+
euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
|
|
16
|
+
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
17
|
+
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
18
|
+
euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
|
|
19
|
+
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
|
+
euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
|
|
21
|
+
euroeval/tokenisation_utils.py,sha256=e2H86vhSVfz5gx6GmzoBJwLZLG6sf3GEcoCGmvJBQLc,21505
|
|
22
|
+
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
|
+
euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
|
|
24
|
+
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
25
|
+
euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
|
|
26
|
+
euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
|
|
27
|
+
euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
|
|
28
|
+
euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
|
|
29
|
+
euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
|
|
30
|
+
euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
|
|
31
|
+
euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
|
|
32
|
+
euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
|
|
33
|
+
euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
|
|
34
|
+
euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
|
|
35
|
+
euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
|
|
36
|
+
euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
|
|
37
|
+
euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
|
|
38
|
+
euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
|
|
39
|
+
euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
|
|
40
|
+
euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
|
|
41
|
+
euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
|
|
42
|
+
euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
|
|
43
|
+
euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
|
|
44
|
+
euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
|
|
45
|
+
euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
|
|
46
|
+
euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
|
|
47
|
+
euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
|
|
48
|
+
euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
|
|
49
|
+
euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
|
|
50
|
+
euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
|
|
51
|
+
euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
|
|
52
|
+
euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
|
|
53
|
+
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
54
|
+
euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
|
|
55
|
+
euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
|
|
56
|
+
euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
|
|
57
|
+
euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
|
|
58
|
+
euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
|
|
59
|
+
euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
|
|
60
|
+
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
61
|
+
euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
|
|
62
|
+
euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
|
|
63
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
|
|
64
|
+
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
65
|
+
euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
|
|
66
|
+
euroeval-16.1.1.dist-info/METADATA,sha256=gyqd2PPeT0vv_ye9nnfqv-0DlpejquzqcftBwpwnH7Y,13729
|
|
67
|
+
euroeval-16.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
68
|
+
euroeval-16.1.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
69
|
+
euroeval-16.1.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
70
|
+
euroeval-16.1.1.dist-info/RECORD,,
|
euroeval-16.0.1.dist-info/RECORD
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
|
|
2
|
-
euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
|
|
3
|
-
euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
|
|
4
|
-
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
-
euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
|
|
6
|
-
euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
|
|
7
|
-
euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
|
|
8
|
-
euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
|
|
9
|
-
euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
|
|
10
|
-
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
-
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
|
-
euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
|
|
13
|
-
euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
|
|
14
|
-
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
|
-
euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
|
|
16
|
-
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
17
|
-
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
18
|
-
euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
|
|
19
|
-
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
|
-
euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
|
|
21
|
-
euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
|
|
22
|
-
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
|
-
euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
|
|
24
|
-
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
25
|
-
euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
|
|
26
|
-
euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
|
|
27
|
-
euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
|
|
28
|
-
euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
|
|
29
|
-
euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
|
|
30
|
-
euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
|
|
31
|
-
euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
|
|
32
|
-
euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
|
|
33
|
-
euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
|
|
34
|
-
euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
|
|
35
|
-
euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
|
|
36
|
-
euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
|
|
37
|
-
euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
|
|
38
|
-
euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
|
|
39
|
-
euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
|
|
40
|
-
euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
|
|
41
|
-
euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
|
|
42
|
-
euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
|
|
43
|
-
euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
|
|
44
|
-
euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
|
|
45
|
-
euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
|
|
46
|
-
euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
|
|
47
|
-
euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
|
|
48
|
-
euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
|
|
49
|
-
euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
|
|
50
|
-
euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
|
|
51
|
-
euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
|
|
52
|
-
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
53
|
-
euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
|
|
54
|
-
euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
|
|
55
|
-
euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
|
|
56
|
-
euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
|
|
57
|
-
euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
|
|
58
|
-
euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
|
|
59
|
-
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
60
|
-
euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
|
|
61
|
-
euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
|
|
62
|
-
euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
|
|
63
|
-
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
64
|
-
euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
|
|
65
|
-
euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
|
|
66
|
-
euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
67
|
-
euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
68
|
-
euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
69
|
-
euroeval-16.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|