EuroEval 16.0.0__py3-none-any.whl → 16.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic.
- euroeval/__init__.py +5 -0
- euroeval/benchmark_config_factory.py +6 -1
- euroeval/benchmark_modules/base.py +2 -0
- euroeval/benchmark_modules/fresh.py +7 -1
- euroeval/benchmark_modules/hf.py +26 -21
- euroeval/benchmark_modules/litellm.py +258 -131
- euroeval/benchmark_modules/vllm.py +120 -68
- euroeval/benchmarker.py +11 -2
- euroeval/cli.py +14 -1
- euroeval/constants.py +7 -1
- euroeval/data_models.py +95 -20
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/danish.py +14 -3
- euroeval/dataset_configs/dutch.py +14 -0
- euroeval/dataset_configs/english.py +22 -0
- euroeval/dataset_configs/estonian.py +15 -7
- euroeval/dataset_configs/finnish.py +14 -0
- euroeval/dataset_configs/french.py +14 -0
- euroeval/dataset_configs/german.py +23 -0
- euroeval/dataset_configs/italian.py +14 -0
- euroeval/dataset_configs/latvian.py +14 -0
- euroeval/dataset_configs/norwegian.py +14 -0
- euroeval/dataset_configs/polish.py +126 -0
- euroeval/dataset_configs/portuguese.py +14 -0
- euroeval/dataset_configs/spanish.py +14 -0
- euroeval/dataset_configs/swedish.py +25 -0
- euroeval/enums.py +12 -0
- euroeval/generation.py +17 -8
- euroeval/generation_utils.py +102 -16
- euroeval/metrics/pipeline.py +51 -9
- euroeval/model_cache.py +13 -1
- euroeval/prompt_templates/linguistic_acceptability.py +9 -0
- euroeval/prompt_templates/multiple_choice.py +27 -1
- euroeval/prompt_templates/named_entity_recognition.py +20 -0
- euroeval/prompt_templates/reading_comprehension.py +11 -0
- euroeval/prompt_templates/sentiment_classification.py +15 -0
- euroeval/prompt_templates/summarization.py +27 -1
- euroeval/scores.py +5 -0
- euroeval/task_group_utils/multiple_choice_classification.py +2 -2
- euroeval/task_group_utils/question_answering.py +29 -29
- euroeval/task_group_utils/sequence_classification.py +71 -81
- euroeval/task_group_utils/token_classification.py +17 -3
- euroeval/tasks.py +12 -10
- euroeval/{tokenization_utils.py → tokenisation_utils.py} +41 -25
- euroeval/utils.py +67 -3
- {euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/METADATA +3 -1
- euroeval-16.1.0.dist-info/RECORD +70 -0
- euroeval-16.0.0.dist-info/RECORD +0 -69
- {euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/WHEEL +0 -0
- {euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -215,6 +215,20 @@ def extract_labels_from_generation(
 
     prompt_label_mapping = dataset_config.prompt_label_mapping
     for prompt_tag_name, named_entities in prediction_dict.items():
+        if not isinstance(named_entities, list):
+            logger.debug(
+                "The model produced an invalid format for the named entities. "
+                f"Expected a list but got {type(named_entities)}. Skipping."
+            )
+            continue
+        try:
+            named_entities = [str(ne) for ne in named_entities]
+        except Exception:
+            logger.debug(
+                "The model produced an invalid format for the named entities. "
+                f"Expected a list of strings but got {named_entities}. Skipping."
+            )
+            continue
         try:
             tag_name = [
                 tag[2:]

@@ -259,7 +273,7 @@ def tokenize_and_align_labels(
     Returns:
         A dictionary containing the tokenized data as well as labels.
     """
-    #
+    # Tokenise the texts. We use the `is_split_into_words` argument here because
     # the texts in our dataset are lists of words (with a label for each word)
     tokenized_inputs = tokeniser(
         examples["tokens"], is_split_into_words=True, truncation=True, padding=True

@@ -382,7 +396,7 @@ def handle_unk_tokens(
 
     Args:
         tokeniser:
-            The tokeniser used to
+            The tokeniser used to tokenise the words.
         tokens:
            The list of tokens.
        words:

@@ -409,7 +423,7 @@ def handle_unk_tokens(
        # Fetch the word
        word = words[word_idx]
 
-        #
+        # Tokenise the word, which is now a list containing at least one UNK token
        tokens_with_unk = tokeniser.convert_ids_to_tokens(
            tokeniser.encode(word, add_special_tokens=False)
        )
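For reference, here is a minimal standalone sketch of the guard added in the first hunk above: values in the prediction dictionary that are not lists are skipped, and list entries are coerced to strings. The prediction_dict literal and the cleaned dictionary are illustrative only and not part of EuroEval.

import logging

logger = logging.getLogger(__name__)

# Hypothetical model output: one value is not a list and one entry is not a string.
prediction_dict = {"person": ["Ada Lovelace"], "location": "London", "misc": [42]}

cleaned: dict[str, list[str]] = {}
for prompt_tag_name, named_entities in prediction_dict.items():
    if not isinstance(named_entities, list):
        logger.debug("Expected a list but got %s. Skipping.", type(named_entities))
        continue
    # Coerce every entry to a string, mirroring the new try/except in the diff.
    cleaned[prompt_tag_name] = [str(ne) for ne in named_entities]

print(cleaned)  # {'person': ['Ada Lovelace'], 'misc': ['42']}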
euroeval/tasks.py
CHANGED
@@ -1,6 +1,7 @@
 """All benchmarks tasks used in EuroEval."""
 
 from . import metrics as m
+from .constants import NUM_GENERATION_TOKENS_FOR_CLASSIFICATION
 from .data_models import Task
 from .enums import GenerativeType, ModelType, TaskGroup
 from .prompt_templates import (

@@ -28,7 +29,7 @@ LA = Task(
     template_dict=LA_TEMPLATES,
     metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
-    default_max_generated_tokens=
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
     default_labels=["correct", "incorrect"],
     uses_logprobs=True,
 )

@@ -73,7 +74,7 @@ SENT = Task(
     template_dict=SENT_TEMPLATES,
     metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
-    default_max_generated_tokens=
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
     default_labels=["positive", "neutral", "negative"],
     uses_logprobs=True,
 )

@@ -87,7 +88,7 @@ SUMM = Task(
     default_num_few_shot_examples=1,
     default_max_generated_tokens=256,
     default_labels=[],
-
+    default_allowed_model_types=[ModelType.GENERATIVE],
 )
 
 

@@ -97,7 +98,7 @@ KNOW = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
     default_labels=["a", "b", "c", "d"],
     uses_logprobs=True,
 )

@@ -109,7 +110,7 @@ MCRC = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
     default_labels=["a", "b", "c", "d"],
     uses_logprobs=True,
 )

@@ -121,7 +122,7 @@ COMMON_SENSE = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
     default_labels=["a", "b", "c", "d"],
     uses_logprobs=True,
 )

@@ -133,15 +134,16 @@ EUROPEAN_VALUES = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.european_values_metric],
     default_num_few_shot_examples=0,
-    default_max_generated_tokens=
-    default_labels=["
-
-
+    default_max_generated_tokens=NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
+    default_labels=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
+    default_allowed_model_types=[ModelType.GENERATIVE],
+    default_allowed_generative_types=[
         GenerativeType.INSTRUCTION_TUNED,
         GenerativeType.REASONING,
     ],
     requires_zero_shot=True,
     uses_logprobs=True,
+    default_allow_invalid_model_outputs=False,
 )
 
 
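The tasks above now take their generation budget from a shared constant instead of repeating a literal per task. A minimal sketch of the pattern follows, with a placeholder value of 1 as an assumption; the real value lives in euroeval/constants.py and is not shown in this diff.

# Placeholder value for illustration only; see euroeval/constants.py for the real one.
NUM_GENERATION_TOKENS_FOR_CLASSIFICATION = 1

def classification_task_defaults(labels: list[str]) -> dict:
    """Toy stand-in for the Task defaults shared by LA, SENT, KNOW, MCRC, etc."""
    return {
        "default_max_generated_tokens": NUM_GENERATION_TOKENS_FOR_CLASSIFICATION,
        "default_labels": labels,
        "uses_logprobs": True,
    }

print(classification_task_defaults(["positive", "neutral", "negative"]))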
euroeval/{tokenization_utils.py → tokenisation_utils.py}
CHANGED

@@ -1,4 +1,4 @@
-"""Utility functions related to
+"""Utility functions related to tokenisation."""
 
 import logging
 import re

@@ -7,9 +7,8 @@ import typing as t
 import torch
 from transformers import MistralCommonTokenizer
 
-from euroeval.exceptions import InvalidModel
-
 from .enums import GenerativeType
+from .exceptions import InvalidModel
 from .utils import log_once
 
 if t.TYPE_CHECKING:

@@ -80,8 +79,8 @@ def should_prompts_be_stripped(
     """Determine if we should strip the prompts for few-shot evaluation.
 
     This is the case if the tokeniser needs to include the space as part of the label
-    token. The strategy is thus to
-    prompts), i.e., ": positive", and check if the
+    token. The strategy is thus to tokenise a label with a preceeding colon (as in the
+    prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
     of ": ". If this is the case, then we should not strip the prompts, since the
     tokeniser produces the whitespace token separately.
 

@@ -89,7 +88,7 @@ def should_prompts_be_stripped(
         labels_to_be_generated:
             The labels that are to be generated.
         tokeniser:
-            The tokeniser used to
+            The tokeniser used to tokenise the labels.
 
     Returns:
         Whether we should strip the prompts.
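A minimal sketch of the stripping heuristic described in the docstring above, using plain transformers and "gpt2" as an arbitrary example tokeniser (not something this diff prescribes):

from transformers import AutoTokenizer

tokeniser = AutoTokenizer.from_pretrained("gpt2")
colon_space_ids = tokeniser(": ", add_special_tokens=False).input_ids
label_ids = tokeniser(": positive", add_special_tokens=False).input_ids

# If ": positive" starts with the tokens of ": ", the whitespace comes out as its own
# token, so the prompts should NOT be stripped; otherwise they should be.
starts_with_colon_space = label_ids[: len(colon_space_ids)] == colon_space_ids
should_strip = not starts_with_colon_space
print(should_strip)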
@@ -125,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
         labels_to_be_generated:
             The labels that are to be generated.
         tokeniser:
-            The tokeniser used to
+            The tokeniser used to tokenise the labels.
 
     Returns:
         Whether we should add a prefix space to the labels.

@@ -319,7 +318,9 @@ def get_pad_token(
     return pad_token, pad_token_id
 
 
-def get_end_of_chat_token_ids(
+def get_end_of_chat_token_ids(
+    tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
+) -> list[int] | None:
     """Get the end token ID for chat models.
 
     This is only relevant for tokenisers with a chat template.

@@ -327,20 +328,23 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
     Args:
         tokeniser:
             The tokeniser.
+        generative_type:
+            The generative type, or None if not available.
 
     Returns:
         The token IDs used to end chats, or None if the tokeniser does not have a chat
         template or if no end-of-chat token could be found.
     """
-    if
+    if generative_type == GenerativeType.BASE:
         return None
 
     user_message: dict[str, str] = dict(role="user", content="X")
     token_ids = apply_chat_template(
         conversation=[user_message],
         tokeniser=tokeniser,
-
+        tokenise=True,
         add_generation_prompt=False,
+        enable_thinking=generative_type == GenerativeType.REASONING,
     )
     assert isinstance(token_ids, list)
 

@@ -421,7 +425,7 @@ def get_first_label_token_mapping(
         for label in dataset_config.labels
     ]
 
-    #
+    # Tokenise some text containing each label, which we will use to extract the
    # first token of each label
    all_tokens: list[list[str]]
    if not has_chat_template(tokeniser=tokeniser):

@@ -440,11 +444,13 @@ def get_first_label_token_mapping(
                    dict(role="user", content=""),
                    dict(role="assistant", content=label),
                    # Adding extra user message as Mistral tokenisers require
-                    #
+                    # conversations to end with a user message
                    dict(role="user", content=""),
                ],
                tokeniser=tokeniser,
-
+                tokenise=True,
+                add_generation_prompt=True,
+                enable_thinking=generative_type == GenerativeType.REASONING,
            )
        )
        for label in local_labels

@@ -538,9 +544,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
 def apply_chat_template(
     conversation: list[dict[str, str]],
     tokeniser: "PreTrainedTokenizer",
-
-    add_generation_prompt: bool
-
+    tokenise: bool,
+    add_generation_prompt: bool,
+    enable_thinking: bool,
+    **extra_kwargs,
 ) -> str | list[int]:
     """Apply the chat template to a prompt.
 

@@ -549,38 +556,47 @@
             The conversation to apply the chat template to.
         tokeniser:
             The tokeniser.
-
-            Whether to
+        tokenise:
+            Whether to tokenise the resulting prompt, returning a list of token IDs
             instead of a string.
         add_generation_prompt:
             Whether to add a generation prompt at the end of the conversation. This is
             only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
             always add a generation prompt.
-
-
-            is
+        enable_thinking:
+            Whether to enable special handling for reasoning models, such as adding
+            special tokens for thinking. This is only relevant for regular Hugging
+            Face tokenisers, as Mistral tokenisers always handle reasoning models.
+        **extra_kwargs:
+            Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
+            method. Only relevant for regular Hugging Face tokenisers.
 
     Returns:
         The prompt with the chat template applied, either as a string or a list of
-        token IDs, depending on the value of `
+        token IDs, depending on the value of `tokenise`.
 
     Raises:
         InvalidModel:
             If the tokeniser does not have a chat template.
     """
+    # Ensure that the first user message is not empty, as this can cause issues with
+    # Jinja2
+    conversation[0]["content"] = conversation[0]["content"] or " "
+
     if not has_chat_template(tokeniser=tokeniser):
         raise InvalidModel(
             "The tokeniser does not have a chat template, so cannot apply it."
         )
     elif isinstance(tokeniser, MistralCommonTokenizer):
         templated_prompt = tokeniser.apply_chat_template(
-            conversation=conversation, tokenize=
+            conversation=conversation, tokenize=tokenise
         )
     else:
         templated_prompt = tokeniser.apply_chat_template(
             conversation=conversation,
             add_generation_prompt=add_generation_prompt,
-            tokenize=
-
+            tokenize=tokenise,
+            enable_thinking=enable_thinking,
+            **extra_kwargs,
         )
     return templated_prompt
euroeval/utils.py
CHANGED
@@ -4,7 +4,6 @@ import asyncio
 import gc
 import importlib
 import importlib.metadata
-import importlib.util
 import logging
 import os
 import random

@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
 from requests.exceptions import RequestException
 from transformers import logging as tf_logging
 
-from .exceptions import NaNValueInModelOutput
+from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
 
 if t.TYPE_CHECKING:
     from types import TracebackType
 
+    from .data_models import ModelIdComponents
     from .types import Predictions
 
 

@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
     loop = asyncio.new_event_loop()
     try:
         asyncio.set_event_loop(loop)
-
+        response = loop.run_until_complete(coroutine)
+        return response
     finally:
         loop.close()
         asyncio.set_event_loop(None)

@@ -457,3 +458,66 @@ def get_hf_token(api_key: str | None) -> str | bool:
         level=logging.DEBUG,
     )
     return False
+
+
+def extract_multiple_choice_labels(
+    prompt: str, candidate_labels: list[str]
+) -> list[str]:
+    """Extract multiple choice labels from a prompt.
+
+    Args:
+        prompt:
+            The prompt to extract the labels from.
+        candidate_labels:
+            The candidate labels to look for in the prompt.
+
+    Returns:
+        The extracted labels.
+    """
+    sample_candidate_labels: list[str] = list()
+    for candidate_label in candidate_labels:
+        candidate_label_match = re.search(
+            pattern=rf"\b{candidate_label}\. ", string=prompt, flags=re.IGNORECASE
+        )
+        if candidate_label_match is not None:
+            sample_candidate_labels.append(candidate_label)
+    if not sample_candidate_labels:
+        raise InvalidBenchmark(
+            "Could not extract any candidate labels from the prompt. Please ensure "
+            "that the candidate labels are present in the prompt, each followed by a "
+            "dot and a space (e.g., 'a. '). The candidate labels are: "
+            f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
+        )
+    return sample_candidate_labels
+
+
+def split_model_id(model_id: str) -> "ModelIdComponents":
+    """Split a model ID into its components.
+
+    Args:
+        model_id:
+            The model ID to split.
+
+    Returns:
+        The split model ID.
+
+    Raises:
+        If the model ID is not valid.
+    """
+    # Importing here to avoid circular imports
+    from .data_models import ModelIdComponents
+
+    # Attempt to extract the model ID, revision, and param using regex
+    model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
+    revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
+    param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
+
+    # If we cannot extract the model ID, raise an error
+    if model_id_match is None:
+        raise InvalidModel(f"The model ID {model_id!r} is not valid.")
+    model_id = model_id_match.group()
+
+    # Extract the revision and param and return the result
+    revision = revision_match.group(1) if revision_match is not None else "main"
+    param = param_match.group(1) if param_match is not None else None
+    return ModelIdComponents(model_id=model_id, revision=revision, param=param)
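Usage sketch for the two helpers added above, assuming a EuroEval 16.1.0 installation; the example model ID and prompt are made up, and the expected outputs follow from the regexes shown in the diff.

from euroeval.utils import extract_multiple_choice_labels, split_model_id

# "@" separates the revision, "#" separates an extra parameter.
components = split_model_id("openai/gpt-4o@2024-08-06#eu")
print(components)  # model_id="openai/gpt-4o", revision="2024-08-06", param="eu"

prompt = "Which city is the capital of Norway?\na. Oslo\nb. Bergen\nc. Tromsø\nAnswer:"
print(extract_multiple_choice_labels(prompt=prompt, candidate_labels=["a", "b", "c", "d"]))
# ['a', 'b', 'c'] — "d" is absent because no "d. " occurs in the prompt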
{euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.0.0
+Version: 16.1.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues

@@ -61,10 +61,12 @@ Requires-Dist: transformers[mistral-common]>=4.56.0
 Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 
euroeval-16.1.0.dist-info/RECORD
ADDED

@@ -0,0 +1,70 @@
+euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
+euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
+euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
+euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
+euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
+euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
+euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
+euroeval/generation_utils.py,sha256=OtEXLhI6L1vlbC768dH3xzj0qkokz43m0vswGKrRmBA,18061
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
+euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
+euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
+euroeval/tokenisation_utils.py,sha256=jRIi9m8XmGh3LeZna47AWmJI9U9m4ojXQynQTe7kzWc,21344
+euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
+euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
+euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
+euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
+euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
+euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
+euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
+euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
+euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
+euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
+euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
+euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
+euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
+euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
+euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
+euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
+euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
+euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
+euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
+euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
+euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
+euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
+euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
+euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
+euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
+euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
+euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
+euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
+euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
+euroeval/task_group_utils/sequence_classification.py,sha256=qWUUrh4X4jK2XfUzP4aoPDoJhVJifrnDEaaw_F48hig,16080
+euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
+euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
+euroeval-16.1.0.dist-info/METADATA,sha256=pYdW0IZwY8vatTA55EERxBK1kMaQuGhqzNys5xiSqsM,13729
+euroeval-16.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.1.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.1.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.1.0.dist-info/RECORD,,
euroeval-16.0.0.dist-info/RECORD
DELETED
@@ -1,69 +0,0 @@
-euroeval/__init__.py,sha256=MgFG1amMgiTJmK_hcQ7nnX-o4KFhlD1P5xKUBTloPCQ,3564
-euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
-euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
-euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
-euroeval/constants.py,sha256=HWJ3PJRS-ZbAMXTvujiK8QP7IiS4RHkjnegv3oi52w0,2499
-euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
-euroeval/data_models.py,sha256=NdzD1ER3GHJp51UXLGTW8iTYwzZlITH2nO0vanTkEWU,24272
-euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
-euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
-euroeval/generation_utils.py,sha256=vU-j9kjFDuPlSizEaRByx_XJyyAVpE8PdGOm9i--9zQ,14613
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
-euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
-euroeval/tasks.py,sha256=jl8HicriMSN_LfHANokVGFqzgV53QcJ5dmzb297xI04,4173
-euroeval/tokenization_utils.py,sha256=icEfttWReKRC5MbREOuxTHOPpuVvH6uHhnqz1w7qIyA,20565
-euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
-euroeval/utils.py,sha256=O4JIROPfbA7MD9SbOY0CifoCckYjmdNjXYjOxDwBnwM,14149
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
-euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
-euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
-euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
-euroeval/benchmark_modules/vllm.py,sha256=dTwGGOFQ7wqYXg7x2YBUJNQcO6OwqjTMBfUf5OveXNk,41289
-euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
-euroeval/dataset_configs/danish.py,sha256=3n9e0r-hYRI2hPOgLDMQsO8bPgZKjw7OcFCUsCvdmk4,5294
-euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
-euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
-euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
-euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
-euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
-euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
-euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
-euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
-euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
-euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
-euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
-euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
-euroeval/metrics/pipeline.py,sha256=T65p2sxPnwh2WgCjqsqzvE3XOzizNY7rlSm8KPR7sCk,8883
-euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
-euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
-euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
-euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
-euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
-euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=lNEOWi3ckLBnMP1QoSTxNxT-s6kBz2XH17mrmjQlv5s,7075
-euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
-euroeval/task_group_utils/sequence_classification.py,sha256=K_hFWY6D5WR8-uy6ZikCq3ighHNHSyzW7A62vwDkwDs,16512
-euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
-euroeval/task_group_utils/token_classification.py,sha256=6bN9soT1kLthutCpqUT-jDmZZw9Mt7H3tjI4zVvE4BY,16469
-euroeval-16.0.0.dist-info/METADATA,sha256=uvzi8Bkgab8rKhgKavqFnv8rpL0KntFIYMZ7f1Joa0U,13544
-euroeval-16.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.0.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.0.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.0.0.dist-info/RECORD,,
{euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/WHEEL
File without changes

{euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt
File without changes

{euroeval-16.0.0.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE
File without changes