EuroEval 15.12.0__py3-none-any.whl → 15.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/benchmark_modules/litellm.py +31 -4
- euroeval/dataset_configs/danish.py +10 -0
- euroeval/dataset_configs/dutch.py +10 -0
- euroeval/dataset_configs/english.py +10 -0
- euroeval/dataset_configs/faroese.py +10 -0
- euroeval/dataset_configs/finnish.py +10 -0
- euroeval/dataset_configs/french.py +10 -0
- euroeval/dataset_configs/german.py +10 -0
- euroeval/dataset_configs/icelandic.py +10 -0
- euroeval/dataset_configs/italian.py +10 -0
- euroeval/dataset_configs/norwegian.py +20 -0
- euroeval/dataset_configs/portuguese.py +29 -22
- euroeval/dataset_configs/spanish.py +10 -0
- euroeval/dataset_configs/swedish.py +10 -0
- euroeval/prompt_templates/reading_comprehension.py +10 -1
- {euroeval-15.12.0.dist-info → euroeval-15.14.0.dist-info}/METADATA +2 -2
- {euroeval-15.12.0.dist-info → euroeval-15.14.0.dist-info}/RECORD +20 -20
- {euroeval-15.12.0.dist-info → euroeval-15.14.0.dist-info}/WHEEL +0 -0
- {euroeval-15.12.0.dist-info → euroeval-15.14.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.12.0.dist-info → euroeval-15.14.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -31,6 +31,7 @@ from litellm.exceptions import (
|
|
|
31
31
|
from litellm.llms.vertex_ai.common_utils import VertexAIError
|
|
32
32
|
from litellm.router import Router
|
|
33
33
|
from litellm.types.utils import ChoiceLogprobs
|
|
34
|
+
from litellm.utils import supports_reasoning, supports_response_schema
|
|
34
35
|
from pydantic import conlist, create_model
|
|
35
36
|
from requests.exceptions import RequestException
|
|
36
37
|
from tqdm.asyncio import tqdm as tqdm_async
|
|
@@ -234,6 +235,8 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
234
235
|
pattern="|".join(REASONING_MODELS), string=self.model_config.model_id
|
|
235
236
|
):
|
|
236
237
|
type_ = GenerativeType.REASONING
|
|
238
|
+
elif supports_reasoning(model=self.model_config.model_id):
|
|
239
|
+
type_ = GenerativeType.REASONING
|
|
237
240
|
else:
|
|
238
241
|
type_ = GenerativeType.INSTRUCTION_TUNED
|
|
239
242
|
|
|
@@ -314,9 +317,7 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
314
317
|
"enable it.",
|
|
315
318
|
level=logging.DEBUG,
|
|
316
319
|
)
|
|
317
|
-
elif
|
|
318
|
-
model=self.model_config.model_id
|
|
319
|
-
):
|
|
320
|
+
elif supports_response_schema(model=self.model_config.model_id):
|
|
320
321
|
ner_tag_names = list(self.dataset_config.prompt_label_mapping.values())
|
|
321
322
|
keys_and_their_types: dict[str, t.Any] = {
|
|
322
323
|
tag_name: (conlist(str, max_length=5), ...)
|
|
@@ -361,7 +362,7 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
361
362
|
level=logging.DEBUG,
|
|
362
363
|
)
|
|
363
364
|
elif self.model_config.revision == "no-thinking":
|
|
364
|
-
generation_kwargs["thinking"] = dict(
|
|
365
|
+
generation_kwargs["thinking"] = dict(budget_tokens=0)
|
|
365
366
|
log_once(
|
|
366
367
|
f"Disabling thinking mode for model {self.model_config.model_id!r}",
|
|
367
368
|
level=logging.DEBUG,
|
|
@@ -377,6 +378,19 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
377
378
|
# Drop generation kwargs that are not supported by the model
|
|
378
379
|
litellm.drop_params = True
|
|
379
380
|
|
|
381
|
+
# First attempt is a test run with a single conversation to handle errors
|
|
382
|
+
# quickly
|
|
383
|
+
test_conversation = conversations[0]
|
|
384
|
+
_, failures = safe_run(
|
|
385
|
+
self._generate_async(
|
|
386
|
+
model_id=self.model_config.model_id,
|
|
387
|
+
conversations=[test_conversation],
|
|
388
|
+
**generation_kwargs,
|
|
389
|
+
)
|
|
390
|
+
)
|
|
391
|
+
for _, error in failures:
|
|
392
|
+
self._handle_exception(error=error, generation_kwargs=generation_kwargs)
|
|
393
|
+
|
|
380
394
|
all_responses: dict[int, "ModelResponse"] = {}
|
|
381
395
|
conversations_to_run: list[tuple[int, list[litellm.AllMessageValues]]] = list(
|
|
382
396
|
enumerate(conversations)
|
|
@@ -477,6 +491,7 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
477
491
|
r"the thinking budget [0-9]+ is invalid. please choose a value between "
|
|
478
492
|
r"[0-9]+ and ([0-9]+)\."
|
|
479
493
|
)
|
|
494
|
+
requires_thinking_disabled_messages = ["thinking.type: Field required"]
|
|
480
495
|
|
|
481
496
|
if any(msg.lower() in error_msg for msg in stop_messages):
|
|
482
497
|
log_once(
|
|
@@ -557,6 +572,18 @@ class LiteLLMModel(BenchmarkModule):
|
|
|
557
572
|
type="enabled", budget_tokens=thinking_budget - 1
|
|
558
573
|
)
|
|
559
574
|
return
|
|
575
|
+
elif (
|
|
576
|
+
any(msg.lower() in error_msg for msg in requires_thinking_disabled_messages)
|
|
577
|
+
and self.generative_type != GenerativeType.REASONING
|
|
578
|
+
):
|
|
579
|
+
log_once(
|
|
580
|
+
f"The model {model_id!r} requires the `thinking.type` field to be "
|
|
581
|
+
f"set to `disabled` rather than just setting `budget_tokens` to 0. "
|
|
582
|
+
"Setting `thinking.type` to `disabled`.",
|
|
583
|
+
level=logging.DEBUG,
|
|
584
|
+
)
|
|
585
|
+
generation_kwargs["thinking"] = dict(type="disabled")
|
|
586
|
+
return
|
|
560
587
|
elif isinstance(
|
|
561
588
|
error, (Timeout, ServiceUnavailableError, InternalServerError, SystemError)
|
|
562
589
|
):
|
|
@@ -118,3 +118,13 @@ BELEBELE_DA_CONFIG = DatasetConfig(
|
|
|
118
118
|
languages=[DA],
|
|
119
119
|
unofficial=True,
|
|
120
120
|
)
|
|
121
|
+
|
|
122
|
+
MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
|
|
123
|
+
name="multi-wiki-qa-da",
|
|
124
|
+
pretty_name="the truncated version of the Danish part of the reading "
|
|
125
|
+
"comprehension dataset MultiWikiQA",
|
|
126
|
+
huggingface_id="EuroEval/multi-wiki-qa-da-mini",
|
|
127
|
+
task=RC,
|
|
128
|
+
languages=[DA],
|
|
129
|
+
unofficial=True,
|
|
130
|
+
)
|
|
@@ -110,3 +110,13 @@ BELEBELE_NL_CONFIG = DatasetConfig(
|
|
|
110
110
|
languages=[NL],
|
|
111
111
|
unofficial=True,
|
|
112
112
|
)
|
|
113
|
+
|
|
114
|
+
MULTI_WIKI_QA_NL_CONFIG = DatasetConfig(
|
|
115
|
+
name="multi-wiki-qa-nl",
|
|
116
|
+
pretty_name="the truncated version of the Dutch part of the reading "
|
|
117
|
+
"comprehension dataset MultiWikiQA",
|
|
118
|
+
huggingface_id="EuroEval/multi-wiki-qa-nl-mini",
|
|
119
|
+
task=RC,
|
|
120
|
+
languages=[NL],
|
|
121
|
+
unofficial=True,
|
|
122
|
+
)
|
|
@@ -95,3 +95,13 @@ MMLU_CONFIG = DatasetConfig(
|
|
|
95
95
|
languages=[EN],
|
|
96
96
|
unofficial=True,
|
|
97
97
|
)
|
|
98
|
+
|
|
99
|
+
MULTI_WIKI_QA_EN_CONFIG = DatasetConfig(
|
|
100
|
+
name="multi-wiki-qa-en",
|
|
101
|
+
pretty_name="the truncated version of the English part of the reading "
|
|
102
|
+
"comprehension dataset MultiWikiQA",
|
|
103
|
+
huggingface_id="EuroEval/multi-wiki-qa-en-mini",
|
|
104
|
+
task=RC,
|
|
105
|
+
languages=[EN],
|
|
106
|
+
unofficial=True,
|
|
107
|
+
)
|
|
@@ -52,3 +52,13 @@ WIKIANN_FO_CONFIG = DatasetConfig(
|
|
|
52
52
|
languages=[FO],
|
|
53
53
|
unofficial=True,
|
|
54
54
|
)
|
|
55
|
+
|
|
56
|
+
MULTI_WIKI_QA_FO_CONFIG = DatasetConfig(
|
|
57
|
+
name="multi-wiki-qa-fo",
|
|
58
|
+
pretty_name="the truncated version of the Faroese part of the reading "
|
|
59
|
+
"comprehension dataset MultiWikiQA",
|
|
60
|
+
huggingface_id="EuroEval/multi-wiki-qa-fo-mini",
|
|
61
|
+
task=RC,
|
|
62
|
+
languages=[FO],
|
|
63
|
+
unofficial=True,
|
|
64
|
+
)
|
|
@@ -68,3 +68,13 @@ BELEBELE_FI_CONFIG = DatasetConfig(
|
|
|
68
68
|
languages=[FI],
|
|
69
69
|
unofficial=True,
|
|
70
70
|
)
|
|
71
|
+
|
|
72
|
+
MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
|
|
73
|
+
name="multi-wiki-qa-fi",
|
|
74
|
+
pretty_name="the truncated version of the Finnish part of the reading "
|
|
75
|
+
"comprehension dataset MultiWikiQA",
|
|
76
|
+
huggingface_id="EuroEval/multi-wiki-qa-fi-mini",
|
|
77
|
+
task=RC,
|
|
78
|
+
languages=[FI],
|
|
79
|
+
unofficial=True,
|
|
80
|
+
)
|
|
@@ -81,3 +81,13 @@ BELEBELE_FR_CONFIG = DatasetConfig(
|
|
|
81
81
|
languages=[FR],
|
|
82
82
|
unofficial=True,
|
|
83
83
|
)
|
|
84
|
+
|
|
85
|
+
MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
|
|
86
|
+
name="multi-wiki-qa-fr",
|
|
87
|
+
pretty_name="the truncated version of the French part of the reading "
|
|
88
|
+
"comprehension dataset MultiWikiQA",
|
|
89
|
+
huggingface_id="EuroEval/multi-wiki-qa-fr-mini",
|
|
90
|
+
task=RC,
|
|
91
|
+
languages=[FR],
|
|
92
|
+
unofficial=True,
|
|
93
|
+
)
|
|
@@ -89,3 +89,13 @@ BELEBELE_DE_CONFIG = DatasetConfig(
|
|
|
89
89
|
languages=[DE],
|
|
90
90
|
unofficial=True,
|
|
91
91
|
)
|
|
92
|
+
|
|
93
|
+
MULTI_WIKI_QA_DE_CONFIG = DatasetConfig(
|
|
94
|
+
name="multi-wiki-qa-de",
|
|
95
|
+
pretty_name="the truncated version of the German part of the reading "
|
|
96
|
+
"comprehension dataset MultiWikiQA",
|
|
97
|
+
huggingface_id="EuroEval/multi-wiki-qa-de-mini",
|
|
98
|
+
task=RC,
|
|
99
|
+
languages=[DE],
|
|
100
|
+
unofficial=True,
|
|
101
|
+
)
|
|
@@ -146,3 +146,13 @@ BELEBELE_IS_CONFIG = DatasetConfig(
|
|
|
146
146
|
languages=[IS],
|
|
147
147
|
unofficial=True,
|
|
148
148
|
)
|
|
149
|
+
|
|
150
|
+
MULTI_WIKI_QA_IS_CONFIG = DatasetConfig(
|
|
151
|
+
name="multi-wiki-qa-is",
|
|
152
|
+
pretty_name="the truncated version of the Icelandic part of the reading "
|
|
153
|
+
"comprehension dataset MultiWikiQA",
|
|
154
|
+
huggingface_id="EuroEval/multi-wiki-qa-is-mini",
|
|
155
|
+
task=RC,
|
|
156
|
+
languages=[IS],
|
|
157
|
+
unofficial=True,
|
|
158
|
+
)
|
|
@@ -89,3 +89,13 @@ BELEBELE_IT_CONFIG = DatasetConfig(
|
|
|
89
89
|
languages=[IT],
|
|
90
90
|
unofficial=True,
|
|
91
91
|
)
|
|
92
|
+
|
|
93
|
+
MULTI_WIKI_QA_IT_CONFIG = DatasetConfig(
|
|
94
|
+
name="multi-wiki-qa-it",
|
|
95
|
+
pretty_name="the truncated version of the Italian part of the reading "
|
|
96
|
+
"comprehension dataset MultiWikiQA",
|
|
97
|
+
huggingface_id="EuroEval/multi-wiki-qa-it-mini",
|
|
98
|
+
task=RC,
|
|
99
|
+
languages=[IT],
|
|
100
|
+
unofficial=True,
|
|
101
|
+
)
|
|
@@ -184,3 +184,23 @@ BELEBELE_NO_CONFIG = DatasetConfig(
|
|
|
184
184
|
languages=[NB, NN, NO],
|
|
185
185
|
unofficial=True,
|
|
186
186
|
)
|
|
187
|
+
|
|
188
|
+
MULTI_WIKI_QA_NB_CONFIG = DatasetConfig(
|
|
189
|
+
name="multi-wiki-qa-nb",
|
|
190
|
+
pretty_name="the truncated version of the Norwegian Bokmål part of the reading "
|
|
191
|
+
"comprehension dataset MultiWikiQA",
|
|
192
|
+
huggingface_id="EuroEval/multi-wiki-qa-no-mini",
|
|
193
|
+
task=RC,
|
|
194
|
+
languages=[NB, NO],
|
|
195
|
+
unofficial=True,
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
MULTI_WIKI_QA_NN_CONFIG = DatasetConfig(
|
|
199
|
+
name="multi-wiki-qa-nn",
|
|
200
|
+
pretty_name="the truncated version of the Norwegian Nynorsk part of the reading "
|
|
201
|
+
"comprehension dataset MultiWikiQA",
|
|
202
|
+
huggingface_id="EuroEval/multi-wiki-qa-nn-mini",
|
|
203
|
+
task=RC,
|
|
204
|
+
languages=[NN],
|
|
205
|
+
unofficial=True,
|
|
206
|
+
)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from ..data_models import DatasetConfig
|
|
4
4
|
from ..languages import PT
|
|
5
|
-
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, SENT, SUMM
|
|
5
|
+
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
|
|
6
6
|
|
|
7
7
|
### Official datasets ###
|
|
8
8
|
|
|
@@ -16,27 +16,6 @@ SST2_PT_CONFIG = DatasetConfig(
|
|
|
16
16
|
_labels=["positive", "negative"],
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
MMLU_PT_CONFIG = DatasetConfig(
|
|
21
|
-
name="mmlu-pt",
|
|
22
|
-
pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
|
|
23
|
-
"translated from the English MMLU dataset",
|
|
24
|
-
huggingface_id="EuroEval/mmlu-pt-mini",
|
|
25
|
-
task=KNOW,
|
|
26
|
-
languages=[PT],
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
GOLDENSWAG_PT_CONFIG = DatasetConfig(
|
|
31
|
-
name="goldenswag-pt",
|
|
32
|
-
pretty_name="the truncated version of the Portuguese common-sense reasoning "
|
|
33
|
-
"dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
|
|
34
|
-
huggingface_id="EuroEval/goldenswag-pt-mini",
|
|
35
|
-
task=COMMON_SENSE,
|
|
36
|
-
languages=[PT],
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
|
|
40
19
|
SCALA_PT = DatasetConfig(
|
|
41
20
|
name="scala-pt",
|
|
42
21
|
pretty_name="the Portuguese part of the linguistic acceptability dataset ScaLA",
|
|
@@ -53,6 +32,15 @@ HAREM_CONFIG = DatasetConfig(
|
|
|
53
32
|
languages=[PT],
|
|
54
33
|
)
|
|
55
34
|
|
|
35
|
+
MULTI_WIKI_QA_PT_CONFIG = DatasetConfig(
|
|
36
|
+
name="multi-wiki-qa-pt",
|
|
37
|
+
pretty_name="the truncated version of the Portuguese part of the reading "
|
|
38
|
+
"comprehension dataset MultiWikiQA",
|
|
39
|
+
huggingface_id="EuroEval/multi-wiki-qa-pt-pt-mini",
|
|
40
|
+
task=RC,
|
|
41
|
+
languages=[PT],
|
|
42
|
+
)
|
|
43
|
+
|
|
56
44
|
PUBLICO_CONFIG = DatasetConfig(
|
|
57
45
|
name="publico",
|
|
58
46
|
pretty_name="the truncated version of the Portuguese summarisation dataset Público",
|
|
@@ -61,6 +49,24 @@ PUBLICO_CONFIG = DatasetConfig(
|
|
|
61
49
|
languages=[PT],
|
|
62
50
|
)
|
|
63
51
|
|
|
52
|
+
MMLU_PT_CONFIG = DatasetConfig(
|
|
53
|
+
name="mmlu-pt",
|
|
54
|
+
pretty_name="the truncated version of the Portuguese knowledge dataset MMLU-pt, "
|
|
55
|
+
"translated from the English MMLU dataset",
|
|
56
|
+
huggingface_id="EuroEval/mmlu-pt-mini",
|
|
57
|
+
task=KNOW,
|
|
58
|
+
languages=[PT],
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
GOLDENSWAG_PT_CONFIG = DatasetConfig(
|
|
62
|
+
name="goldenswag-pt",
|
|
63
|
+
pretty_name="the truncated version of the Portuguese common-sense reasoning "
|
|
64
|
+
"dataset GoldenSwag-pt, translated from the English GoldenSwag dataset",
|
|
65
|
+
huggingface_id="EuroEval/goldenswag-pt-mini",
|
|
66
|
+
task=COMMON_SENSE,
|
|
67
|
+
languages=[PT],
|
|
68
|
+
)
|
|
69
|
+
|
|
64
70
|
|
|
65
71
|
### Unofficial datasets ###
|
|
66
72
|
|
|
@@ -71,4 +77,5 @@ BOOLQ_PT_CONFIG = DatasetConfig(
|
|
|
71
77
|
huggingface_id="EuroEval/boolq-pt",
|
|
72
78
|
task=MCRC,
|
|
73
79
|
languages=[PT],
|
|
80
|
+
unofficial=True,
|
|
74
81
|
)
|
|
@@ -87,3 +87,13 @@ BELEBELE_ES_CONFIG = DatasetConfig(
|
|
|
87
87
|
languages=[ES],
|
|
88
88
|
unofficial=True,
|
|
89
89
|
)
|
|
90
|
+
|
|
91
|
+
MULTI_WIKI_QA_ES_CONFIG = DatasetConfig(
|
|
92
|
+
name="multi-wiki-qa-es",
|
|
93
|
+
pretty_name="the truncated version of the Spanish part of the reading "
|
|
94
|
+
"comprehension dataset MultiWikiQA",
|
|
95
|
+
huggingface_id="EuroEval/multi-wiki-qa-es-mini",
|
|
96
|
+
task=RC,
|
|
97
|
+
languages=[ES],
|
|
98
|
+
unofficial=True,
|
|
99
|
+
)
|
|
@@ -98,3 +98,13 @@ BELEBELE_SV_CONFIG = DatasetConfig(
|
|
|
98
98
|
languages=[SV],
|
|
99
99
|
unofficial=True,
|
|
100
100
|
)
|
|
101
|
+
|
|
102
|
+
MULTI_WIKI_QA_SV_CONFIG = DatasetConfig(
|
|
103
|
+
name="multi-wiki-qa-sv",
|
|
104
|
+
pretty_name="the truncated version of the Swedish part of the reading "
|
|
105
|
+
"comprehension dataset MultiWikiQA",
|
|
106
|
+
huggingface_id="EuroEval/multi-wiki-qa-sv-mini",
|
|
107
|
+
task=RC,
|
|
108
|
+
languages=[SV],
|
|
109
|
+
unofficial=True,
|
|
110
|
+
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Templates for the Reading Comprehension task."""
|
|
2
2
|
|
|
3
3
|
from ..data_models import PromptConfig
|
|
4
|
-
from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, SV
|
|
4
|
+
from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
|
|
5
5
|
|
|
6
6
|
RC_TEMPLATES = {
|
|
7
7
|
DA: PromptConfig(
|
|
@@ -117,6 +117,15 @@ RC_TEMPLATES = {
|
|
|
117
117
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
118
118
|
default_prompt_label_mapping=dict(),
|
|
119
119
|
),
|
|
120
|
+
PT: PromptConfig(
|
|
121
|
+
default_prompt_prefix="Os textos que se seguem são acompanhados de perguntas "
|
|
122
|
+
"e respostas.",
|
|
123
|
+
default_prompt_template="Texto: {text}\nPergunta: {question}\nResposta com "
|
|
124
|
+
"um máximo de 3 palavras: {label}",
|
|
125
|
+
default_instruction_prompt="Texto: {text}\n\nResponde à seguinte pergunta "
|
|
126
|
+
"sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
|
|
127
|
+
default_prompt_label_mapping=dict(),
|
|
128
|
+
),
|
|
120
129
|
SV: PromptConfig(
|
|
121
130
|
default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
|
|
122
131
|
default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.12.0
|
|
3
|
+
Version: 15.14.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -29,7 +29,7 @@ License: MIT License
|
|
|
29
29
|
SOFTWARE.
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Requires-Python: <4.0,>=3.10
|
|
32
|
-
Requires-Dist: accelerate>=
|
|
32
|
+
Requires-Dist: accelerate>=1.9.0
|
|
33
33
|
Requires-Dist: bert-score>=0.3.13
|
|
34
34
|
Requires-Dist: click>=8.1.3
|
|
35
35
|
Requires-Dist: datasets>=3.5.0
|
|
@@ -27,27 +27,27 @@ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwR
|
|
|
27
27
|
euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
|
|
28
28
|
euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
|
|
29
29
|
euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
|
|
30
|
-
euroeval/benchmark_modules/litellm.py,sha256=
|
|
30
|
+
euroeval/benchmark_modules/litellm.py,sha256=qv-k2ntk48OF4ikevQ95k4zLbBkZYOZ2z-GAisA-tFY,53374
|
|
31
31
|
euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
|
|
32
32
|
euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
|
|
33
|
-
euroeval/dataset_configs/danish.py,sha256
|
|
34
|
-
euroeval/dataset_configs/dutch.py,sha256=
|
|
35
|
-
euroeval/dataset_configs/english.py,sha256=
|
|
36
|
-
euroeval/dataset_configs/faroese.py,sha256=
|
|
37
|
-
euroeval/dataset_configs/finnish.py,sha256=
|
|
38
|
-
euroeval/dataset_configs/french.py,sha256=
|
|
39
|
-
euroeval/dataset_configs/german.py,sha256=
|
|
40
|
-
euroeval/dataset_configs/icelandic.py,sha256=
|
|
41
|
-
euroeval/dataset_configs/italian.py,sha256=
|
|
42
|
-
euroeval/dataset_configs/norwegian.py,sha256
|
|
43
|
-
euroeval/dataset_configs/portuguese.py,sha256
|
|
44
|
-
euroeval/dataset_configs/spanish.py,sha256=
|
|
45
|
-
euroeval/dataset_configs/swedish.py,sha256=
|
|
33
|
+
euroeval/dataset_configs/danish.py,sha256=-y-n08hTApwTdSVdjRlZYa3gOX92cTGhg8xsuG-Lhww,3691
|
|
34
|
+
euroeval/dataset_configs/dutch.py,sha256=siyFeEKYx2gBpyqQPtOZ0cD8FTsIMUqzRX5xrQfrNXI,3480
|
|
35
|
+
euroeval/dataset_configs/english.py,sha256=uQAaGWpHk8xqFCeIhmmPXYTb1cZomeEdRaRe9qIZQrg,2858
|
|
36
|
+
euroeval/dataset_configs/faroese.py,sha256=gkgxQTWGFbfg9Eo1z-NSLROgKDcaij9tAN2mfgtrt0M,1647
|
|
37
|
+
euroeval/dataset_configs/finnish.py,sha256=OyveLgyii0hOlo6HZsqAq4rwDrj8tl2qstRfQKugURo,2342
|
|
38
|
+
euroeval/dataset_configs/french.py,sha256=DKKZEtohWkw_ouBaxWcPzp-K6NhQNtvCKxj8NLbIpUc,2678
|
|
39
|
+
euroeval/dataset_configs/german.py,sha256=3bfRgkqIGkAhcw4kwcJN9PKuJSmi1r6AFTJY-IWKgWM,2856
|
|
40
|
+
euroeval/dataset_configs/icelandic.py,sha256=g21IHjcwEZvf_yJ9PobeuBOqRiLOk0oCdEjY34g-UMk,4497
|
|
41
|
+
euroeval/dataset_configs/italian.py,sha256=rHLMkSXT0kFoQlkwHODxO50WBRIfGtkAnW_C-sfIu74,2957
|
|
42
|
+
euroeval/dataset_configs/norwegian.py,sha256=-WvQM44xCwjrqBzlAy4rjf6v87fGera2JmZV_069TeQ,6003
|
|
43
|
+
euroeval/dataset_configs/portuguese.py,sha256=3SqbwD0PNTILGALzh50pVoEwC-spRD75ZeE2NEj151E,2367
|
|
44
|
+
euroeval/dataset_configs/spanish.py,sha256=VKfBIpBRR38ckuULw7Ftmc-0smsm6GshUAik2-Y1Npw,2855
|
|
45
|
+
euroeval/dataset_configs/swedish.py,sha256=WpExi4TJqy_Ruwy4Kvde94jM605vT_88el_KKUzLV4E,3108
|
|
46
46
|
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
47
47
|
euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
|
|
48
48
|
euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
|
|
49
49
|
euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
|
|
50
|
-
euroeval/prompt_templates/reading_comprehension.py,sha256=
|
|
50
|
+
euroeval/prompt_templates/reading_comprehension.py,sha256=3Nch-9zHfUDIwy-k5mP-TRhHQRQ9nad8HdhpJ1S8nGc,7072
|
|
51
51
|
euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
|
|
52
52
|
euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
|
|
53
53
|
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
@@ -56,8 +56,8 @@ euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6z
|
|
|
56
56
|
euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
|
|
57
57
|
euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
|
|
58
58
|
euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
|
|
59
|
-
euroeval-15.
|
|
60
|
-
euroeval-15.
|
|
61
|
-
euroeval-15.
|
|
62
|
-
euroeval-15.
|
|
63
|
-
euroeval-15.
|
|
59
|
+
euroeval-15.14.0.dist-info/METADATA,sha256=uQY74VCgn3TRCTXJGCb8ilS-3U5UL69lbhNGQw2NGTM,13478
|
|
60
|
+
euroeval-15.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
61
|
+
euroeval-15.14.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
|
|
62
|
+
euroeval-15.14.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
63
|
+
euroeval-15.14.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|