EuroEval 15.10.1__py3-none-any.whl → 15.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +7 -0
- euroeval/benchmark_config_factory.py +7 -0
- euroeval/benchmark_modules/base.py +29 -29
- euroeval/benchmark_modules/fresh.py +31 -19
- euroeval/benchmark_modules/hf.py +27 -23
- euroeval/benchmark_modules/litellm.py +50 -30
- euroeval/benchmark_modules/vllm.py +22 -26
- euroeval/benchmarker.py +8 -1
- euroeval/callbacks.py +17 -13
- euroeval/cli.py +10 -0
- euroeval/data_loading.py +10 -5
- euroeval/data_models.py +9 -40
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/english.py +13 -4
- euroeval/dataset_configs/norwegian.py +8 -0
- euroeval/dataset_configs/portuguese.py +74 -0
- euroeval/dataset_configs/spanish.py +4 -3
- euroeval/finetuning.py +9 -8
- euroeval/generation.py +27 -8
- euroeval/human_evaluation.py +14 -13
- euroeval/languages.py +1 -2
- euroeval/metrics.py +452 -0
- euroeval/prompt_templates/linguistic_acceptability.py +9 -1
- euroeval/prompt_templates/multiple_choice.py +9 -1
- euroeval/prompt_templates/named_entity_recognition.py +20 -1
- euroeval/prompt_templates/sentiment_classification.py +11 -1
- euroeval/prompt_templates/summarization.py +8 -1
- euroeval/scores.py +14 -19
- euroeval/speed_benchmark.py +6 -7
- euroeval/task_group_utils/multiple_choice_classification.py +6 -4
- euroeval/task_group_utils/question_answering.py +5 -28
- euroeval/task_group_utils/sequence_classification.py +6 -30
- euroeval/task_group_utils/text_to_text.py +19 -34
- euroeval/task_group_utils/token_classification.py +18 -30
- euroeval/tasks.py +11 -136
- euroeval/types.py +6 -4
- {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/METADATA +10 -10
- euroeval-15.12.0.dist-info/RECORD +63 -0
- {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/licenses/LICENSE +1 -1
- euroeval-15.10.1.dist-info/RECORD +0 -61
- {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/WHEEL +0 -0
- {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/entry_points.txt +0 -0
euroeval/tasks.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""All benchmarks tasks used in EuroEval."""
|
|
2
2
|
|
|
3
|
-
from .data_models import MetricConfig, Task
|
|
3
|
+
from . import metrics as m
|
|
4
|
+
from .data_models import Task
|
|
4
5
|
from .enums import TaskGroup
|
|
5
6
|
from .prompt_templates import (
|
|
6
7
|
LA_TEMPLATES,
|
|
@@ -25,21 +26,7 @@ LA = Task(
|
|
|
25
26
|
name="linguistic-acceptability",
|
|
26
27
|
task_group=TaskGroup.SEQUENCE_CLASSIFICATION,
|
|
27
28
|
template_dict=LA_TEMPLATES,
|
|
28
|
-
metrics=[
|
|
29
|
-
MetricConfig(
|
|
30
|
-
name="mcc",
|
|
31
|
-
pretty_name="Matthew's Correlation Coefficient",
|
|
32
|
-
huggingface_id="matthews_correlation",
|
|
33
|
-
results_key="matthews_correlation",
|
|
34
|
-
),
|
|
35
|
-
MetricConfig(
|
|
36
|
-
name="macro_f1",
|
|
37
|
-
pretty_name="Macro-average F1-score",
|
|
38
|
-
huggingface_id="f1",
|
|
39
|
-
results_key="f1",
|
|
40
|
-
compute_kwargs=dict(average="macro"),
|
|
41
|
-
),
|
|
42
|
-
],
|
|
29
|
+
metrics=[m.mcc_metric, m.macro_f1_metric],
|
|
43
30
|
default_num_few_shot_examples=12,
|
|
44
31
|
default_max_generated_tokens=5,
|
|
45
32
|
default_labels=["correct", "incorrect"],
|
|
@@ -50,20 +37,7 @@ NER = Task(
|
|
|
50
37
|
name="named-entity-recognition",
|
|
51
38
|
task_group=TaskGroup.TOKEN_CLASSIFICATION,
|
|
52
39
|
template_dict=NER_TEMPLATES,
|
|
53
|
-
metrics=[
|
|
54
|
-
MetricConfig(
|
|
55
|
-
name="micro_f1_no_misc",
|
|
56
|
-
pretty_name="Micro-average F1-score without MISC tags",
|
|
57
|
-
huggingface_id="seqeval",
|
|
58
|
-
results_key="overall_f1",
|
|
59
|
-
),
|
|
60
|
-
MetricConfig(
|
|
61
|
-
name="micro_f1",
|
|
62
|
-
pretty_name="Micro-average F1-score with MISC tags",
|
|
63
|
-
huggingface_id="seqeval",
|
|
64
|
-
results_key="overall_f1",
|
|
65
|
-
),
|
|
66
|
-
],
|
|
40
|
+
metrics=[m.micro_f1_no_misc_metric, m.micro_f1_metric],
|
|
67
41
|
default_num_few_shot_examples=8,
|
|
68
42
|
default_max_generated_tokens=128,
|
|
69
43
|
default_labels=[
|
|
@@ -84,22 +58,7 @@ RC = Task(
|
|
|
84
58
|
name="reading-comprehension",
|
|
85
59
|
task_group=TaskGroup.QUESTION_ANSWERING,
|
|
86
60
|
template_dict=RC_TEMPLATES,
|
|
87
|
-
metrics=[
|
|
88
|
-
MetricConfig(
|
|
89
|
-
name="f1",
|
|
90
|
-
pretty_name="F1-score",
|
|
91
|
-
huggingface_id="squad_v2",
|
|
92
|
-
results_key="f1",
|
|
93
|
-
postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:.2f}%"),
|
|
94
|
-
),
|
|
95
|
-
MetricConfig(
|
|
96
|
-
name="em",
|
|
97
|
-
pretty_name="Exact Match",
|
|
98
|
-
huggingface_id="squad_v2",
|
|
99
|
-
results_key="exact",
|
|
100
|
-
postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:.2f}%"),
|
|
101
|
-
),
|
|
102
|
-
],
|
|
61
|
+
metrics=[m.f1_metric, m.em_metric],
|
|
103
62
|
default_num_few_shot_examples=4,
|
|
104
63
|
default_max_generated_tokens=32,
|
|
105
64
|
default_labels=["start_positions", "end_positions"],
|
|
@@ -110,21 +69,7 @@ SENT = Task(
|
|
|
110
69
|
name="sentiment-classification",
|
|
111
70
|
task_group=TaskGroup.SEQUENCE_CLASSIFICATION,
|
|
112
71
|
template_dict=SENT_TEMPLATES,
|
|
113
|
-
metrics=[
|
|
114
|
-
MetricConfig(
|
|
115
|
-
name="mcc",
|
|
116
|
-
pretty_name="Matthew's Correlation Coefficient",
|
|
117
|
-
huggingface_id="matthews_correlation",
|
|
118
|
-
results_key="matthews_correlation",
|
|
119
|
-
),
|
|
120
|
-
MetricConfig(
|
|
121
|
-
name="macro_f1",
|
|
122
|
-
pretty_name="Macro-average F1-score",
|
|
123
|
-
huggingface_id="f1",
|
|
124
|
-
results_key="f1",
|
|
125
|
-
compute_kwargs=dict(average="macro"),
|
|
126
|
-
),
|
|
127
|
-
],
|
|
72
|
+
metrics=[m.mcc_metric, m.macro_f1_metric],
|
|
128
73
|
default_num_few_shot_examples=12,
|
|
129
74
|
default_max_generated_tokens=5,
|
|
130
75
|
default_labels=["positive", "neutral", "negative"],
|
|
@@ -135,23 +80,7 @@ SUMM = Task(
|
|
|
135
80
|
name="summarization",
|
|
136
81
|
task_group=TaskGroup.TEXT_TO_TEXT,
|
|
137
82
|
template_dict=SUMM_TEMPLATES,
|
|
138
|
-
metrics=[
|
|
139
|
-
MetricConfig(
|
|
140
|
-
name="bertscore",
|
|
141
|
-
pretty_name="BERTScore",
|
|
142
|
-
huggingface_id="bertscore",
|
|
143
|
-
results_key="f1",
|
|
144
|
-
compute_kwargs=dict(
|
|
145
|
-
model_type="microsoft/mdeberta-v3-base", device="auto", batch_size=1
|
|
146
|
-
),
|
|
147
|
-
),
|
|
148
|
-
MetricConfig(
|
|
149
|
-
name="rouge_l",
|
|
150
|
-
pretty_name="ROUGE-L",
|
|
151
|
-
huggingface_id="rouge",
|
|
152
|
-
results_key="rougeL",
|
|
153
|
-
),
|
|
154
|
-
],
|
|
83
|
+
metrics=[m.bert_score_metric, m.rouge_l_metric],
|
|
155
84
|
default_num_few_shot_examples=1,
|
|
156
85
|
default_max_generated_tokens=256,
|
|
157
86
|
default_labels=[],
|
|
@@ -162,20 +91,7 @@ KNOW = Task(
|
|
|
162
91
|
name="knowledge",
|
|
163
92
|
task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
|
|
164
93
|
template_dict=MULTIPLE_CHOICE_TEMPLATES,
|
|
165
|
-
metrics=[
|
|
166
|
-
MetricConfig(
|
|
167
|
-
name="mcc",
|
|
168
|
-
pretty_name="Matthew's Correlation Coefficient",
|
|
169
|
-
huggingface_id="matthews_correlation",
|
|
170
|
-
results_key="matthews_correlation",
|
|
171
|
-
),
|
|
172
|
-
MetricConfig(
|
|
173
|
-
name="accuracy",
|
|
174
|
-
pretty_name="Accuracy",
|
|
175
|
-
huggingface_id="accuracy",
|
|
176
|
-
results_key="accuracy",
|
|
177
|
-
),
|
|
178
|
-
],
|
|
94
|
+
metrics=[m.mcc_metric, m.accuracy_metric],
|
|
179
95
|
default_num_few_shot_examples=5,
|
|
180
96
|
default_max_generated_tokens=5,
|
|
181
97
|
default_labels=["a", "b", "c", "d"],
|
|
@@ -186,20 +102,7 @@ MCRC = Task(
|
|
|
186
102
|
name="multiple-choice-reading-comprehension",
|
|
187
103
|
task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
|
|
188
104
|
template_dict=MULTIPLE_CHOICE_TEMPLATES,
|
|
189
|
-
metrics=[
|
|
190
|
-
MetricConfig(
|
|
191
|
-
name="mcc",
|
|
192
|
-
pretty_name="Matthew's Correlation Coefficient",
|
|
193
|
-
huggingface_id="matthews_correlation",
|
|
194
|
-
results_key="matthews_correlation",
|
|
195
|
-
),
|
|
196
|
-
MetricConfig(
|
|
197
|
-
name="accuracy",
|
|
198
|
-
pretty_name="Accuracy",
|
|
199
|
-
huggingface_id="accuracy",
|
|
200
|
-
results_key="accuracy",
|
|
201
|
-
),
|
|
202
|
-
],
|
|
105
|
+
metrics=[m.mcc_metric, m.accuracy_metric],
|
|
203
106
|
default_num_few_shot_examples=5,
|
|
204
107
|
default_max_generated_tokens=5,
|
|
205
108
|
default_labels=["a", "b", "c", "d"],
|
|
@@ -210,20 +113,7 @@ COMMON_SENSE = Task(
|
|
|
210
113
|
name="common-sense-reasoning",
|
|
211
114
|
task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
|
|
212
115
|
template_dict=MULTIPLE_CHOICE_TEMPLATES,
|
|
213
|
-
metrics=[
|
|
214
|
-
MetricConfig(
|
|
215
|
-
name="mcc",
|
|
216
|
-
pretty_name="Matthew's Correlation Coefficient",
|
|
217
|
-
huggingface_id="matthews_correlation",
|
|
218
|
-
results_key="matthews_correlation",
|
|
219
|
-
),
|
|
220
|
-
MetricConfig(
|
|
221
|
-
name="accuracy",
|
|
222
|
-
pretty_name="Accuracy",
|
|
223
|
-
huggingface_id="accuracy",
|
|
224
|
-
results_key="accuracy",
|
|
225
|
-
),
|
|
226
|
-
],
|
|
116
|
+
metrics=[m.mcc_metric, m.accuracy_metric],
|
|
227
117
|
default_num_few_shot_examples=5,
|
|
228
118
|
default_max_generated_tokens=5,
|
|
229
119
|
default_labels=["a", "b", "c", "d"],
|
|
@@ -234,22 +124,7 @@ SPEED = Task(
|
|
|
234
124
|
name="speed",
|
|
235
125
|
task_group=TaskGroup.SPEED,
|
|
236
126
|
template_dict={},
|
|
237
|
-
metrics=[
|
|
238
|
-
MetricConfig(
|
|
239
|
-
name="speed",
|
|
240
|
-
pretty_name="Tokens per second",
|
|
241
|
-
huggingface_id="",
|
|
242
|
-
results_key="speed",
|
|
243
|
-
postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
|
|
244
|
-
),
|
|
245
|
-
MetricConfig(
|
|
246
|
-
name="speed_short",
|
|
247
|
-
pretty_name="Tokens per second on short documents",
|
|
248
|
-
huggingface_id="",
|
|
249
|
-
results_key="speed",
|
|
250
|
-
postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
|
|
251
|
-
),
|
|
252
|
-
],
|
|
127
|
+
metrics=[m.speed_metric, m.speed_short_metric],
|
|
253
128
|
default_num_few_shot_examples=0,
|
|
254
129
|
default_max_generated_tokens=5,
|
|
255
130
|
default_labels=[],
|
euroeval/types.py
CHANGED
|
@@ -2,16 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import typing as t
|
|
4
4
|
|
|
5
|
-
from numpy.typing import NDArray
|
|
6
5
|
from transformers.trainer_utils import EvalPrediction
|
|
7
6
|
|
|
8
7
|
if t.TYPE_CHECKING:
|
|
8
|
+
from numpy.typing import NDArray
|
|
9
|
+
|
|
9
10
|
from .data_models import GenerativeModelOutput
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
ScoreDict: t.TypeAlias = dict[str, dict[str, float] | list[dict[str, float]]]
|
|
13
|
-
Predictions: t.TypeAlias = NDArray | list[str] | list[list[str]]
|
|
14
|
-
Labels: t.TypeAlias = NDArray | list[str] | list[list[str]]
|
|
14
|
+
Predictions: t.TypeAlias = "NDArray | list[str] | list[list[str]]"
|
|
15
|
+
Labels: t.TypeAlias = "NDArray | list[str] | list[list[str]]"
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class ComputeMetricsFunction(t.Protocol):
|
|
@@ -21,7 +22,8 @@ class ComputeMetricsFunction(t.Protocol):
|
|
|
21
22
|
self,
|
|
22
23
|
model_outputs_and_labels: EvalPrediction
|
|
23
24
|
| tuple[
|
|
24
|
-
NDArray | list[str] | list[list[str]],
|
|
25
|
+
"NDArray | list[str] | list[list[str]]",
|
|
26
|
+
"NDArray | list[str] | list[list[str]]",
|
|
25
27
|
],
|
|
26
28
|
) -> dict[str, float]:
|
|
27
29
|
"""Compute the metrics.
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.10.1
|
|
3
|
+
Version: 15.12.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
7
|
-
Author-email: Dan Saattrup
|
|
8
|
-
Maintainer-email: Dan Saattrup
|
|
7
|
+
Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
8
|
+
Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
9
9
|
License: MIT License
|
|
10
10
|
|
|
11
|
-
Copyright (c) 2022-
|
|
11
|
+
Copyright (c) 2022-2025 Dan Saattrup Smart
|
|
12
12
|
|
|
13
13
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
14
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -43,6 +43,7 @@ Requires-Dist: numpy<2.0.0,>=1.23.0
|
|
|
43
43
|
Requires-Dist: ollama>=0.5.1
|
|
44
44
|
Requires-Dist: pandas>=2.2.0
|
|
45
45
|
Requires-Dist: peft>=0.15.0
|
|
46
|
+
Requires-Dist: protobuf>=2.0.0
|
|
46
47
|
Requires-Dist: pydantic>=2.6.0
|
|
47
48
|
Requires-Dist: pyinfer>=0.0.3
|
|
48
49
|
Requires-Dist: python-dotenv>=1.0.1
|
|
@@ -94,8 +95,7 @@ ______________________________________________________________________
|
|
|
94
95
|
|
|
95
96
|
## Maintainer
|
|
96
97
|
|
|
97
|
-
- Dan Saattrup
|
|
98
|
-
dan.nielsen@alexandra.dk)
|
|
98
|
+
- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), dan.smart@alexandra.dk)
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
## Installation
|
|
@@ -268,14 +268,14 @@ contributing new datasets, your help makes this project better for everyone.
|
|
|
268
268
|
If you want to cite the framework then feel free to use this:
|
|
269
269
|
|
|
270
270
|
```
|
|
271
|
-
@article{
|
|
271
|
+
@article{smart2024encoder,
|
|
272
272
|
title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
|
|
273
|
-
author={
|
|
273
|
+
author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
|
|
274
274
|
journal={arXiv preprint arXiv:2406.13469},
|
|
275
275
|
year={2024}
|
|
276
276
|
}
|
|
277
|
-
@inproceedings{
|
|
278
|
-
author = {
|
|
277
|
+
@inproceedings{smart2023scandeval,
|
|
278
|
+
author = {Smart, Dan Saattrup},
|
|
279
279
|
booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
|
|
280
280
|
month = may,
|
|
281
281
|
pages = {185--201},
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
|
|
2
|
+
euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
|
|
3
|
+
euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
|
|
4
|
+
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
+
euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
|
|
6
|
+
euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
|
|
7
|
+
euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
|
|
8
|
+
euroeval/data_models.py,sha256=gPHyIoN2A5_O-cJgyb6jhn6enH8zsiIBI09W_wdHMQs,22031
|
|
9
|
+
euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
|
|
10
|
+
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
+
euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
|
|
12
|
+
euroeval/generation.py,sha256=1fqFEWwM2RzI3uPZem95VFWbN8EfrKZQTrHEP34ihHs,11622
|
|
13
|
+
euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
|
|
14
|
+
euroeval/human_evaluation.py,sha256=Jtz3K5Lqne48wPZWf4EAd3d-n_wX27nGJHigjhV1D7s,27537
|
|
15
|
+
euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
|
|
16
|
+
euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
|
|
17
|
+
euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
|
|
18
|
+
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
19
|
+
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
20
|
+
euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
|
|
21
|
+
euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
|
|
22
|
+
euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
|
|
23
|
+
euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
|
|
24
|
+
euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
|
|
25
|
+
euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
|
|
26
|
+
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
27
|
+
euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
|
|
28
|
+
euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
|
|
29
|
+
euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
|
|
30
|
+
euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
|
|
31
|
+
euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
|
|
32
|
+
euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
|
|
33
|
+
euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
|
|
34
|
+
euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
|
|
35
|
+
euroeval/dataset_configs/english.py,sha256=1q8XJqIVWBBNkldL7t-cVnU2O9EUb9_xoVRSN8arN90,2561
|
|
36
|
+
euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
|
|
37
|
+
euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
|
|
38
|
+
euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
|
|
39
|
+
euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
|
|
40
|
+
euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
|
|
41
|
+
euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
|
|
42
|
+
euroeval/dataset_configs/norwegian.py,sha256=30YGdDPtDszG10BNDVHb-XXTGgGIIgDUNGoeM9q0K_E,5385
|
|
43
|
+
euroeval/dataset_configs/portuguese.py,sha256=-HSDsujWfK__nV2SCu-z0ne0AXLDszOT05oYphQUDTw,2063
|
|
44
|
+
euroeval/dataset_configs/spanish.py,sha256=Yzm1kiilEKoHyd3xD2wrw596Ac9UcaWhlE93GlOFjlc,2558
|
|
45
|
+
euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
|
|
46
|
+
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
47
|
+
euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
|
|
48
|
+
euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
|
|
49
|
+
euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
|
|
50
|
+
euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
|
|
51
|
+
euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
|
|
52
|
+
euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
|
|
53
|
+
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
54
|
+
euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
|
|
55
|
+
euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
|
|
56
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
|
|
57
|
+
euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
|
|
58
|
+
euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
|
|
59
|
+
euroeval-15.12.0.dist-info/METADATA,sha256=8cY6HWgAZgrCkIA20lVKuf42y-e7U1MZQZSTdF3e7ig,13479
|
|
60
|
+
euroeval-15.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
61
|
+
euroeval-15.12.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
|
|
62
|
+
euroeval-15.12.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
63
|
+
euroeval-15.12.0.dist-info/RECORD,,
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
euroeval/__init__.py,sha256=jjInLLkd5IrDrwqag3U35g7SgzITBlFYllgofc-uQFg,3067
|
|
2
|
-
euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu7JM-2xI,11158
|
|
3
|
-
euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
|
|
4
|
-
euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
|
|
5
|
-
euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
|
|
6
|
-
euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
|
|
7
|
-
euroeval/data_loading.py,sha256=2rMLSy8pbntlwmImizMtkTiUzj93mcv5kzYjZELWWfU,4081
|
|
8
|
-
euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
|
|
9
|
-
euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
|
|
10
|
-
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
-
euroeval/finetuning.py,sha256=OFS8YlDhckPupoKWf26Nrd7CTtLQzJXTsDvzMdSR_34,11319
|
|
12
|
-
euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
|
|
13
|
-
euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
|
|
14
|
-
euroeval/human_evaluation.py,sha256=zqbbJkqm2Uymf-88PxM3R9vVRR8SZJlq3QrqWEoiVeE,27643
|
|
15
|
-
euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
|
|
16
|
-
euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
|
|
17
|
-
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
18
|
-
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
19
|
-
euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
|
|
20
|
-
euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
|
|
21
|
-
euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
|
|
22
|
-
euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
|
|
23
|
-
euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
|
|
24
|
-
euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
|
|
25
|
-
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
26
|
-
euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
|
|
27
|
-
euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
|
|
28
|
-
euroeval/benchmark_modules/hf.py,sha256=Nbtn5eZ4axbmL09M8dGZCBr07pn9-btbqGgQ6q7KbHg,44620
|
|
29
|
-
euroeval/benchmark_modules/litellm.py,sha256=LS4mBXXG6h4uJwySPc6SI6f0y_HuiKE7IprprqWpoCI,50601
|
|
30
|
-
euroeval/benchmark_modules/vllm.py,sha256=sgeltOVfZA9bu0AmXV7PtZvuRst0I8s6VOIp0CI6DO8,38880
|
|
31
|
-
euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
|
|
32
|
-
euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
|
|
33
|
-
euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
|
|
34
|
-
euroeval/dataset_configs/english.py,sha256=-N85DiNVrZFqpahNUTfxaWy4vvdOWC8Bi0G4uAO4uDw,2326
|
|
35
|
-
euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
|
|
36
|
-
euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
|
|
37
|
-
euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
|
|
38
|
-
euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
|
|
39
|
-
euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
|
|
40
|
-
euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
|
|
41
|
-
euroeval/dataset_configs/norwegian.py,sha256=2SD5681gZFa1Ig-AEpnyStbivan_bq_Pada4qwE7tw0,5181
|
|
42
|
-
euroeval/dataset_configs/spanish.py,sha256=NviL-FzJ5jq1bLTRvbtZBiGrAmZjxyijZNpKZFrnT-M,2527
|
|
43
|
-
euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
|
|
44
|
-
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
45
|
-
euroeval/prompt_templates/linguistic_acceptability.py,sha256=FAIJKS26EVRxlLHk1C3lN0GDtd5AM0MwvaMf-NNIxfU,6677
|
|
46
|
-
euroeval/prompt_templates/multiple_choice.py,sha256=6iEqiPpT-3WJN_gsyhyapnwsrcsYGdVkSkzwn-VKKxw,5101
|
|
47
|
-
euroeval/prompt_templates/named_entity_recognition.py,sha256=Xd6gBJD2e1l8-We2Ujor7crRUBcbgnNeeVknBIrTMJo,12737
|
|
48
|
-
euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
|
|
49
|
-
euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
|
|
50
|
-
euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
|
|
51
|
-
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
52
|
-
euroeval/task_group_utils/multiple_choice_classification.py,sha256=LQ6zD1UGi-jGCKI2xUJiQdAXoqb5QMpIJu41B2U0HPw,6543
|
|
53
|
-
euroeval/task_group_utils/question_answering.py,sha256=D4oJL2vQEjHghyxiiiq_vj1IQC6eryqNoLXuTiQEPmw,28071
|
|
54
|
-
euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
|
|
55
|
-
euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
|
|
56
|
-
euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
|
|
57
|
-
euroeval-15.10.1.dist-info/METADATA,sha256=mx7pTjlWwRsDgD05msa6lNaaq7M2XeoCQV-BxDLSvag,13472
|
|
58
|
-
euroeval-15.10.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
59
|
-
euroeval-15.10.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
|
|
60
|
-
euroeval-15.10.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
|
|
61
|
-
euroeval-15.10.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|