EuroEval 16.3.0__py3-none-any.whl → 16.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +9 -2
- euroeval/benchmark_config_factory.py +51 -50
- euroeval/benchmark_modules/base.py +9 -21
- euroeval/benchmark_modules/fresh.py +2 -1
- euroeval/benchmark_modules/hf.py +101 -71
- euroeval/benchmark_modules/litellm.py +115 -53
- euroeval/benchmark_modules/vllm.py +107 -92
- euroeval/benchmarker.py +144 -121
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +86 -8
- euroeval/constants.py +9 -0
- euroeval/data_loading.py +80 -29
- euroeval/data_models.py +338 -330
- euroeval/dataset_configs/__init__.py +12 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +55 -93
- euroeval/dataset_configs/dutch.py +48 -87
- euroeval/dataset_configs/english.py +45 -77
- euroeval/dataset_configs/estonian.py +42 -34
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -69
- euroeval/dataset_configs/french.py +39 -75
- euroeval/dataset_configs/german.py +45 -82
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -79
- euroeval/dataset_configs/latvian.py +28 -35
- euroeval/dataset_configs/lithuanian.py +28 -26
- euroeval/dataset_configs/norwegian.py +72 -115
- euroeval/dataset_configs/polish.py +33 -61
- euroeval/dataset_configs/portuguese.py +33 -66
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/spanish.py +42 -77
- euroeval/dataset_configs/swedish.py +52 -90
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +24 -17
- euroeval/generation.py +15 -14
- euroeval/generation_utils.py +8 -8
- euroeval/languages.py +395 -323
- euroeval/logging_utils.py +250 -0
- euroeval/metrics/base.py +0 -3
- euroeval/metrics/huggingface.py +21 -6
- euroeval/metrics/llm_as_a_judge.py +6 -4
- euroeval/metrics/pipeline.py +17 -9
- euroeval/metrics/speed.py +0 -3
- euroeval/model_cache.py +17 -19
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +99 -42
- euroeval/prompt_templates/multiple_choice.py +102 -38
- euroeval/prompt_templates/named_entity_recognition.py +172 -51
- euroeval/prompt_templates/reading_comprehension.py +119 -42
- euroeval/prompt_templates/sentiment_classification.py +110 -40
- euroeval/prompt_templates/summarization.py +85 -40
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +11 -10
- euroeval/speed_benchmark.py +5 -6
- euroeval/task_group_utils/multiple_choice_classification.py +2 -4
- euroeval/task_group_utils/question_answering.py +24 -16
- euroeval/task_group_utils/sequence_classification.py +48 -35
- euroeval/task_group_utils/text_to_text.py +19 -9
- euroeval/task_group_utils/token_classification.py +21 -17
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +33 -22
- euroeval/types.py +10 -9
- euroeval/utils.py +35 -149
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +196 -39
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.3.0.dist-info/RECORD +0 -71
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
|
euroeval/prompt_templates/summarization.py
CHANGED
|
@@ -4,38 +4,51 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
7
|
+
CZECH,
|
|
8
|
+
DANISH,
|
|
9
|
+
DUTCH,
|
|
10
|
+
ENGLISH,
|
|
11
|
+
ESTONIAN,
|
|
12
|
+
FINNISH,
|
|
13
|
+
FRENCH,
|
|
14
|
+
GERMAN,
|
|
15
|
+
GREEK,
|
|
16
|
+
ICELANDIC,
|
|
17
|
+
ITALIAN,
|
|
18
|
+
LATVIAN,
|
|
19
|
+
LITHUANIAN,
|
|
20
|
+
NORWEGIAN,
|
|
21
|
+
NORWEGIAN_BOKMÅL,
|
|
22
|
+
NORWEGIAN_NYNORSK,
|
|
23
|
+
POLISH,
|
|
24
|
+
PORTUGUESE,
|
|
25
|
+
SERBIAN,
|
|
26
|
+
SPANISH,
|
|
27
|
+
SWEDISH,
|
|
28
|
+
UKRAINIAN,
|
|
24
29
|
)
|
|
25
30
|
|
|
26
31
|
if t.TYPE_CHECKING:
|
|
27
|
-
from ..
|
|
32
|
+
from ..languages import Language
|
|
28
33
|
|
|
29
34
|
# TODO: Missing Faroese
|
|
30
35
|
SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
31
|
-
|
|
36
|
+
CZECH: PromptConfig(
|
|
37
|
+
default_prompt_prefix=("Následující jsou dokumenty s přiloženými souhrny."),
|
|
38
|
+
default_prompt_template=("Dokument: {text}\nSouhrn: {target_text}"),
|
|
39
|
+
default_instruction_prompt=(
|
|
40
|
+
"Dokument: {text}\n\nNapište souhrn výše uvedeného dokumentu."
|
|
41
|
+
),
|
|
42
|
+
default_prompt_label_mapping=dict(),
|
|
43
|
+
),
|
|
44
|
+
DANISH: PromptConfig(
|
|
32
45
|
default_prompt_prefix="Følgende er dokumenter med tilhørende resuméer.",
|
|
33
46
|
default_prompt_template="Dokument: {text}\nResumé: {target_text}",
|
|
34
47
|
default_instruction_prompt="Dokument: {text}\n\nSkriv et resumé af ovenstående "
|
|
35
48
|
"dokument.",
|
|
36
49
|
default_prompt_label_mapping=dict(),
|
|
37
50
|
),
|
|
38
|
-
|
|
51
|
+
GERMAN: PromptConfig(
|
|
39
52
|
default_prompt_prefix="Nachstehend finden Sie Dokumente mit zugehörigen "
|
|
40
53
|
"Zusammenfassungen.",
|
|
41
54
|
default_prompt_template="Dokument: {text}\nZusammenfassung: {target_text}",
|
|
@@ -43,7 +56,14 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
43
56
|
"Zusammenfassung des oben genannten Dokuments.",
|
|
44
57
|
default_prompt_label_mapping=dict(),
|
|
45
58
|
),
|
|
46
|
-
|
|
59
|
+
GREEK: PromptConfig(
|
|
60
|
+
default_prompt_prefix="Ακολουθούν έγγραφα με συνοδευτικές περιλήψεις.",
|
|
61
|
+
default_prompt_template="Έγγραφο: {text}\nΠερίληψη: {target_text}",
|
|
62
|
+
default_instruction_prompt="Έγγραφο: {text}\n\nΓράψτε μια περίληψη του "
|
|
63
|
+
"παραπάνω εγγράφου.",
|
|
64
|
+
default_prompt_label_mapping=dict(),
|
|
65
|
+
),
|
|
66
|
+
ENGLISH: PromptConfig(
|
|
47
67
|
default_prompt_prefix="The following are documents with accompanying "
|
|
48
68
|
"summaries.",
|
|
49
69
|
default_prompt_template="Document: {text}\nSummary: {target_text}",
|
|
@@ -51,28 +71,28 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
51
71
|
"document.",
|
|
52
72
|
default_prompt_label_mapping=dict(),
|
|
53
73
|
),
|
|
54
|
-
|
|
74
|
+
SPANISH: PromptConfig(
|
|
55
75
|
default_prompt_prefix="A continuación se presentan documentos con resúmenes "
|
|
56
76
|
"adjuntos.",
|
|
57
77
|
default_prompt_template="Documento: {text}\nResumen: {target_text}",
|
|
58
78
|
default_instruction_prompt="Documento: {text}\n\n",
|
|
59
79
|
default_prompt_label_mapping=dict(),
|
|
60
80
|
),
|
|
61
|
-
|
|
81
|
+
ESTONIAN: PromptConfig(
|
|
62
82
|
default_prompt_prefix="Allpool on dokumendid koos kokkuvõtetega.",
|
|
63
83
|
default_prompt_template="Dokument: {text}\nKokkuvõte: {target_text}",
|
|
64
84
|
default_instruction_prompt="Dokument: {text}\n\nKoosta ülaltoodud dokumendi "
|
|
65
85
|
"kokkuvõte.",
|
|
66
86
|
default_prompt_label_mapping=dict(),
|
|
67
87
|
),
|
|
68
|
-
|
|
88
|
+
PORTUGUESE: PromptConfig(
|
|
69
89
|
default_prompt_prefix="Abaixo encontras documentos com resumos associados.",
|
|
70
90
|
default_prompt_template="Documento: {text}\nResumo: {target_text}",
|
|
71
91
|
default_instruction_prompt="Documento: {text}\n\nEscreve um resumo do "
|
|
72
92
|
"documento anterior.",
|
|
73
93
|
default_prompt_label_mapping=dict(),
|
|
74
94
|
),
|
|
75
|
-
|
|
95
|
+
FINNISH: PromptConfig(
|
|
76
96
|
default_prompt_prefix="Seuraavassa on artikkeleita ja niihin liittyviä "
|
|
77
97
|
"tiivistelmiä.",
|
|
78
98
|
default_prompt_template="Uutisartikkeli: {text}\nTiivistelmä: {target_text}",
|
|
@@ -80,14 +100,14 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
80
100
|
"yllä olevasta artikkelista.",
|
|
81
101
|
default_prompt_label_mapping=dict(),
|
|
82
102
|
),
|
|
83
|
-
|
|
103
|
+
FRENCH: PromptConfig(
|
|
84
104
|
default_prompt_prefix="Les documents suivants sont accompagnés d'un résumé.",
|
|
85
105
|
default_prompt_template="Document: {text}\nRésumé: {target_text}",
|
|
86
106
|
default_instruction_prompt="Document: {text}\n\nRédigez un résumé du "
|
|
87
107
|
"document ci-dessus.",
|
|
88
108
|
default_prompt_label_mapping=dict(),
|
|
89
109
|
),
|
|
90
|
-
|
|
110
|
+
LATVIAN: PromptConfig(
|
|
91
111
|
default_prompt_prefix="Tālāk ir dokumenti ar pievienotām kopsavilkumiem.",
|
|
92
112
|
default_prompt_template="Dokuments: {text}\nKopsavilkums: {target_text}",
|
|
93
113
|
default_instruction_prompt=(
|
|
@@ -96,14 +116,17 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
96
116
|
),
|
|
97
117
|
default_prompt_label_mapping=dict(),
|
|
98
118
|
),
|
|
99
|
-
|
|
100
|
-
default_prompt_prefix=
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
"
|
|
119
|
+
LITHUANIAN: PromptConfig(
|
|
120
|
+
default_prompt_prefix=(
|
|
121
|
+
"Žemiau pateikiami dokumentai su pridėtomis santraukomis."
|
|
122
|
+
),
|
|
123
|
+
default_prompt_template=("Dokumentas: {text}\nSantrauka: {target_text}"),
|
|
124
|
+
default_instruction_prompt=(
|
|
125
|
+
"Dokumentas: {text}\n\nParašykite aukščiau pateikto dokumento santrauką."
|
|
126
|
+
),
|
|
104
127
|
default_prompt_label_mapping=dict(),
|
|
105
128
|
),
|
|
106
|
-
|
|
129
|
+
ITALIAN: PromptConfig(
|
|
107
130
|
default_prompt_prefix="Di seguito sono riportati i documenti con le relative "
|
|
108
131
|
"sintesi.",
|
|
109
132
|
default_prompt_template="Documento: {text}\nSintesi: {target_text}",
|
|
@@ -111,14 +134,21 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
111
134
|
"documento di cui sopra.",
|
|
112
135
|
default_prompt_label_mapping=dict(),
|
|
113
136
|
),
|
|
114
|
-
|
|
137
|
+
ICELANDIC: PromptConfig(
|
|
138
|
+
default_prompt_prefix="Eftirfarandi eru skjöl með meðfylgjandi samantektum.",
|
|
139
|
+
default_prompt_template="Skjal: {text}\nSamantekt: {target_text}",
|
|
140
|
+
default_instruction_prompt="Skjal: {text}\n\nSkrifaðu samantekt á ofangreindu "
|
|
141
|
+
"skjali.",
|
|
142
|
+
default_prompt_label_mapping=dict(),
|
|
143
|
+
),
|
|
144
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
115
145
|
default_prompt_prefix="Nedenfor følger dokumenter med tilhørende sammendrag.",
|
|
116
146
|
default_prompt_template="Dokument: {text}\nSammendrag: {target_text}",
|
|
117
147
|
default_instruction_prompt="Dokument: {text}\n\nSkriv et sammendrag av "
|
|
118
148
|
"dokumentet ovenfor.",
|
|
119
149
|
default_prompt_label_mapping=dict(),
|
|
120
150
|
),
|
|
121
|
-
|
|
151
|
+
DUTCH: PromptConfig(
|
|
122
152
|
default_prompt_prefix="Hieronder volgen documenten met bijbehorende "
|
|
123
153
|
"samenvattingen.",
|
|
124
154
|
default_prompt_template="Document: {text}\nSamenvatting: {target_text}",
|
|
@@ -126,33 +156,48 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
126
156
|
"het bovenstaande document.",
|
|
127
157
|
default_prompt_label_mapping=dict(),
|
|
128
158
|
),
|
|
129
|
-
|
|
159
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
130
160
|
default_prompt_prefix="Nedenfor følger dokumenter med tilhørende sammendrag.",
|
|
131
161
|
default_prompt_template="Dokument: {text}\nSammendrag: {target_text}",
|
|
132
162
|
default_instruction_prompt="Dokument: {text}\n\nSkriv et sammendrag av "
|
|
133
163
|
"dokumentet ovenfor.",
|
|
134
164
|
default_prompt_label_mapping=dict(),
|
|
135
165
|
),
|
|
136
|
-
|
|
166
|
+
NORWEGIAN: PromptConfig(
|
|
137
167
|
default_prompt_prefix="Nedenfor følger dokumenter med tilhørende sammendrag.",
|
|
138
168
|
default_prompt_template="Dokument: {text}\nSammendrag: {target_text}",
|
|
139
169
|
default_instruction_prompt="Dokument: {text}\n\nSkriv et sammendrag av "
|
|
140
170
|
"dokumentet ovenfor.",
|
|
141
171
|
default_prompt_label_mapping=dict(),
|
|
142
172
|
),
|
|
143
|
-
|
|
173
|
+
POLISH: PromptConfig(
|
|
144
174
|
default_prompt_prefix="Poniżej znajdują się artykuły z towarzyszącymi "
|
|
145
|
-
"streszczeniami.",
|
|
175
|
+
"im streszczeniami.",
|
|
146
176
|
default_prompt_template="Artykuł: {text}\nStreszczenie: {target_text}",
|
|
147
177
|
default_instruction_prompt="Artykuł: {text}\n\nNapisz streszczenie "
|
|
148
178
|
"powyższego artykułu.",
|
|
149
179
|
default_prompt_label_mapping=dict(),
|
|
150
180
|
),
|
|
151
|
-
|
|
181
|
+
SERBIAN: PromptConfig(
|
|
182
|
+
default_prompt_prefix="Slede dokumenti sa odgovarajućim sažecima.",
|
|
183
|
+
default_prompt_template="Dokument: {text}\nSažetak: {target_text}",
|
|
184
|
+
default_instruction_prompt="Dokument: {text}\n\nNapišite sažetak "
|
|
185
|
+
"gorenavedenog dokumenta.",
|
|
186
|
+
default_prompt_label_mapping=dict(),
|
|
187
|
+
),
|
|
188
|
+
SWEDISH: PromptConfig(
|
|
152
189
|
default_prompt_prefix="Nedan följer dokument med tillhörande sammanfattningar.",
|
|
153
190
|
default_prompt_template="Dokument: {text}\nSammanfattning: {target_text}",
|
|
154
191
|
default_instruction_prompt="Dokument: {text}\n\nSkriv en sammanfattning av "
|
|
155
192
|
"ovanstående dokument.",
|
|
156
193
|
default_prompt_label_mapping=dict(),
|
|
157
194
|
),
|
|
195
|
+
UKRAINIAN: PromptConfig(
|
|
196
|
+
default_prompt_prefix="Нижче наведено документи з супровідними резюме.",
|
|
197
|
+
default_prompt_template="Документ: {text}\nРезюме: {target_text}",
|
|
198
|
+
default_instruction_prompt=(
|
|
199
|
+
"Документ: {text}\n\nНапишіть резюме наведеного вище документа."
|
|
200
|
+
),
|
|
201
|
+
default_prompt_label_mapping=dict(),
|
|
202
|
+
),
|
|
158
203
|
}
|
|
euroeval/prompt_templates/token_classification.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Templates for the token classification task."""
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from ..data_models import PromptConfig
|
|
6
|
+
from ..languages import (
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SLOVAK,
|
|
28
|
+
SPANISH,
|
|
29
|
+
SWEDISH,
|
|
30
|
+
UKRAINIAN,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
if t.TYPE_CHECKING:
|
|
34
|
+
from ..languages import Language
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
TOKEN_CLASSIFICATION_TEMPLATES: dict["Language", PromptConfig] = {
|
|
38
|
+
ENGLISH: PromptConfig(
|
|
39
|
+
default_prompt_prefix="Below are texts and JSON dictionaries with the "
|
|
40
|
+
"categories that occur in the given text.",
|
|
41
|
+
default_prompt_template="Text: {text}\nCategories: {label}",
|
|
42
|
+
default_instruction_prompt="Text: {text}\n\nIdentify the categories in "
|
|
43
|
+
"the text. You should output this as a JSON dictionary with the keys being "
|
|
44
|
+
"{labels_str}. The values should be lists of the spans of that category, "
|
|
45
|
+
"exactly as they appear in the text.",
|
|
46
|
+
default_prompt_label_mapping="auto",
|
|
47
|
+
),
|
|
48
|
+
BULGARIAN: PromptConfig(
|
|
49
|
+
default_prompt_prefix="По-долу са текстове и JSON речници с категориите, "
|
|
50
|
+
"които се срещат в дадения текст.",
|
|
51
|
+
default_prompt_template="Текст: {text}\nКатегории: {label}",
|
|
52
|
+
default_instruction_prompt="Текст: {text}\n\nИдентифицирайте категориите "
|
|
53
|
+
"в текста. Трябва да изведете това като JSON речник, като ключовете са "
|
|
54
|
+
"{labels_str}. Стойностите трябва да бъдат списъци с откъсите от тази "
|
|
55
|
+
"категория, точно както се появяват в текста.",
|
|
56
|
+
default_prompt_label_mapping="auto",
|
|
57
|
+
),
|
|
58
|
+
CZECH: PromptConfig(
|
|
59
|
+
default_prompt_prefix="Níže jsou texty a JSON slovníky s kategoriemi, "
|
|
60
|
+
"které se v daném textu vyskytují.",
|
|
61
|
+
default_prompt_template="Text: {text}\nKategorie: {label}",
|
|
62
|
+
default_instruction_prompt="Text: {text}\n\nIdentifikujte kategorie v "
|
|
63
|
+
"textu. Měli byste to vypsat jako JSON slovník, kde klíče jsou "
|
|
64
|
+
"{labels_str}. Hodnoty by měly být seznamy úseků dané kategorie, přesně "
|
|
65
|
+
"tak, jak se objevují v textu.",
|
|
66
|
+
default_prompt_label_mapping="auto",
|
|
67
|
+
),
|
|
68
|
+
DANISH: PromptConfig(
|
|
69
|
+
default_prompt_prefix="Nedenfor er tekster og JSON-ordbøger med de "
|
|
70
|
+
"kategorier, der forekommer i den givne tekst.",
|
|
71
|
+
default_prompt_template="Tekst: {text}\nKategorier: {label}",
|
|
72
|
+
default_instruction_prompt="Tekst: {text}\n\nIdentificer kategorierne i "
|
|
73
|
+
"teksten. Du skal udskrive dette som en JSON-ordbog, hvor nøglerne er "
|
|
74
|
+
"{labels_str}. Værdierne skal være lister over uddrag af den kategori, "
|
|
75
|
+
"præcis som de optræder i teksten.",
|
|
76
|
+
default_prompt_label_mapping="auto",
|
|
77
|
+
),
|
|
78
|
+
GERMAN: PromptConfig(
|
|
79
|
+
default_prompt_prefix="Unten sind Texte und JSON-Wörterbücher mit den "
|
|
80
|
+
"Kategorien, die im jeweiligen Text vorkommen.",
|
|
81
|
+
default_prompt_template="Text: {text}\nKategorien: {label}",
|
|
82
|
+
default_instruction_prompt="Text: {text}\n\nIdentifiziere die Kategorien "
|
|
83
|
+
"im Text. Du solltest dies als ein JSON-Wörterbuch ausgeben, wobei die "
|
|
84
|
+
"Schlüssel {labels_str} sind. Die Werte sollten Listen der Ausschnitte "
|
|
85
|
+
"dieser Kategorie sein, genau so, wie sie im Text erscheinen.",
|
|
86
|
+
default_prompt_label_mapping="auto",
|
|
87
|
+
),
|
|
88
|
+
GREEK: PromptConfig(
|
|
89
|
+
default_prompt_prefix="Παρακάτω είναι κείμενα και JSON λεξικά με τις "
|
|
90
|
+
"κατηγορίες που εμφανίζονται στο δεδομένο κείμενο.",
|
|
91
|
+
default_prompt_template="Κείμενο: {text}\nΚατηγορίες: {label}",
|
|
92
|
+
default_instruction_prompt="Κείμενο: {text}\n\nΑναγνωρίστε τις "
|
|
93
|
+
"κατηγορίες στο κείμενο. Θα πρέπει να το εκτυπώσετε ως ένα JSON "
|
|
94
|
+
"λεξικό με τα κλειδιά να είναι {labels_str}. Οι τιμές θα πρέπει να "
|
|
95
|
+
"είναι λίστες με τα αποσπάσματα αυτής της κατηγορίας, ακριβώς όπως "
|
|
96
|
+
"εμφανίζονται στο κείμενο.",
|
|
97
|
+
default_prompt_label_mapping="auto",
|
|
98
|
+
),
|
|
99
|
+
SPANISH: PromptConfig(
|
|
100
|
+
default_prompt_prefix="A continuación se presentan textos y diccionarios "
|
|
101
|
+
"JSON con las categorías que aparecen en el texto dado.",
|
|
102
|
+
default_prompt_template="Texto: {text}\nCategorías: {label}",
|
|
103
|
+
default_instruction_prompt="Texto: {text}\n\nIdentifica las categorías "
|
|
104
|
+
"en el texto. Debes imprimir esto como un diccionario JSON con las "
|
|
105
|
+
"claves siendo {labels_str}. Los valores deben ser listas de los "
|
|
106
|
+
"fragmentos de esa categoría, tal como aparecen en el texto.",
|
|
107
|
+
default_prompt_label_mapping="auto",
|
|
108
|
+
),
|
|
109
|
+
ESTONIAN: PromptConfig(
|
|
110
|
+
default_prompt_prefix="Allpool on tekstid ja JSON-sõnastikud kategooriatega, "
|
|
111
|
+
"mis esinevad antud tekstis.",
|
|
112
|
+
default_prompt_template="Tekst: {text}\nKategooriad: {label}",
|
|
113
|
+
default_instruction_prompt="Tekst: {text}\n\nTuvastage tekstis "
|
|
114
|
+
"kategooriad. Te peate selle väljatrükkima JSON-sõnastikuna, kus "
|
|
115
|
+
"võtmed on {labels_str}. Väärtused peaksid olema selle kategooria "
|
|
116
|
+
"lõikude loendid, täpselt nii, nagu need tekstis esinevad.",
|
|
117
|
+
default_prompt_label_mapping="auto",
|
|
118
|
+
),
|
|
119
|
+
FINNISH: PromptConfig(
|
|
120
|
+
default_prompt_prefix="Alla on tekstejä ja JSON-sanakirjoja, joissa on "
|
|
121
|
+
"kategorioita, jotka esiintyvät annetussa tekstissä.",
|
|
122
|
+
default_prompt_template="Teksti: {text}\nKategoriat: {label}",
|
|
123
|
+
default_instruction_prompt="Teksti: {text}\n\nTunnista tekstin "
|
|
124
|
+
"kategoriat. Sinun tulee tulostaa tämä JSON-sanakirjana, jossa "
|
|
125
|
+
"avaimet ovat {labels_str}. Arvojen tulee olla kyseisen kategorian "
|
|
126
|
+
"pätkien listoja, täsmälleen niin kuin ne esiintyvät tekstissä.",
|
|
127
|
+
default_prompt_label_mapping="auto",
|
|
128
|
+
),
|
|
129
|
+
FAROESE: PromptConfig(
|
|
130
|
+
default_prompt_prefix="Niðanfyri eru tekstir og JSON orðabøkur við "
|
|
131
|
+
"bólkum, ið eru í givna tekstinum.",
|
|
132
|
+
default_prompt_template="Tekstur: {text}\nBólkar: {label}",
|
|
133
|
+
default_instruction_prompt="Tekstur: {text}\n\nKenn aftur bólkarnar "
|
|
134
|
+
"í tekstinum. Tú skalt prenta hetta sum ein JSON orðabók, har "
|
|
135
|
+
"lyklarnir eru {labels_str}. Virðini skulu vera listar yvir "
|
|
136
|
+
"brotini av tí bólkinum, beint sum tey síggjast í tekstinum.",
|
|
137
|
+
default_prompt_label_mapping="auto",
|
|
138
|
+
),
|
|
139
|
+
FRENCH: PromptConfig(
|
|
140
|
+
default_prompt_prefix="Ci-dessous se trouvent des textes et des dictionnaires "
|
|
141
|
+
"JSON avec les catégories qui apparaissent dans le texte donné.",
|
|
142
|
+
default_prompt_template="Texte : {text}\nCatégories : {label}",
|
|
143
|
+
default_instruction_prompt="Texte : {text}\n\nIdentifiez les catégories "
|
|
144
|
+
"dans le texte. Vous devez l'imprimer sous la forme d'un dictionnaire JSON "
|
|
145
|
+
"avec pour clés {labels_str}. Les valeurs doivent être des listes des "
|
|
146
|
+
"extraits de cette catégorie, exactement comme ils apparaissent dans le texte.",
|
|
147
|
+
default_prompt_label_mapping="auto",
|
|
148
|
+
),
|
|
149
|
+
ICELANDIC: PromptConfig(
|
|
150
|
+
default_prompt_prefix="Hér fyrir neðan eru textar og JSON orðabækur með "
|
|
151
|
+
"flokkum sem koma fyrir í tilteknum texta.",
|
|
152
|
+
default_prompt_template="Texti: {text}\nFlokkar: {label}",
|
|
153
|
+
default_instruction_prompt="Texti: {text}\n\nFinndu flokkana í "
|
|
154
|
+
"textanum. Þú átt að prenta þetta sem JSON orðabók þar sem lyklar "
|
|
155
|
+
"eru {labels_str}. Gildin eiga að vera listar yfir brot af þeim "
|
|
156
|
+
"flokki, nákvæmlega eins og þau koma fyrir í textanum.",
|
|
157
|
+
default_prompt_label_mapping="auto",
|
|
158
|
+
),
|
|
159
|
+
ITALIAN: PromptConfig(
|
|
160
|
+
default_prompt_prefix="Di seguito sono riportati testi e dizionari JSON "
|
|
161
|
+
"con le categorie che compaiono nel testo dato.",
|
|
162
|
+
default_prompt_template="Testo: {text}\nCategorie: {label}",
|
|
163
|
+
default_instruction_prompt="Testo: {text}\n\nIdentifica le categorie "
|
|
164
|
+
"nel testo. Devi stampare questo come un dizionario JSON con le chiavi "
|
|
165
|
+
"che sono {labels_str}. I valori devono essere liste dei brani di "
|
|
166
|
+
"quella categoria, esattamente come appaiono nel testo.",
|
|
167
|
+
default_prompt_label_mapping="auto",
|
|
168
|
+
),
|
|
169
|
+
LITHUANIAN: PromptConfig(
|
|
170
|
+
default_prompt_prefix="Žemiau pateikti tekstai ir JSON žodynai su "
|
|
171
|
+
"kategorijomis, kurios pasitaiko nurodytame tekste.",
|
|
172
|
+
default_prompt_template="Tekstas: {text}\nKategorijos: {label}",
|
|
173
|
+
default_instruction_prompt="Tekstas: {text}\n\nNustatykite kategorijas "
|
|
174
|
+
"tekste. Turite tai atspausdinti kaip JSON žodyną, kur raktai yra "
|
|
175
|
+
"{labels_str}. Reikšmės turėtų būti tos kategorijos ištraukų sąrašai, "
|
|
176
|
+
"tiksliai taip, kaip jos pateikiamos tekste.",
|
|
177
|
+
default_prompt_label_mapping="auto",
|
|
178
|
+
),
|
|
179
|
+
LATVIAN: PromptConfig(
|
|
180
|
+
default_prompt_prefix="Zemāk ir teksti un JSON vārdnīcas ar kategorijām, "
|
|
181
|
+
"kas parādās dotajā tekstā.",
|
|
182
|
+
default_prompt_template="Teksts: {text}\nKategorijas: {label}",
|
|
183
|
+
default_instruction_prompt="Teksts: {text}\n\nIdentificējiet "
|
|
184
|
+
"kategorijas tekstā. Jums tas jāizdrukā kā JSON vārdnīca, kur "
|
|
185
|
+
"atslēgas ir {labels_str}. Vērtībām jābūt šo kategoriju "
|
|
186
|
+
"izvilkumu sarakstiem, tieši tā, kā tās parādās tekstā.",
|
|
187
|
+
default_prompt_label_mapping="auto",
|
|
188
|
+
),
|
|
189
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
190
|
+
default_prompt_prefix="Nedenfor er tekster og JSON-ordbøker med de "
|
|
191
|
+
"kategoriene som forekommer i den gitte teksten.",
|
|
192
|
+
default_prompt_template="Tekst: {text}\nKategorier: {label}",
|
|
193
|
+
default_instruction_prompt="Tekst: {text}\n\nIdentifiser kategoriene "
|
|
194
|
+
"i teksten. Du skal skrive dette ut som en JSON-ordbok med nøklene "
|
|
195
|
+
"som er {labels_str}. Verdiene skal være lister over utdragene av "
|
|
196
|
+
"den kategorien, akkurat som de vises i teksten.",
|
|
197
|
+
default_prompt_label_mapping="auto",
|
|
198
|
+
),
|
|
199
|
+
DUTCH: PromptConfig(
|
|
200
|
+
default_prompt_prefix="Hieronder volgen teksten en JSON-woordenboeken "
|
|
201
|
+
"met de categorieën die in de gegeven tekst voorkomen.",
|
|
202
|
+
default_prompt_template="Tekst: {text}\nCategorieën: {label}",
|
|
203
|
+
default_instruction_prompt="Tekst: {text}\n\nIdentificeer de "
|
|
204
|
+
"categorieën in de tekst. Je moet dit afdrukken als een JSON-woordenboek "
|
|
205
|
+
"met de sleutels zijnde {labels_str}. De waarden moeten lijsten zijn van "
|
|
206
|
+
"de fragmenten van die categorie, precies zoals ze in de tekst voorkomen.",
|
|
207
|
+
default_prompt_label_mapping="auto",
|
|
208
|
+
),
|
|
209
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
210
|
+
default_prompt_prefix="Nedanfor er tekstar og JSON-ordbøker med dei "
|
|
211
|
+
"kategoriane som førekjem i den gitte teksten.",
|
|
212
|
+
default_prompt_template="Tekst: {text}\nKategoriar: {label}",
|
|
213
|
+
default_instruction_prompt="Tekst: {text}\n\nIdentifiser kategoriane "
|
|
214
|
+
"i teksten. Du skal skrive dette ut som ein JSON-ordbok med nøklane "
|
|
215
|
+
"som er {labels_str}. Verdiane skal vere lister over utdraga av den "
|
|
216
|
+
"kategorien, akkurat som dei viser seg i teksten.",
|
|
217
|
+
default_prompt_label_mapping="auto",
|
|
218
|
+
),
|
|
219
|
+
NORWEGIAN: PromptConfig(
|
|
220
|
+
default_prompt_prefix="Nedenfor er tekster og JSON-ordbøker med de "
|
|
221
|
+
"kategoriene som forekommer i den gitte teksten.",
|
|
222
|
+
default_prompt_template="Tekst: {text}\nKategorier: {label}",
|
|
223
|
+
default_instruction_prompt="Tekst: {text}\n\nIdentifiser kategoriene "
|
|
224
|
+
"i teksten. Du skal skrive dette ut som en JSON-ordbok med nøklene "
|
|
225
|
+
"som er {labels_str}. Verdiene skal være lister over utdragene av "
|
|
226
|
+
"den kategorien, akkurat som de vises i teksten.",
|
|
227
|
+
default_prompt_label_mapping="auto",
|
|
228
|
+
),
|
|
229
|
+
POLISH: PromptConfig(
|
|
230
|
+
default_prompt_prefix="Poniżej znajdują się teksty i słowniki JSON z "
|
|
231
|
+
"kategoriami występującymi w danym tekście.",
|
|
232
|
+
default_prompt_template="Tekst: {text}\nKategorie: {label}",
|
|
233
|
+
default_instruction_prompt="Tekst: {text}\n\nZidentyfikuj kategorie "
|
|
234
|
+
"w tekście. Należy to wydrukować jako słownik JSON, w którym kluczami "
|
|
235
|
+
"są {labels_str}. Wartości powinny być listami fragmentów danej "
|
|
236
|
+
"kategorii, dokładnie tak, jak pojawiają się w tekście.",
|
|
237
|
+
default_prompt_label_mapping="auto",
|
|
238
|
+
),
|
|
239
|
+
PORTUGUESE: PromptConfig(
|
|
240
|
+
default_prompt_prefix="A seguir estão textos e dicionários JSON com as "
|
|
241
|
+
"categorias que aparecem no texto dado.",
|
|
242
|
+
default_prompt_template="Texto: {text}\nCategorias: {label}",
|
|
243
|
+
default_instruction_prompt="Texto: {text}\n\nIdentifique as categorias "
|
|
244
|
+
"no texto. Você deve imprimir isso como um dicionário JSON com as "
|
|
245
|
+
"chaves sendo {labels_str}. Os valores devem ser listas dos trechos "
|
|
246
|
+
"dessa categoria, exatamente como aparecem no texto.",
|
|
247
|
+
default_prompt_label_mapping="auto",
|
|
248
|
+
),
|
|
249
|
+
SLOVAK: PromptConfig(
|
|
250
|
+
default_prompt_prefix="Nižšie sú texty a JSON slovníky s kategóriami, "
|
|
251
|
+
"ktoré sa v danom texte vyskytujú.",
|
|
252
|
+
default_prompt_template="Text: {text}\nKategórie: {label}",
|
|
253
|
+
default_instruction_prompt="Text: {text}\n\nIdentifikujte kategórie v "
|
|
254
|
+
"texte. Mali by ste to vypísať ako JSON slovník, kde kľúče sú "
|
|
255
|
+
"{labels_str}. Hodnoty by mali byť zoznamy úsekov danej kategórie, "
|
|
256
|
+
"presne tak, ako sa objavujú v texte.",
|
|
257
|
+
default_prompt_label_mapping="auto",
|
|
258
|
+
),
|
|
259
|
+
SWEDISH: PromptConfig(
|
|
260
|
+
default_prompt_prefix="Nedan är texter och JSON-ordböcker med de "
|
|
261
|
+
"kategorier som förekommer i den givna texten.",
|
|
262
|
+
default_prompt_template="Text: {text}\nKategorier: {label}",
|
|
263
|
+
default_instruction_prompt="Text: {text}\n\nIdentifiera kategorierna "
|
|
264
|
+
"i texten. Du ska skriva ut detta som en JSON-ordbok med nycklarna "
|
|
265
|
+
"som är {labels_str}. Värdena ska vara listor över utdragen av den "
|
|
266
|
+
"kategorin, precis som de visas i texten.",
|
|
267
|
+
default_prompt_label_mapping="auto",
|
|
268
|
+
),
|
|
269
|
+
UKRAINIAN: PromptConfig(
|
|
270
|
+
default_prompt_prefix="Нижче наведено тексти та JSON-словники з "
|
|
271
|
+
"категоріями, які зустрічаються в наведеному тексті.",
|
|
272
|
+
default_prompt_template="Текст: {text}\nКатегорії: {label}",
|
|
273
|
+
default_instruction_prompt="Текст: {text}\n\nВизначте категорії в "
|
|
274
|
+
"тексті. Ви повинні надрукувати це як JSON-словник, де ключі - це "
|
|
275
|
+
"{labels_str}. Значення повинні бути списками уривків цієї категорії, "
|
|
276
|
+
"саме так, як вони з'являються в тексті.",
|
|
277
|
+
default_prompt_label_mapping="auto",
|
|
278
|
+
),
|
|
279
|
+
}
|
euroeval/scores.py
CHANGED
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
"""Aggregation of raw scores into the mean and a confidence interval."""
|
|
2
2
|
|
|
3
|
+
import collections.abc as c
|
|
3
4
|
import logging
|
|
4
5
|
import typing as t
|
|
5
6
|
import warnings
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
|
|
10
|
+
from .logging_utils import log
|
|
11
|
+
|
|
9
12
|
if t.TYPE_CHECKING:
|
|
10
13
|
from .metrics import Metric
|
|
11
14
|
from .types import ScoreDict
|
|
12
15
|
|
|
13
|
-
logger = logging.getLogger("euroeval")
|
|
14
|
-
|
|
15
16
|
|
|
16
17
|
def log_scores(
|
|
17
18
|
dataset_name: str,
|
|
18
|
-
metrics:
|
|
19
|
-
scores:
|
|
19
|
+
metrics: c.Sequence["Metric"],
|
|
20
|
+
scores: c.Sequence[dict[str, float]],
|
|
20
21
|
model_id: str,
|
|
21
22
|
model_revision: str,
|
|
22
23
|
model_param: str | None,
|
|
@@ -48,9 +49,8 @@ def log_scores(
|
|
|
48
49
|
if model_param is not None:
|
|
49
50
|
model_id += f"#{model_param}"
|
|
50
51
|
|
|
51
|
-
logger.info(f"Finished evaluation of {model_id} on {dataset_name}.")
|
|
52
|
-
|
|
53
52
|
total_dict: dict[str, float] = dict()
|
|
53
|
+
all_log_strs: list[str] = [f"Finished benchmarking {model_id} on {dataset_name}."]
|
|
54
54
|
for metric in metrics:
|
|
55
55
|
test_score, test_se = aggregate_scores(scores=scores, metric=metric)
|
|
56
56
|
test_score, test_score_str = metric.postprocessing_fn(test_score)
|
|
@@ -58,17 +58,18 @@ def log_scores(
|
|
|
58
58
|
total_dict[f"test_{metric.name}"] = test_score
|
|
59
59
|
total_dict[f"test_{metric.name}_se"] = test_se
|
|
60
60
|
log_str = (
|
|
61
|
-
f"{metric.pretty_name}: {test_score_str} ± {test_se_str}"
|
|
61
|
+
f"- {metric.pretty_name}: {test_score_str} ± {test_se_str}"
|
|
62
62
|
if not np.isnan(test_se)
|
|
63
|
-
else f"{metric.pretty_name}: {test_score_str}"
|
|
63
|
+
else f"- {metric.pretty_name}: {test_score_str}"
|
|
64
64
|
)
|
|
65
|
-
|
|
65
|
+
all_log_strs.append(log_str)
|
|
66
|
+
log("\n".join(all_log_strs), level=logging.INFO)
|
|
66
67
|
|
|
67
68
|
return dict(raw=scores, total=total_dict)
|
|
68
69
|
|
|
69
70
|
|
|
70
71
|
def aggregate_scores(
|
|
71
|
-
scores:
|
|
72
|
+
scores: c.Sequence[dict[str, float]], metric: "Metric"
|
|
72
73
|
) -> tuple[float, float]:
|
|
73
74
|
"""Helper function to compute the mean with confidence intervals.
|
|
74
75
|
|
euroeval/speed_benchmark.py
CHANGED
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
"""Benchmarking model inference speed."""
|
|
2
2
|
|
|
3
|
+
import collections.abc as c
|
|
3
4
|
import logging
|
|
4
5
|
import typing as t
|
|
5
6
|
|
|
6
7
|
import pyinfer
|
|
7
|
-
from tqdm.auto import tqdm
|
|
8
8
|
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
|
9
9
|
|
|
10
10
|
from .benchmark_modules import HuggingFaceEncoderModel, LiteLLMModel, VLLMModel
|
|
11
11
|
from .exceptions import InvalidBenchmark
|
|
12
|
+
from .logging_utils import get_pbar, log
|
|
12
13
|
from .utils import clear_memory
|
|
13
14
|
|
|
14
15
|
if t.TYPE_CHECKING:
|
|
15
16
|
from .benchmark_modules import BenchmarkModule
|
|
16
17
|
from .data_models import BenchmarkConfig
|
|
17
18
|
|
|
18
|
-
logger = logging.getLogger("euroeval")
|
|
19
|
-
|
|
20
19
|
|
|
21
20
|
def benchmark_speed(
|
|
22
21
|
model: "BenchmarkModule", benchmark_config: "BenchmarkConfig"
|
|
23
|
-
) ->
|
|
22
|
+
) -> c.Sequence[dict[str, float]]:
|
|
24
23
|
"""Benchmark model inference speed.
|
|
25
24
|
|
|
26
25
|
Args:
|
|
@@ -33,7 +32,7 @@ def benchmark_speed(
|
|
|
33
32
|
Dictionary of scores.
|
|
34
33
|
"""
|
|
35
34
|
scores: list[dict[str, float]] = list()
|
|
36
|
-
for idx in tqdm(
|
|
35
|
+
for idx in get_pbar(
|
|
37
36
|
iterable=range(benchmark_config.num_iterations),
|
|
38
37
|
desc="Benchmarking",
|
|
39
38
|
disable=not benchmark_config.progress_bar,
|
|
@@ -41,7 +40,7 @@ def benchmark_speed(
|
|
|
41
40
|
itr_scores = benchmark_speed_single_iteration(model=model, itr_idx=idx)
|
|
42
41
|
clear_memory()
|
|
43
42
|
scores.append(itr_scores)
|
|
44
|
-
|
|
43
|
+
log(f"Scores for iteration {idx}: {itr_scores}", level=logging.DEBUG)
|
|
45
44
|
return scores
|
|
46
45
|
|
|
47
46
|
|
|
euroeval/task_group_utils/multiple_choice_classification.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Utility functions related to the multiple-choice classification task group."""
|
|
2
2
|
|
|
3
|
+
import collections.abc as c
|
|
3
4
|
import hashlib
|
|
4
|
-
import logging
|
|
5
5
|
import re
|
|
6
6
|
import typing as t
|
|
7
7
|
from collections import defaultdict
|
|
@@ -18,8 +18,6 @@ if t.TYPE_CHECKING:
|
|
|
18
18
|
|
|
19
19
|
from ..types import Labels, Predictions
|
|
20
20
|
|
|
21
|
-
logger = logging.getLogger("euroeval")
|
|
22
|
-
|
|
23
21
|
|
|
24
22
|
class MultipleChoiceClassificationTrainer(Trainer):
|
|
25
23
|
"""Trainer subclass for multiple-choice classification tasks."""
|
|
@@ -27,7 +25,7 @@ class MultipleChoiceClassificationTrainer(Trainer):
|
|
|
27
25
|
def evaluate( # type: ignore[override]
|
|
28
26
|
self,
|
|
29
27
|
eval_dataset: "Dataset | None" = None,
|
|
30
|
-
ignore_keys:
|
|
28
|
+
ignore_keys: c.Sequence[str] | None = None,
|
|
31
29
|
metric_key_prefix: str = "eval",
|
|
32
30
|
) -> dict[str, float]:
|
|
33
31
|
"""Evaluate the model on the given dataset.
|