EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +32 -14
- euroeval/benchmark_config_factory.py +92 -180
- euroeval/benchmark_modules/base.py +49 -39
- euroeval/benchmark_modules/fresh.py +35 -21
- euroeval/benchmark_modules/hf.py +280 -244
- euroeval/benchmark_modules/litellm.py +752 -312
- euroeval/benchmark_modules/vllm.py +570 -268
- euroeval/benchmarker.py +651 -528
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +49 -38
- euroeval/constants.py +44 -25
- euroeval/data_loading.py +111 -55
- euroeval/data_models.py +490 -323
- euroeval/dataset_configs/__init__.py +26 -4
- euroeval/dataset_configs/bosnian.py +39 -0
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/croatian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +78 -50
- euroeval/dataset_configs/dutch.py +74 -44
- euroeval/dataset_configs/english.py +71 -36
- euroeval/dataset_configs/estonian.py +111 -0
- euroeval/dataset_configs/faroese.py +25 -18
- euroeval/dataset_configs/finnish.py +63 -26
- euroeval/dataset_configs/french.py +65 -32
- euroeval/dataset_configs/german.py +77 -36
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +68 -57
- euroeval/dataset_configs/italian.py +68 -36
- euroeval/dataset_configs/latvian.py +87 -0
- euroeval/dataset_configs/lithuanian.py +64 -0
- euroeval/dataset_configs/norwegian.py +98 -72
- euroeval/dataset_configs/polish.py +96 -0
- euroeval/dataset_configs/portuguese.py +63 -40
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/slovene.py +56 -0
- euroeval/dataset_configs/spanish.py +68 -34
- euroeval/dataset_configs/swedish.py +82 -41
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/enums.py +12 -6
- euroeval/exceptions.py +21 -1
- euroeval/finetuning.py +34 -26
- euroeval/generation.py +76 -41
- euroeval/generation_utils.py +169 -34
- euroeval/languages.py +1020 -188
- euroeval/logging_utils.py +268 -0
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +85 -0
- euroeval/metrics/huggingface.py +216 -0
- euroeval/metrics/llm_as_a_judge.py +260 -0
- euroeval/metrics/pipeline.py +289 -0
- euroeval/metrics/speed.py +48 -0
- euroeval/model_cache.py +40 -21
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +157 -22
- euroeval/prompt_templates/multiple_choice.py +159 -17
- euroeval/prompt_templates/named_entity_recognition.py +318 -21
- euroeval/prompt_templates/reading_comprehension.py +207 -16
- euroeval/prompt_templates/sentiment_classification.py +205 -22
- euroeval/prompt_templates/summarization.py +122 -22
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +20 -9
- euroeval/speed_benchmark.py +11 -12
- euroeval/task_group_utils/multiple_choice_classification.py +21 -12
- euroeval/task_group_utils/question_answering.py +101 -73
- euroeval/task_group_utils/sequence_classification.py +144 -61
- euroeval/task_group_utils/text_to_text.py +33 -12
- euroeval/task_group_utils/token_classification.py +86 -89
- euroeval/tasks.py +75 -16
- euroeval/tokenisation_utils.py +603 -0
- euroeval/types.py +17 -11
- euroeval/utils.py +332 -137
- euroeval-16.7.1.dist-info/METADATA +623 -0
- euroeval-16.7.1.dist-info/RECORD +84 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -737
- euroeval/metrics.py +0 -452
- euroeval/tokenization_utils.py +0 -498
- euroeval-15.12.0.dist-info/METADATA +0 -285
- euroeval-15.12.0.dist-info/RECORD +0 -63
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,89 @@
|
|
|
1
1
|
"""Templates for the Reading Comprehension task."""
|
|
2
2
|
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
3
5
|
from ..data_models import PromptConfig
|
|
4
|
-
from ..languages import
|
|
6
|
+
from ..languages import (
|
|
7
|
+
BOSNIAN,
|
|
8
|
+
BULGARIAN,
|
|
9
|
+
CROATIAN,
|
|
10
|
+
CZECH,
|
|
11
|
+
DANISH,
|
|
12
|
+
DUTCH,
|
|
13
|
+
ENGLISH,
|
|
14
|
+
ESTONIAN,
|
|
15
|
+
FAROESE,
|
|
16
|
+
FINNISH,
|
|
17
|
+
FRENCH,
|
|
18
|
+
GERMAN,
|
|
19
|
+
GREEK,
|
|
20
|
+
ICELANDIC,
|
|
21
|
+
ITALIAN,
|
|
22
|
+
LATVIAN,
|
|
23
|
+
LITHUANIAN,
|
|
24
|
+
NORWEGIAN,
|
|
25
|
+
NORWEGIAN_BOKMÅL,
|
|
26
|
+
NORWEGIAN_NYNORSK,
|
|
27
|
+
POLISH,
|
|
28
|
+
PORTUGUESE,
|
|
29
|
+
SERBIAN,
|
|
30
|
+
SLOVAK,
|
|
31
|
+
SLOVENE,
|
|
32
|
+
SPANISH,
|
|
33
|
+
SWEDISH,
|
|
34
|
+
UKRAINIAN,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
if t.TYPE_CHECKING:
|
|
38
|
+
from ..languages import Language
|
|
5
39
|
|
|
6
|
-
RC_TEMPLATES = {
|
|
7
|
-
|
|
40
|
+
RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
41
|
+
BOSNIAN: PromptConfig(
|
|
42
|
+
default_prompt_prefix="Slijede tekstovi s pitanjima i odgovorima.",
|
|
43
|
+
default_prompt_template=(
|
|
44
|
+
"Tekst: {text}\nPitanje: {question}\nOdgovor s najviše 3 riječi: {label}"
|
|
45
|
+
),
|
|
46
|
+
default_instruction_prompt=(
|
|
47
|
+
"Tekst: {text}\n\n"
|
|
48
|
+
"Odgovorite na sljedeće pitanje o gornjem tekstu s najviše 3 riječi.\n\n"
|
|
49
|
+
"Pitanje: {question}"
|
|
50
|
+
),
|
|
51
|
+
default_prompt_label_mapping=dict(),
|
|
52
|
+
),
|
|
53
|
+
BULGARIAN: PromptConfig(
|
|
54
|
+
default_prompt_prefix="Следват текстове със съответни въпроси и отговори.",
|
|
55
|
+
default_prompt_template="Текст: {text}\nВъпрос: {question}\nОтговор с максимум "
|
|
56
|
+
"3 думи: {label}",
|
|
57
|
+
default_instruction_prompt="Текст: {text}\n\nОтговорете на следния въпрос "
|
|
58
|
+
"относно текста по-горе с максимум 3 думи.\n\nВъпрос: {question}",
|
|
59
|
+
default_prompt_label_mapping=dict(),
|
|
60
|
+
),
|
|
61
|
+
CROATIAN: PromptConfig(
|
|
62
|
+
default_prompt_prefix=("Sljedeći tekstovi sadrže pitanja i odgovore."),
|
|
63
|
+
default_prompt_template=(
|
|
64
|
+
"Tekst: {text}\nPitanje: {question}\nOdgovor s najviše 3 riječi: {label}"
|
|
65
|
+
),
|
|
66
|
+
default_instruction_prompt=(
|
|
67
|
+
"Tekst: {text}\n\n"
|
|
68
|
+
"Odgovorite na sljedeće pitanje o gornjem tekstu s najviše 3 riječi.\n\n"
|
|
69
|
+
"Pitanje: {question}"
|
|
70
|
+
),
|
|
71
|
+
default_prompt_label_mapping=dict(),
|
|
72
|
+
),
|
|
73
|
+
CZECH: PromptConfig(
|
|
74
|
+
default_prompt_prefix="Následující texty obsahují otázky a odpovědi.",
|
|
75
|
+
default_prompt_template=(
|
|
76
|
+
"Text: {text}\nOtázka: {question}\nOdpověď maximálně 3 slovy: {label}"
|
|
77
|
+
),
|
|
78
|
+
default_instruction_prompt=(
|
|
79
|
+
"Text: {text}\n\n"
|
|
80
|
+
"Odpovězte na následující otázku k výše uvedenému textu "
|
|
81
|
+
"maximálně 3 slovy.\n\n"
|
|
82
|
+
"Otázka: {question}"
|
|
83
|
+
),
|
|
84
|
+
default_prompt_label_mapping=dict(),
|
|
85
|
+
),
|
|
86
|
+
DANISH: PromptConfig(
|
|
8
87
|
default_prompt_prefix="Følgende er tekster med tilhørende spørgsmål og svar.",
|
|
9
88
|
default_prompt_template="Tekst: {text}\nSpørgsmål: {question}\nSvar med maks. "
|
|
10
89
|
"3 ord: {label}",
|
|
@@ -12,7 +91,7 @@ RC_TEMPLATES = {
|
|
|
12
91
|
"teksten ovenfor med maks. 3 ord.\n\nSpørgsmål: {question}",
|
|
13
92
|
default_prompt_label_mapping=dict(),
|
|
14
93
|
),
|
|
15
|
-
|
|
94
|
+
GERMAN: PromptConfig(
|
|
16
95
|
default_prompt_prefix="Im Folgenden finden Sie Texte mit den dazugehörigen "
|
|
17
96
|
"Fragen und Antworten.",
|
|
18
97
|
default_prompt_template="Text: {text}\nFragen: {question}\nFragen Antwort in "
|
|
@@ -21,7 +100,16 @@ RC_TEMPLATES = {
|
|
|
21
100
|
"zum obigen Text in höchstens 3 Wörtern.\n\nFrage: {question}",
|
|
22
101
|
default_prompt_label_mapping=dict(),
|
|
23
102
|
),
|
|
24
|
-
|
|
103
|
+
GREEK: PromptConfig(
|
|
104
|
+
default_prompt_prefix="Ακολουθούν κείμενα με τις αντίστοιχες ερωτήσεις και "
|
|
105
|
+
"απαντήσεις.",
|
|
106
|
+
default_prompt_template="Κείμενο: {text}\nΕρώτηση: {question}\nΑπάντηση σε το "
|
|
107
|
+
"πολύ 3 λέξεις: {label}",
|
|
108
|
+
default_instruction_prompt="Κείμενο: {text}\n\nΑπαντήστε την παρακάτω ερώτηση "
|
|
109
|
+
"σχετικά με το παραπάνω κείμενο σε το πολύ 3 λέξεις.\n\nΕρώτηση: {question}",
|
|
110
|
+
default_prompt_label_mapping=dict(),
|
|
111
|
+
),
|
|
112
|
+
ENGLISH: PromptConfig(
|
|
25
113
|
default_prompt_prefix="The following are texts with accompanying questions and "
|
|
26
114
|
"answers.",
|
|
27
115
|
default_prompt_template="Text: {text}\nQuestion: {question}\nAnswer in max "
|
|
@@ -30,7 +118,7 @@ RC_TEMPLATES = {
|
|
|
30
118
|
"about the above text in at most 3 words.\n\nQuestion: {question}",
|
|
31
119
|
default_prompt_label_mapping=dict(),
|
|
32
120
|
),
|
|
33
|
-
|
|
121
|
+
SPANISH: PromptConfig(
|
|
34
122
|
default_prompt_prefix="A continuación se presentan textos con sus preguntas y "
|
|
35
123
|
"respuestas correspondientes.",
|
|
36
124
|
default_prompt_template="Texto: {text}\nPregunta: {question}\nRespuesta en "
|
|
@@ -39,7 +127,15 @@ RC_TEMPLATES = {
|
|
|
39
127
|
"sobre el texto anterior en máximo 3 palabras.\n\nPregunta: {question}",
|
|
40
128
|
default_prompt_label_mapping=dict(),
|
|
41
129
|
),
|
|
42
|
-
|
|
130
|
+
ESTONIAN: PromptConfig(
|
|
131
|
+
default_prompt_prefix="Järgnevad on tekstid koos küsimuste ja vastustega.",
|
|
132
|
+
default_prompt_template="Tekst: {text}\nKüsimus: {question}\nVasta "
|
|
133
|
+
"maksimaalselt 3 sõnaga: {label}",
|
|
134
|
+
default_instruction_prompt="Tekst: {text}\n\nVasta järgmisele küsimusele "
|
|
135
|
+
"ülevaltoodud teksti kohta maksimaalselt 3 sõnaga.\n\nKüsimus: {question}",
|
|
136
|
+
default_prompt_label_mapping=dict(),
|
|
137
|
+
),
|
|
138
|
+
FINNISH: PromptConfig(
|
|
43
139
|
default_prompt_prefix="Seuraavassa on tekstejä ja niihin liittyviä kysymyksiä "
|
|
44
140
|
"ja vastauksia.",
|
|
45
141
|
default_prompt_template="Teksti: {text}\nKysymys: {question} "
|
|
@@ -49,7 +145,7 @@ RC_TEMPLATES = {
|
|
|
49
145
|
"Kysymys: {question}",
|
|
50
146
|
default_prompt_label_mapping=dict(),
|
|
51
147
|
),
|
|
52
|
-
|
|
148
|
+
FAROESE: PromptConfig(
|
|
53
149
|
default_prompt_prefix="Hetta eru tekstir saman við spurningum og svar.",
|
|
54
150
|
default_prompt_template="Tekstur: {text}\nSpurningur: {question}\nSvara við í "
|
|
55
151
|
"mesta lagi trimum orðum: {label}",
|
|
@@ -57,7 +153,7 @@ RC_TEMPLATES = {
|
|
|
57
153
|
"tekstin uppiyvir við í mesta lagi trimum orðum.\n\nSpurningur: {question}",
|
|
58
154
|
default_prompt_label_mapping=dict(),
|
|
59
155
|
),
|
|
60
|
-
|
|
156
|
+
FRENCH: PromptConfig(
|
|
61
157
|
default_prompt_prefix="Les textes suivants sont accompagnés de questions et de "
|
|
62
158
|
"réponses.",
|
|
63
159
|
default_prompt_template="Texte: {text}\nQuestion: {question}\nRéponse en 3 "
|
|
@@ -66,7 +162,7 @@ RC_TEMPLATES = {
|
|
|
66
162
|
"sur le texte ci-dessus en 3 mots maximum.\n\nQuestion: {question}",
|
|
67
163
|
default_prompt_label_mapping=dict(),
|
|
68
164
|
),
|
|
69
|
-
|
|
165
|
+
ICELANDIC: PromptConfig(
|
|
70
166
|
default_prompt_prefix="Eftirfarandi eru textar með tilheyrandi spurningum og "
|
|
71
167
|
"svörum.",
|
|
72
168
|
default_prompt_template="Texti: {text}\nSpurning: {question}\nSvaraðu með að "
|
|
@@ -75,7 +171,7 @@ RC_TEMPLATES = {
|
|
|
75
171
|
"textann að hámarki í 3 orðum.\n\nSpurning: {question}",
|
|
76
172
|
default_prompt_label_mapping=dict(),
|
|
77
173
|
),
|
|
78
|
-
|
|
174
|
+
ITALIAN: PromptConfig(
|
|
79
175
|
default_prompt_prefix="I testi che seguono sono accompagnati da domande e "
|
|
80
176
|
"risposte.",
|
|
81
177
|
default_prompt_template="Testo: {text}\nDomanda: {question}\nRispondere in "
|
|
@@ -84,7 +180,25 @@ RC_TEMPLATES = {
|
|
|
84
180
|
"sul in un massimo di 3 parole.\n\nDomanda: {question}",
|
|
85
181
|
default_prompt_label_mapping=dict(),
|
|
86
182
|
),
|
|
87
|
-
|
|
183
|
+
LITHUANIAN: PromptConfig(
|
|
184
|
+
default_prompt_prefix="Toliau pateikti tekstai su atitinkamais klausimais ir "
|
|
185
|
+
"atsakymais.",
|
|
186
|
+
default_prompt_template="Tekstas: {text}\nKlausimas: {question}\nAtsakykite ne "
|
|
187
|
+
"daugiau kaip 3 žodžiais: {label}",
|
|
188
|
+
default_instruction_prompt="Tekstas: {text}\n\nAtsakykite į šį klausimą apie "
|
|
189
|
+
"aukščiau pateiktą tekstą ne daugiau kaip 3 žodžiais.\n\nKlausimas: {question}",
|
|
190
|
+
default_prompt_label_mapping=dict(),
|
|
191
|
+
),
|
|
192
|
+
LATVIAN: PromptConfig(
|
|
193
|
+
default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
|
|
194
|
+
"atbildēm.",
|
|
195
|
+
default_prompt_template="Teksts: {text}\nJautājums: {question}\nAtbildēt ar "
|
|
196
|
+
"maksimāli 3 vārdiem: {label}",
|
|
197
|
+
default_instruction_prompt="Teksts: {text}\n\nAtbildiet uz šo jautājumu par "
|
|
198
|
+
"iepriekš minēto tekstu ar maksimāli 3 vārdiem.\n\nJautājums: {question}",
|
|
199
|
+
default_prompt_label_mapping=dict(),
|
|
200
|
+
),
|
|
201
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
88
202
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
89
203
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
90
204
|
"ord: {label}",
|
|
@@ -92,7 +206,7 @@ RC_TEMPLATES = {
|
|
|
92
206
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
93
207
|
default_prompt_label_mapping=dict(),
|
|
94
208
|
),
|
|
95
|
-
|
|
209
|
+
DUTCH: PromptConfig(
|
|
96
210
|
default_prompt_prefix="Hieronder volgen teksten met bijbehorende vragen en "
|
|
97
211
|
"antwoorden.",
|
|
98
212
|
default_prompt_template="Tekst: {text}\nVraag: {question}\nAntwoord in max "
|
|
@@ -101,7 +215,7 @@ RC_TEMPLATES = {
|
|
|
101
215
|
"over de bovenstaande tekst in maximaal 3 woorden.\n\nVraag: {question}",
|
|
102
216
|
default_prompt_label_mapping=dict(),
|
|
103
217
|
),
|
|
104
|
-
|
|
218
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
105
219
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
106
220
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
107
221
|
"ord: {label}",
|
|
@@ -109,7 +223,7 @@ RC_TEMPLATES = {
|
|
|
109
223
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
110
224
|
default_prompt_label_mapping=dict(),
|
|
111
225
|
),
|
|
112
|
-
|
|
226
|
+
NORWEGIAN: PromptConfig(
|
|
113
227
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
114
228
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
115
229
|
"ord: {label}",
|
|
@@ -117,7 +231,70 @@ RC_TEMPLATES = {
|
|
|
117
231
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
118
232
|
default_prompt_label_mapping=dict(),
|
|
119
233
|
),
|
|
120
|
-
|
|
234
|
+
POLISH: PromptConfig(
|
|
235
|
+
default_prompt_prefix=(
|
|
236
|
+
"Poniżej znajdują się teksty z towarzyszącymi pytaniami i odpowiedziami."
|
|
237
|
+
),
|
|
238
|
+
default_prompt_template="Tekst: {text}\nPytanie: {question}\nOdpowiedź z "
|
|
239
|
+
"użyciem maksymalnie 3 słów: {label}",
|
|
240
|
+
default_instruction_prompt="Tekst: {text}\n\nOdpowiedz na następujące pytanie "
|
|
241
|
+
"dotyczące powyższego tekstu, używając maksymalnie 3 słów.\n\nPytanie: "
|
|
242
|
+
"{question}",
|
|
243
|
+
default_prompt_label_mapping=dict(),
|
|
244
|
+
),
|
|
245
|
+
PORTUGUESE: PromptConfig(
|
|
246
|
+
default_prompt_prefix="Os textos que se seguem são acompanhados de perguntas "
|
|
247
|
+
"e respostas.",
|
|
248
|
+
default_prompt_template="Texto: {text}\nPergunta: {question}\nResposta com "
|
|
249
|
+
"um máximo de 3 palavras: {label}",
|
|
250
|
+
default_instruction_prompt="Texto: {text}\n\nResponde à seguinte pergunta "
|
|
251
|
+
"sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
|
|
252
|
+
default_prompt_label_mapping=dict(),
|
|
253
|
+
),
|
|
254
|
+
SLOVENE: PromptConfig(
|
|
255
|
+
default_prompt_prefix=(
|
|
256
|
+
"Spodaj so besedila z ustreznimi vprašanji in odgovori."
|
|
257
|
+
),
|
|
258
|
+
default_prompt_template=(
|
|
259
|
+
"Besedilo: {text}\n"
|
|
260
|
+
"Vprašanje: {question}\n"
|
|
261
|
+
"Odgovor v največ 3 besedah: {label}"
|
|
262
|
+
),
|
|
263
|
+
default_instruction_prompt=(
|
|
264
|
+
"Besedilo: {text}\n\n"
|
|
265
|
+
"Odgovorite na naslednje vprašanje o zgornjem besedilu "
|
|
266
|
+
"v največ 3 besedah.\n\n"
|
|
267
|
+
"Vprašanje: {question}"
|
|
268
|
+
),
|
|
269
|
+
default_prompt_label_mapping=dict(),
|
|
270
|
+
),
|
|
271
|
+
SLOVAK: PromptConfig(
|
|
272
|
+
default_prompt_prefix=("Nasledujú texty s pridruženými otázkami a odpoveďami."),
|
|
273
|
+
default_prompt_template=(
|
|
274
|
+
"Text: {text}\nOtázka: {question}\nOdpoveď na maximálne 3 slová: {label}"
|
|
275
|
+
),
|
|
276
|
+
default_instruction_prompt=(
|
|
277
|
+
"Text: {text}\n\n"
|
|
278
|
+
"Odpovedzte na nasledujúcu otázku týkajúcu sa textu uvedeného vyššie "
|
|
279
|
+
"maximálne 3 slovami.\n\nOtázka: {question}"
|
|
280
|
+
),
|
|
281
|
+
default_prompt_label_mapping=dict(),
|
|
282
|
+
),
|
|
283
|
+
SERBIAN: PromptConfig(
|
|
284
|
+
default_prompt_prefix=(
|
|
285
|
+
"Следе текстови са одговарајућим питањима и одговорима."
|
|
286
|
+
),
|
|
287
|
+
default_prompt_template=(
|
|
288
|
+
"Текст: {text}\nПитање: {question}\nОдговор у максимум 3 речи: {label}"
|
|
289
|
+
),
|
|
290
|
+
default_instruction_prompt=(
|
|
291
|
+
"Текст: {text}\n\n"
|
|
292
|
+
"Одговорите на следеће питање о горњем тексту у максимум 3 речи.\n\n"
|
|
293
|
+
"Питање: {question}"
|
|
294
|
+
),
|
|
295
|
+
default_prompt_label_mapping=dict(),
|
|
296
|
+
),
|
|
297
|
+
SWEDISH: PromptConfig(
|
|
121
298
|
default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
|
|
122
299
|
default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
|
|
123
300
|
"{label}",
|
|
@@ -125,4 +302,18 @@ RC_TEMPLATES = {
|
|
|
125
302
|
"ovan med högst 3 ord.\n\nFråga: {question}",
|
|
126
303
|
default_prompt_label_mapping=dict(),
|
|
127
304
|
),
|
|
305
|
+
UKRAINIAN: PromptConfig(
|
|
306
|
+
default_prompt_prefix=(
|
|
307
|
+
"Нижче наведені тексти з відповідними питаннями та відповідями."
|
|
308
|
+
),
|
|
309
|
+
default_prompt_template=(
|
|
310
|
+
"Текст: {text}\nПитання: {question}\nВідповідь максимум 3 словами: {label}"
|
|
311
|
+
),
|
|
312
|
+
default_instruction_prompt=(
|
|
313
|
+
"Текст: {text}\n\n"
|
|
314
|
+
"Відповідь на наступне питання про вищезазначений текст максимум 3 "
|
|
315
|
+
"словами.\n\nПитання: {question}"
|
|
316
|
+
),
|
|
317
|
+
default_prompt_label_mapping=dict(),
|
|
318
|
+
),
|
|
128
319
|
}
|
|
@@ -1,10 +1,67 @@
|
|
|
1
1
|
"""Templates for the Sentiment Analysis task."""
|
|
2
2
|
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
3
5
|
from ..data_models import PromptConfig
|
|
4
|
-
from ..languages import
|
|
6
|
+
from ..languages import (
|
|
7
|
+
BOSNIAN,
|
|
8
|
+
BULGARIAN,
|
|
9
|
+
CROATIAN,
|
|
10
|
+
CZECH,
|
|
11
|
+
DANISH,
|
|
12
|
+
DUTCH,
|
|
13
|
+
ENGLISH,
|
|
14
|
+
ESTONIAN,
|
|
15
|
+
FAROESE,
|
|
16
|
+
FINNISH,
|
|
17
|
+
FRENCH,
|
|
18
|
+
GERMAN,
|
|
19
|
+
GREEK,
|
|
20
|
+
ICELANDIC,
|
|
21
|
+
ITALIAN,
|
|
22
|
+
LATVIAN,
|
|
23
|
+
LITHUANIAN,
|
|
24
|
+
NORWEGIAN,
|
|
25
|
+
NORWEGIAN_BOKMÅL,
|
|
26
|
+
NORWEGIAN_NYNORSK,
|
|
27
|
+
POLISH,
|
|
28
|
+
PORTUGUESE,
|
|
29
|
+
SERBIAN,
|
|
30
|
+
SLOVAK,
|
|
31
|
+
SLOVENE,
|
|
32
|
+
SPANISH,
|
|
33
|
+
SWEDISH,
|
|
34
|
+
UKRAINIAN,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
if t.TYPE_CHECKING:
|
|
38
|
+
from ..languages import Language
|
|
5
39
|
|
|
6
|
-
SENT_TEMPLATES = {
|
|
7
|
-
|
|
40
|
+
SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
41
|
+
BOSNIAN: PromptConfig(
|
|
42
|
+
default_prompt_label_mapping=dict(
|
|
43
|
+
positive="pozitivno", neutral="neutralno", negative="negativno"
|
|
44
|
+
),
|
|
45
|
+
default_prompt_prefix=(
|
|
46
|
+
"Slijede dokumenti i njihova osjetila, koja mogu biti {labels_str}."
|
|
47
|
+
),
|
|
48
|
+
default_prompt_template="Dokument: {text}\nOsjetilo: {label}",
|
|
49
|
+
default_instruction_prompt=(
|
|
50
|
+
"Dokument: {text}\n\nKlasificirajte osjećaj u dokumentu. "
|
|
51
|
+
"Odgovorite samo s {labels_str}, i ništa drugo."
|
|
52
|
+
),
|
|
53
|
+
),
|
|
54
|
+
BULGARIAN: PromptConfig(
|
|
55
|
+
default_prompt_label_mapping=dict(
|
|
56
|
+
positive="позитивен", neutral="неутрален", negative="негативен"
|
|
57
|
+
),
|
|
58
|
+
default_prompt_prefix="Следват документи и техният сентимент, който може да "
|
|
59
|
+
"бъде{labels_str}.",
|
|
60
|
+
default_prompt_template="Документ: {text}\nСентимент: {label}",
|
|
61
|
+
default_instruction_prompt="Документ: {text}\n\nКласифицирайте сентимента в "
|
|
62
|
+
"документа. Отговорете с {labels_str}, и нищо друго.",
|
|
63
|
+
),
|
|
64
|
+
DANISH: PromptConfig(
|
|
8
65
|
default_prompt_label_mapping=dict(
|
|
9
66
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
10
67
|
),
|
|
@@ -14,7 +71,30 @@ SENT_TEMPLATES = {
|
|
|
14
71
|
default_instruction_prompt="Dokument: {text}\n\nKlassificer sentimentet i "
|
|
15
72
|
"dokumentet. Svar kun med {labels_str}, og intet andet.",
|
|
16
73
|
),
|
|
17
|
-
|
|
74
|
+
CROATIAN: PromptConfig(
|
|
75
|
+
default_prompt_label_mapping=dict(
|
|
76
|
+
positive="pozitivno", neutral="neutralno", negative="negativno"
|
|
77
|
+
),
|
|
78
|
+
default_prompt_prefix=(
|
|
79
|
+
"Slijede dokumenti i njihova osjetila, koja mogu biti {labels_str}."
|
|
80
|
+
),
|
|
81
|
+
default_prompt_template=("Dokument: {text}\nOsjetilo: {label}"),
|
|
82
|
+
default_instruction_prompt=(
|
|
83
|
+
"Dokument: {text}\n\nKlasificirajte osjećaj u dokumentu. "
|
|
84
|
+
"Odgovorite samo s {labels_str}, i ništa drugo."
|
|
85
|
+
),
|
|
86
|
+
),
|
|
87
|
+
CZECH: PromptConfig(
|
|
88
|
+
default_prompt_label_mapping=dict(
|
|
89
|
+
positive="pozitivní", neutral="neutrální", negative="negativní"
|
|
90
|
+
),
|
|
91
|
+
default_prompt_prefix="Následují dokumenty a jejich sentiment, který může být "
|
|
92
|
+
"{labels_str}.",
|
|
93
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
94
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte sentiment v "
|
|
95
|
+
"dokumentu. Odpovězte pouze s {labels_str}, a nic jiného.",
|
|
96
|
+
),
|
|
97
|
+
GERMAN: PromptConfig(
|
|
18
98
|
default_prompt_label_mapping=dict(
|
|
19
99
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
20
100
|
),
|
|
@@ -24,7 +104,17 @@ SENT_TEMPLATES = {
|
|
|
24
104
|
default_instruction_prompt="Dokument: {text}\n\nKlassifizieren Sie die "
|
|
25
105
|
"Stimmung im Dokument. Antworten Sie mit {labels_str}, und nichts anderes.",
|
|
26
106
|
),
|
|
27
|
-
|
|
107
|
+
GREEK: PromptConfig(
|
|
108
|
+
default_prompt_label_mapping=dict(
|
|
109
|
+
positive="θετικό", neutral="ουδέτερο", negative="αρνητικό"
|
|
110
|
+
),
|
|
111
|
+
default_prompt_prefix="Τα ακόλουθα είναι έγγραφα και το συναίσθημά τους, "
|
|
112
|
+
"το οποίο μπορεί να είναι {labels_str}.",
|
|
113
|
+
default_prompt_template="Έγγραφο: {text}\nΣυναίσθημα: {label}",
|
|
114
|
+
default_instruction_prompt="Έγγραφο: {text}\n\nΤαξινομήστε το συναίσθημα "
|
|
115
|
+
"στο έγγραφο. Απαντήστε με {labels_str}, και τίποτα άλλο.",
|
|
116
|
+
),
|
|
117
|
+
ENGLISH: PromptConfig(
|
|
28
118
|
default_prompt_label_mapping=dict(
|
|
29
119
|
positive="positive", neutral="neutral", negative="negative"
|
|
30
120
|
),
|
|
@@ -34,7 +124,7 @@ SENT_TEMPLATES = {
|
|
|
34
124
|
default_instruction_prompt="Document: {text}\n\nClassify the sentiment in the "
|
|
35
125
|
"document. Answer with {labels_str}, and nothing else.",
|
|
36
126
|
),
|
|
37
|
-
|
|
127
|
+
SPANISH: PromptConfig(
|
|
38
128
|
default_prompt_label_mapping=dict(
|
|
39
129
|
positive="positivo", neutral="neutral", negative="negativo"
|
|
40
130
|
),
|
|
@@ -44,7 +134,32 @@ SENT_TEMPLATES = {
|
|
|
44
134
|
default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
|
|
45
135
|
"documento. Responde con {labels_str}, y nada más.",
|
|
46
136
|
),
|
|
47
|
-
|
|
137
|
+
ESTONIAN: PromptConfig(
|
|
138
|
+
default_prompt_label_mapping=dict(
|
|
139
|
+
positive="positiivne", neutral="neutraalne", negative="negatiivne"
|
|
140
|
+
),
|
|
141
|
+
default_prompt_prefix="Järgmised on dokumendid ja nende meelestatus, "
|
|
142
|
+
"mis võib olla {labels_str}.",
|
|
143
|
+
default_prompt_template="Dokument: {text}\nMeelestatus: {label}",
|
|
144
|
+
default_instruction_prompt="Dokument: {text}\n\nKlassifitseeri dokument "
|
|
145
|
+
"meelestatuse järgi. Võimalikud vastused: {labels_str}. Muud vastused "
|
|
146
|
+
"ei ole lubatud.",
|
|
147
|
+
),
|
|
148
|
+
POLISH: PromptConfig(
|
|
149
|
+
default_prompt_label_mapping=dict(
|
|
150
|
+
positive="pozytywny", neutral="neutralny", negative="negatywny"
|
|
151
|
+
),
|
|
152
|
+
default_prompt_prefix=(
|
|
153
|
+
"Poniżej znajdują się dokumenty i ich sentyment, który może być "
|
|
154
|
+
"{labels_str}."
|
|
155
|
+
),
|
|
156
|
+
default_prompt_template="Dokument: {text}\nSentyment: {label}",
|
|
157
|
+
default_instruction_prompt=(
|
|
158
|
+
"Dokument: {text}\n\nKlasyfikuj sentyment w dokumencie. "
|
|
159
|
+
"Odpowiedz jednym słowem: {labels_str}."
|
|
160
|
+
),
|
|
161
|
+
),
|
|
162
|
+
PORTUGUESE: PromptConfig(
|
|
48
163
|
default_prompt_label_mapping=dict(
|
|
49
164
|
positive="positivo", neutral="neutro", negative="negativo"
|
|
50
165
|
),
|
|
@@ -54,7 +169,7 @@ SENT_TEMPLATES = {
|
|
|
54
169
|
default_instruction_prompt="Documento: {text}\n\nClassifica o "
|
|
55
170
|
"sentimento do documento. Responde apenas com {labels_str}.",
|
|
56
171
|
),
|
|
57
|
-
|
|
172
|
+
FINNISH: PromptConfig(
|
|
58
173
|
default_prompt_label_mapping=dict(
|
|
59
174
|
positive="positiivinen", neutral="neutrali", negative="negatiivinen"
|
|
60
175
|
),
|
|
@@ -64,7 +179,7 @@ SENT_TEMPLATES = {
|
|
|
64
179
|
default_instruction_prompt="Teksti: {text}\n\nLuokittele arvostelun tunnesävy. "
|
|
65
180
|
"Vastaa vain {labels_str}, ei muuta.",
|
|
66
181
|
),
|
|
67
|
-
|
|
182
|
+
FAROESE: PromptConfig(
|
|
68
183
|
default_prompt_label_mapping=dict(
|
|
69
184
|
positive="positivt", neutral="neutralt", negative="negativt"
|
|
70
185
|
),
|
|
@@ -74,7 +189,7 @@ SENT_TEMPLATES = {
|
|
|
74
189
|
default_instruction_prompt="Skjal: {text}\n\nFlokka kensluna í skjalinum. "
|
|
75
190
|
"Svara við {labels_str}, og einki annað.",
|
|
76
191
|
),
|
|
77
|
-
|
|
192
|
+
FRENCH: PromptConfig(
|
|
78
193
|
default_prompt_label_mapping=dict(
|
|
79
194
|
positive="positif", neutral="neutre", negative="négatif"
|
|
80
195
|
),
|
|
@@ -84,17 +199,17 @@ SENT_TEMPLATES = {
|
|
|
84
199
|
default_instruction_prompt="Document: {text}\n\nClassez le sentiment dans le "
|
|
85
200
|
"document. Répondez par {labels_str}, et rien d'autre.",
|
|
86
201
|
),
|
|
87
|
-
|
|
202
|
+
ICELANDIC: PromptConfig(
|
|
88
203
|
default_prompt_label_mapping=dict(
|
|
89
204
|
positive="jákvætt", neutral="hlutlaust", negative="neikvætt"
|
|
90
205
|
),
|
|
91
|
-
default_prompt_prefix="
|
|
92
|
-
"verið
|
|
93
|
-
default_prompt_template="
|
|
94
|
-
default_instruction_prompt="
|
|
95
|
-
"Svaraðu með {labels_str}, og ekkert annað.",
|
|
206
|
+
default_prompt_prefix="Hér fyrir neðan eru textabrot ásamt lyndisgildi þeirra "
|
|
207
|
+
"sem getur verið 'jákvætt', 'hlutlaust' eða 'neikvætt'.",
|
|
208
|
+
default_prompt_template="Textabrot: {text}\nViðhorf: {label}",
|
|
209
|
+
default_instruction_prompt="Textabrot: {text}\n\nGreindu lyndið í "
|
|
210
|
+
"textabrotinu. Svaraðu með {labels_str}, og ekkert annað.",
|
|
96
211
|
),
|
|
97
|
-
|
|
212
|
+
ITALIAN: PromptConfig(
|
|
98
213
|
default_prompt_label_mapping=dict(
|
|
99
214
|
positive="positivo", neutral="neutro", negative="negativo"
|
|
100
215
|
),
|
|
@@ -104,7 +219,27 @@ SENT_TEMPLATES = {
|
|
|
104
219
|
default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
|
|
105
220
|
"documento. Rispondere con {labels_str}, e nient'altro.",
|
|
106
221
|
),
|
|
107
|
-
|
|
222
|
+
LITHUANIAN: PromptConfig(
|
|
223
|
+
default_prompt_label_mapping=dict(
|
|
224
|
+
positive="teigiamas", neutral="neutralus", negative="neigiamas"
|
|
225
|
+
),
|
|
226
|
+
default_prompt_prefix="Toliau pateikti dokumentai ir jų nuotaika, kuri "
|
|
227
|
+
"gali būti {labels_str}.",
|
|
228
|
+
default_prompt_template="Dokumentas: {text}\nNuotaika: {label}",
|
|
229
|
+
default_instruction_prompt="Dokumentas: {text}\n\nKlasifikuokite nuotaiką "
|
|
230
|
+
"dokumente. Atsakykite su {labels_str}, ir nieko kito.",
|
|
231
|
+
),
|
|
232
|
+
LATVIAN: PromptConfig(
|
|
233
|
+
default_prompt_label_mapping=dict(
|
|
234
|
+
positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
|
|
235
|
+
),
|
|
236
|
+
default_prompt_prefix="Tālāk ir dokumenti un to noskaņojums, kas var būt "
|
|
237
|
+
"{labels_str}.",
|
|
238
|
+
default_prompt_template="Dokuments: {text}\nNoskaņojums: {label}",
|
|
239
|
+
default_instruction_prompt="Dokuments: {text}\n\nKlasificējiet noskaņojumu "
|
|
240
|
+
"dokumentā. Atbildiet ar {labels_str}, un neko citu.",
|
|
241
|
+
),
|
|
242
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
108
243
|
default_prompt_label_mapping=dict(
|
|
109
244
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
110
245
|
),
|
|
@@ -114,7 +249,7 @@ SENT_TEMPLATES = {
|
|
|
114
249
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
115
250
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
116
251
|
),
|
|
117
|
-
|
|
252
|
+
DUTCH: PromptConfig(
|
|
118
253
|
default_prompt_label_mapping=dict(
|
|
119
254
|
positive="positief", neutral="neutraal", negative="negatief"
|
|
120
255
|
),
|
|
@@ -124,7 +259,7 @@ SENT_TEMPLATES = {
|
|
|
124
259
|
default_instruction_prompt="Document: {text}\n\nClassificeer het sentiment in "
|
|
125
260
|
"het document. Antwoord met {labels_str}, en verder niets.",
|
|
126
261
|
),
|
|
127
|
-
|
|
262
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
128
263
|
default_prompt_label_mapping=dict(
|
|
129
264
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
130
265
|
),
|
|
@@ -134,7 +269,7 @@ SENT_TEMPLATES = {
|
|
|
134
269
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
135
270
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
136
271
|
),
|
|
137
|
-
|
|
272
|
+
NORWEGIAN: PromptConfig(
|
|
138
273
|
default_prompt_label_mapping=dict(
|
|
139
274
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
140
275
|
),
|
|
@@ -144,7 +279,41 @@ SENT_TEMPLATES = {
|
|
|
144
279
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
145
280
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
146
281
|
),
|
|
147
|
-
|
|
282
|
+
SLOVAK: PromptConfig(
|
|
283
|
+
default_prompt_label_mapping=dict(
|
|
284
|
+
positive="pozitívne", neutral="neutrálne", negative="negatívne"
|
|
285
|
+
),
|
|
286
|
+
default_prompt_prefix="Nižšie sú dokumenty a ich sentiment, ktorý môže byť "
|
|
287
|
+
"{labels_str}.",
|
|
288
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
289
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte pocit v "
|
|
290
|
+
"dokumente. Odpovedzte so {labels_str}, a nič iné.",
|
|
291
|
+
),
|
|
292
|
+
SLOVENE: PromptConfig(
|
|
293
|
+
default_prompt_label_mapping=dict(
|
|
294
|
+
positive="pozitivno", neutral="nevtralno", negative="negativno"
|
|
295
|
+
),
|
|
296
|
+
default_prompt_prefix=(
|
|
297
|
+
"Spodaj so dokumenti in njihov sentiment, ki je lahko {labels_str}."
|
|
298
|
+
),
|
|
299
|
+
default_prompt_template=("Dokument: {text}\nSentiment: {label}"),
|
|
300
|
+
default_instruction_prompt=(
|
|
301
|
+
"Dokument: {text}\n\n"
|
|
302
|
+
"Klasificirajte sentiment v dokumentu. "
|
|
303
|
+
"Odgovorite z {labels_str}, in nič drugega."
|
|
304
|
+
),
|
|
305
|
+
),
|
|
306
|
+
SERBIAN: PromptConfig(
|
|
307
|
+
default_prompt_label_mapping=dict(
|
|
308
|
+
positive="pozitivan", neutral="neutralan", negative="negativan"
|
|
309
|
+
),
|
|
310
|
+
default_prompt_prefix="U nastavku su dokumenti i njihov sentiment, koji može "
|
|
311
|
+
"biti {labels_str}.",
|
|
312
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
313
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte sentiment u "
|
|
314
|
+
"dokumentu. Odgovorite sa {labels_str}, i ništa drugo.",
|
|
315
|
+
),
|
|
316
|
+
SWEDISH: PromptConfig(
|
|
148
317
|
default_prompt_label_mapping=dict(
|
|
149
318
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
150
319
|
),
|
|
@@ -154,4 +323,18 @@ SENT_TEMPLATES = {
|
|
|
154
323
|
default_instruction_prompt="Dokument: {text}\n\nKlassificera känslan i "
|
|
155
324
|
"dokumentet. Svara med {labels_str}, och inget annat.",
|
|
156
325
|
),
|
|
326
|
+
UKRAINIAN: PromptConfig(
|
|
327
|
+
default_prompt_label_mapping=dict(
|
|
328
|
+
positive="позитивний", neutral="нейтральний", negative="негативний"
|
|
329
|
+
),
|
|
330
|
+
default_prompt_prefix=(
|
|
331
|
+
"Нижче наведені документи і їх настрій, який може бути {labels_str}."
|
|
332
|
+
),
|
|
333
|
+
default_prompt_template="Документ: {text}\nНастрій: {label}",
|
|
334
|
+
default_instruction_prompt=(
|
|
335
|
+
"Документ: {text}\n\n"
|
|
336
|
+
"Класифікуйте настрій у документі. "
|
|
337
|
+
"Відповідайте {labels_str}, і нічого більше."
|
|
338
|
+
),
|
|
339
|
+
),
|
|
157
340
|
}
|