EuroEval 16.3.0__py3-none-any.whl → 16.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +9 -2
- euroeval/benchmark_config_factory.py +51 -50
- euroeval/benchmark_modules/base.py +9 -21
- euroeval/benchmark_modules/fresh.py +2 -1
- euroeval/benchmark_modules/hf.py +101 -71
- euroeval/benchmark_modules/litellm.py +115 -53
- euroeval/benchmark_modules/vllm.py +107 -92
- euroeval/benchmarker.py +144 -121
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +86 -8
- euroeval/constants.py +9 -0
- euroeval/data_loading.py +80 -29
- euroeval/data_models.py +338 -330
- euroeval/dataset_configs/__init__.py +12 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +55 -93
- euroeval/dataset_configs/dutch.py +48 -87
- euroeval/dataset_configs/english.py +45 -77
- euroeval/dataset_configs/estonian.py +42 -34
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -69
- euroeval/dataset_configs/french.py +39 -75
- euroeval/dataset_configs/german.py +45 -82
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -79
- euroeval/dataset_configs/latvian.py +28 -35
- euroeval/dataset_configs/lithuanian.py +28 -26
- euroeval/dataset_configs/norwegian.py +72 -115
- euroeval/dataset_configs/polish.py +33 -61
- euroeval/dataset_configs/portuguese.py +33 -66
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/spanish.py +42 -77
- euroeval/dataset_configs/swedish.py +52 -90
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +24 -17
- euroeval/generation.py +15 -14
- euroeval/generation_utils.py +8 -8
- euroeval/languages.py +395 -323
- euroeval/logging_utils.py +250 -0
- euroeval/metrics/base.py +0 -3
- euroeval/metrics/huggingface.py +21 -6
- euroeval/metrics/llm_as_a_judge.py +6 -4
- euroeval/metrics/pipeline.py +17 -9
- euroeval/metrics/speed.py +0 -3
- euroeval/model_cache.py +17 -19
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +99 -42
- euroeval/prompt_templates/multiple_choice.py +102 -38
- euroeval/prompt_templates/named_entity_recognition.py +172 -51
- euroeval/prompt_templates/reading_comprehension.py +119 -42
- euroeval/prompt_templates/sentiment_classification.py +110 -40
- euroeval/prompt_templates/summarization.py +85 -40
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +11 -10
- euroeval/speed_benchmark.py +5 -6
- euroeval/task_group_utils/multiple_choice_classification.py +2 -4
- euroeval/task_group_utils/question_answering.py +24 -16
- euroeval/task_group_utils/sequence_classification.py +48 -35
- euroeval/task_group_utils/text_to_text.py +19 -9
- euroeval/task_group_utils/token_classification.py +21 -17
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +33 -22
- euroeval/types.py +10 -9
- euroeval/utils.py +35 -149
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +196 -39
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.3.0.dist-info/RECORD +0 -71
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.3.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,32 +4,59 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SERBIAN,
|
|
28
|
+
SLOVAK,
|
|
29
|
+
SPANISH,
|
|
30
|
+
SWEDISH,
|
|
31
|
+
UKRAINIAN,
|
|
26
32
|
)
|
|
27
33
|
|
|
28
34
|
if t.TYPE_CHECKING:
|
|
29
|
-
from ..
|
|
35
|
+
from ..languages import Language
|
|
30
36
|
|
|
31
37
|
RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
32
|
-
|
|
38
|
+
BULGARIAN: PromptConfig(
|
|
39
|
+
default_prompt_prefix="Следват текстове със съответни въпроси и отговори.",
|
|
40
|
+
default_prompt_template="Текст: {text}\nВъпрос: {question}\nОтговор с максимум "
|
|
41
|
+
"3 думи: {label}",
|
|
42
|
+
default_instruction_prompt="Текст: {text}\n\nОтговорете на следния въпрос "
|
|
43
|
+
"относно текста по-горе с максимум 3 думи.\n\nВъпрос: {question}",
|
|
44
|
+
default_prompt_label_mapping=dict(),
|
|
45
|
+
),
|
|
46
|
+
CZECH: PromptConfig(
|
|
47
|
+
default_prompt_prefix="Následující texty obsahují otázky a odpovědi.",
|
|
48
|
+
default_prompt_template=(
|
|
49
|
+
"Text: {text}\nOtázka: {question}\nOdpověď maximálně 3 slovy: {label}"
|
|
50
|
+
),
|
|
51
|
+
default_instruction_prompt=(
|
|
52
|
+
"Text: {text}\n\n"
|
|
53
|
+
"Odpovězte na následující otázku k výše uvedenému textu "
|
|
54
|
+
"maximálně 3 slovy.\n\n"
|
|
55
|
+
"Otázka: {question}"
|
|
56
|
+
),
|
|
57
|
+
default_prompt_label_mapping=dict(),
|
|
58
|
+
),
|
|
59
|
+
DANISH: PromptConfig(
|
|
33
60
|
default_prompt_prefix="Følgende er tekster med tilhørende spørgsmål og svar.",
|
|
34
61
|
default_prompt_template="Tekst: {text}\nSpørgsmål: {question}\nSvar med maks. "
|
|
35
62
|
"3 ord: {label}",
|
|
@@ -37,7 +64,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
37
64
|
"teksten ovenfor med maks. 3 ord.\n\nSpørgsmål: {question}",
|
|
38
65
|
default_prompt_label_mapping=dict(),
|
|
39
66
|
),
|
|
40
|
-
|
|
67
|
+
GERMAN: PromptConfig(
|
|
41
68
|
default_prompt_prefix="Im Folgenden finden Sie Texte mit den dazugehörigen "
|
|
42
69
|
"Fragen und Antworten.",
|
|
43
70
|
default_prompt_template="Text: {text}\nFragen: {question}\nFragen Antwort in "
|
|
@@ -46,7 +73,16 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
46
73
|
"zum obigen Text in höchstens 3 Wörtern.\n\nFrage: {question}",
|
|
47
74
|
default_prompt_label_mapping=dict(),
|
|
48
75
|
),
|
|
49
|
-
|
|
76
|
+
GREEK: PromptConfig(
|
|
77
|
+
default_prompt_prefix="Ακολουθούν κείμενα με τις αντίστοιχες ερωτήσεις και "
|
|
78
|
+
"απαντήσεις.",
|
|
79
|
+
default_prompt_template="Κείμενο: {text}\nΕρώτηση: {question}\nΑπάντηση σε το "
|
|
80
|
+
"πολύ 3 λέξεις: {label}",
|
|
81
|
+
default_instruction_prompt="Κείμενο: {text}\n\nΑπαντήστε την παρακάτω ερώτηση "
|
|
82
|
+
"σχετικά με το παραπάνω κείμενο σε το πολύ 3 λέξεις.\n\nΕρώτηση: {question}",
|
|
83
|
+
default_prompt_label_mapping=dict(),
|
|
84
|
+
),
|
|
85
|
+
ENGLISH: PromptConfig(
|
|
50
86
|
default_prompt_prefix="The following are texts with accompanying questions and "
|
|
51
87
|
"answers.",
|
|
52
88
|
default_prompt_template="Text: {text}\nQuestion: {question}\nAnswer in max "
|
|
@@ -55,7 +91,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
55
91
|
"about the above text in at most 3 words.\n\nQuestion: {question}",
|
|
56
92
|
default_prompt_label_mapping=dict(),
|
|
57
93
|
),
|
|
58
|
-
|
|
94
|
+
SPANISH: PromptConfig(
|
|
59
95
|
default_prompt_prefix="A continuación se presentan textos con sus preguntas y "
|
|
60
96
|
"respuestas correspondientes.",
|
|
61
97
|
default_prompt_template="Texto: {text}\nPregunta: {question}\nRespuesta en "
|
|
@@ -64,7 +100,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
64
100
|
"sobre el texto anterior en máximo 3 palabras.\n\nPregunta: {question}",
|
|
65
101
|
default_prompt_label_mapping=dict(),
|
|
66
102
|
),
|
|
67
|
-
|
|
103
|
+
ESTONIAN: PromptConfig(
|
|
68
104
|
default_prompt_prefix="Järgnevad on tekstid koos küsimuste ja vastustega.",
|
|
69
105
|
default_prompt_template="Tekst: {text}\nKüsimus: {question}\nVasta "
|
|
70
106
|
"maksimaalselt 3 sõnaga: {label}",
|
|
@@ -72,7 +108,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
72
108
|
"ülevaltoodud teksti kohta maksimaalselt 3 sõnaga.\n\nKüsimus: {question}",
|
|
73
109
|
default_prompt_label_mapping=dict(),
|
|
74
110
|
),
|
|
75
|
-
|
|
111
|
+
FINNISH: PromptConfig(
|
|
76
112
|
default_prompt_prefix="Seuraavassa on tekstejä ja niihin liittyviä kysymyksiä "
|
|
77
113
|
"ja vastauksia.",
|
|
78
114
|
default_prompt_template="Teksti: {text}\nKysymys: {question} "
|
|
@@ -82,7 +118,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
82
118
|
"Kysymys: {question}",
|
|
83
119
|
default_prompt_label_mapping=dict(),
|
|
84
120
|
),
|
|
85
|
-
|
|
121
|
+
FAROESE: PromptConfig(
|
|
86
122
|
default_prompt_prefix="Hetta eru tekstir saman við spurningum og svar.",
|
|
87
123
|
default_prompt_template="Tekstur: {text}\nSpurningur: {question}\nSvara við í "
|
|
88
124
|
"mesta lagi trimum orðum: {label}",
|
|
@@ -90,7 +126,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
90
126
|
"tekstin uppiyvir við í mesta lagi trimum orðum.\n\nSpurningur: {question}",
|
|
91
127
|
default_prompt_label_mapping=dict(),
|
|
92
128
|
),
|
|
93
|
-
|
|
129
|
+
FRENCH: PromptConfig(
|
|
94
130
|
default_prompt_prefix="Les textes suivants sont accompagnés de questions et de "
|
|
95
131
|
"réponses.",
|
|
96
132
|
default_prompt_template="Texte: {text}\nQuestion: {question}\nRéponse en 3 "
|
|
@@ -99,7 +135,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
99
135
|
"sur le texte ci-dessus en 3 mots maximum.\n\nQuestion: {question}",
|
|
100
136
|
default_prompt_label_mapping=dict(),
|
|
101
137
|
),
|
|
102
|
-
|
|
138
|
+
ICELANDIC: PromptConfig(
|
|
103
139
|
default_prompt_prefix="Eftirfarandi eru textar með tilheyrandi spurningum og "
|
|
104
140
|
"svörum.",
|
|
105
141
|
default_prompt_template="Texti: {text}\nSpurning: {question}\nSvaraðu með að "
|
|
@@ -108,7 +144,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
108
144
|
"textann að hámarki í 3 orðum.\n\nSpurning: {question}",
|
|
109
145
|
default_prompt_label_mapping=dict(),
|
|
110
146
|
),
|
|
111
|
-
|
|
147
|
+
ITALIAN: PromptConfig(
|
|
112
148
|
default_prompt_prefix="I testi che seguono sono accompagnati da domande e "
|
|
113
149
|
"risposte.",
|
|
114
150
|
default_prompt_template="Testo: {text}\nDomanda: {question}\nRispondere in "
|
|
@@ -117,7 +153,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
117
153
|
"sul in un massimo di 3 parole.\n\nDomanda: {question}",
|
|
118
154
|
default_prompt_label_mapping=dict(),
|
|
119
155
|
),
|
|
120
|
-
|
|
156
|
+
LITHUANIAN: PromptConfig(
|
|
121
157
|
default_prompt_prefix="Toliau pateikti tekstai su atitinkamais klausimais ir "
|
|
122
158
|
"atsakymais.",
|
|
123
159
|
default_prompt_template="Tekstas: {text}\nKlausimas: {question}\nAtsakykite ne "
|
|
@@ -126,7 +162,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
126
162
|
"aukščiau pateiktą tekstą ne daugiau kaip 3 žodžiais.\n\nKlausimas: {question}",
|
|
127
163
|
default_prompt_label_mapping=dict(),
|
|
128
164
|
),
|
|
129
|
-
|
|
165
|
+
LATVIAN: PromptConfig(
|
|
130
166
|
default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
|
|
131
167
|
"atbildēm.",
|
|
132
168
|
default_prompt_template="Teksts: {text}\nJautājums: {question}\nAtbildēt ar "
|
|
@@ -135,7 +171,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
135
171
|
"iepriekš minēto tekstu ar maksimāli 3 vārdiem.\n\nJautājums: {question}",
|
|
136
172
|
default_prompt_label_mapping=dict(),
|
|
137
173
|
),
|
|
138
|
-
|
|
174
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
139
175
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
140
176
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
141
177
|
"ord: {label}",
|
|
@@ -143,7 +179,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
143
179
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
144
180
|
default_prompt_label_mapping=dict(),
|
|
145
181
|
),
|
|
146
|
-
|
|
182
|
+
DUTCH: PromptConfig(
|
|
147
183
|
default_prompt_prefix="Hieronder volgen teksten met bijbehorende vragen en "
|
|
148
184
|
"antwoorden.",
|
|
149
185
|
default_prompt_template="Tekst: {text}\nVraag: {question}\nAntwoord in max "
|
|
@@ -152,7 +188,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
152
188
|
"over de bovenstaande tekst in maximaal 3 woorden.\n\nVraag: {question}",
|
|
153
189
|
default_prompt_label_mapping=dict(),
|
|
154
190
|
),
|
|
155
|
-
|
|
191
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
156
192
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
157
193
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
158
194
|
"ord: {label}",
|
|
@@ -160,7 +196,7 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
160
196
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
161
197
|
default_prompt_label_mapping=dict(),
|
|
162
198
|
),
|
|
163
|
-
|
|
199
|
+
NORWEGIAN: PromptConfig(
|
|
164
200
|
default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
|
|
165
201
|
default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
|
|
166
202
|
"ord: {label}",
|
|
@@ -168,17 +204,18 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
168
204
|
"teksten ovenfor med maks 3 ord.\n\nSpørsmål: {question}",
|
|
169
205
|
default_prompt_label_mapping=dict(),
|
|
170
206
|
),
|
|
171
|
-
|
|
207
|
+
POLISH: PromptConfig(
|
|
172
208
|
default_prompt_prefix=(
|
|
173
209
|
"Poniżej znajdują się teksty z towarzyszącymi pytaniami i odpowiedziami."
|
|
174
210
|
),
|
|
175
|
-
default_prompt_template="Tekst: {text}\nPytanie: {question}\nOdpowiedź
|
|
176
|
-
"maksymalnie 3
|
|
211
|
+
default_prompt_template="Tekst: {text}\nPytanie: {question}\nOdpowiedź z "
|
|
212
|
+
"użyciem maksymalnie 3 słów: {label}",
|
|
177
213
|
default_instruction_prompt="Tekst: {text}\n\nOdpowiedz na następujące pytanie "
|
|
178
|
-
"dotyczące powyższego tekstu
|
|
214
|
+
"dotyczące powyższego tekstu, używając maksymalnie 3 słów.\n\nPytanie: "
|
|
215
|
+
"{question}",
|
|
179
216
|
default_prompt_label_mapping=dict(),
|
|
180
217
|
),
|
|
181
|
-
|
|
218
|
+
PORTUGUESE: PromptConfig(
|
|
182
219
|
default_prompt_prefix="Os textos que se seguem são acompanhados de perguntas "
|
|
183
220
|
"e respostas.",
|
|
184
221
|
default_prompt_template="Texto: {text}\nPergunta: {question}\nResposta com "
|
|
@@ -187,7 +224,33 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
187
224
|
"sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
|
|
188
225
|
default_prompt_label_mapping=dict(),
|
|
189
226
|
),
|
|
190
|
-
|
|
227
|
+
SLOVAK: PromptConfig(
|
|
228
|
+
default_prompt_prefix=("Nasledujú texty s pridruženými otázkami a odpoveďami."),
|
|
229
|
+
default_prompt_template=(
|
|
230
|
+
"Text: {text}\nOtázka: {question}\nOdpoveď na maximálne 3 slová: {label}"
|
|
231
|
+
),
|
|
232
|
+
default_instruction_prompt=(
|
|
233
|
+
"Text: {text}\n\n"
|
|
234
|
+
"Odpovedzte na nasledujúcu otázku týkajúcu sa textu uvedeného vyššie "
|
|
235
|
+
"maximálne 3 slovami.\n\nOtázka: {question}"
|
|
236
|
+
),
|
|
237
|
+
default_prompt_label_mapping=dict(),
|
|
238
|
+
),
|
|
239
|
+
SERBIAN: PromptConfig(
|
|
240
|
+
default_prompt_prefix=(
|
|
241
|
+
"Следе текстови са одговарајућим питањима и одговорима."
|
|
242
|
+
),
|
|
243
|
+
default_prompt_template=(
|
|
244
|
+
"Текст: {text}\nПитање: {question}\nОдговор у максимум 3 речи: {label}"
|
|
245
|
+
),
|
|
246
|
+
default_instruction_prompt=(
|
|
247
|
+
"Текст: {text}\n\n"
|
|
248
|
+
"Одговорите на следеће питање о горњем тексту у максимум 3 речи.\n\n"
|
|
249
|
+
"Питање: {question}"
|
|
250
|
+
),
|
|
251
|
+
default_prompt_label_mapping=dict(),
|
|
252
|
+
),
|
|
253
|
+
SWEDISH: PromptConfig(
|
|
191
254
|
default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
|
|
192
255
|
default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
|
|
193
256
|
"{label}",
|
|
@@ -195,4 +258,18 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
195
258
|
"ovan med högst 3 ord.\n\nFråga: {question}",
|
|
196
259
|
default_prompt_label_mapping=dict(),
|
|
197
260
|
),
|
|
261
|
+
UKRAINIAN: PromptConfig(
|
|
262
|
+
default_prompt_prefix=(
|
|
263
|
+
"Нижче наведені тексти з відповідними питаннями та відповідями."
|
|
264
|
+
),
|
|
265
|
+
default_prompt_template=(
|
|
266
|
+
"Текст: {text}\nПитання: {question}\nВідповідь максимум 3 словами: {label}"
|
|
267
|
+
),
|
|
268
|
+
default_instruction_prompt=(
|
|
269
|
+
"Текст: {text}\n\n"
|
|
270
|
+
"Відповідь на наступне питання про вищезазначений текст максимум 3 "
|
|
271
|
+
"словами.\n\nПитання: {question}"
|
|
272
|
+
),
|
|
273
|
+
default_prompt_label_mapping=dict(),
|
|
274
|
+
),
|
|
198
275
|
}
|
|
@@ -4,32 +4,48 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SERBIAN,
|
|
28
|
+
SLOVAK,
|
|
29
|
+
SPANISH,
|
|
30
|
+
SWEDISH,
|
|
31
|
+
UKRAINIAN,
|
|
26
32
|
)
|
|
27
33
|
|
|
28
34
|
if t.TYPE_CHECKING:
|
|
29
|
-
from ..
|
|
35
|
+
from ..languages import Language
|
|
30
36
|
|
|
31
37
|
SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
32
|
-
|
|
38
|
+
BULGARIAN: PromptConfig(
|
|
39
|
+
default_prompt_label_mapping=dict(
|
|
40
|
+
positive="позитивен", neutral="неутрален", negative="негативен"
|
|
41
|
+
),
|
|
42
|
+
default_prompt_prefix="Следват документи и техният сентимент, който може да "
|
|
43
|
+
"бъде{labels_str}.",
|
|
44
|
+
default_prompt_template="Документ: {text}\nСентимент: {label}",
|
|
45
|
+
default_instruction_prompt="Документ: {text}\n\nКласифицирайте сентимента в "
|
|
46
|
+
"документа. Отговорете с {labels_str}, и нищо друго.",
|
|
47
|
+
),
|
|
48
|
+
DANISH: PromptConfig(
|
|
33
49
|
default_prompt_label_mapping=dict(
|
|
34
50
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
35
51
|
),
|
|
@@ -39,7 +55,17 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
39
55
|
default_instruction_prompt="Dokument: {text}\n\nKlassificer sentimentet i "
|
|
40
56
|
"dokumentet. Svar kun med {labels_str}, og intet andet.",
|
|
41
57
|
),
|
|
42
|
-
|
|
58
|
+
CZECH: PromptConfig(
|
|
59
|
+
default_prompt_label_mapping=dict(
|
|
60
|
+
positive="pozitivní", neutral="neutrální", negative="negativní"
|
|
61
|
+
),
|
|
62
|
+
default_prompt_prefix="Následují dokumenty a jejich sentiment, který může být "
|
|
63
|
+
"{labels_str}.",
|
|
64
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
65
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte sentiment v "
|
|
66
|
+
"dokumentu. Odpovězte pouze s {labels_str}, a nic jiného.",
|
|
67
|
+
),
|
|
68
|
+
GERMAN: PromptConfig(
|
|
43
69
|
default_prompt_label_mapping=dict(
|
|
44
70
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
45
71
|
),
|
|
@@ -49,7 +75,17 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
49
75
|
default_instruction_prompt="Dokument: {text}\n\nKlassifizieren Sie die "
|
|
50
76
|
"Stimmung im Dokument. Antworten Sie mit {labels_str}, und nichts anderes.",
|
|
51
77
|
),
|
|
52
|
-
|
|
78
|
+
GREEK: PromptConfig(
|
|
79
|
+
default_prompt_label_mapping=dict(
|
|
80
|
+
positive="θετικό", neutral="ουδέτερο", negative="αρνητικό"
|
|
81
|
+
),
|
|
82
|
+
default_prompt_prefix="Τα ακόλουθα είναι έγγραφα και το συναίσθημά τους, "
|
|
83
|
+
"το οποίο μπορεί να είναι {labels_str}.",
|
|
84
|
+
default_prompt_template="Έγγραφο: {text}\nΣυναίσθημα: {label}",
|
|
85
|
+
default_instruction_prompt="Έγγραφο: {text}\n\nΤαξινομήστε το συναίσθημα "
|
|
86
|
+
"στο έγγραφο. Απαντήστε με {labels_str}, και τίποτα άλλο.",
|
|
87
|
+
),
|
|
88
|
+
ENGLISH: PromptConfig(
|
|
53
89
|
default_prompt_label_mapping=dict(
|
|
54
90
|
positive="positive", neutral="neutral", negative="negative"
|
|
55
91
|
),
|
|
@@ -59,7 +95,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
59
95
|
default_instruction_prompt="Document: {text}\n\nClassify the sentiment in the "
|
|
60
96
|
"document. Answer with {labels_str}, and nothing else.",
|
|
61
97
|
),
|
|
62
|
-
|
|
98
|
+
SPANISH: PromptConfig(
|
|
63
99
|
default_prompt_label_mapping=dict(
|
|
64
100
|
positive="positivo", neutral="neutral", negative="negativo"
|
|
65
101
|
),
|
|
@@ -69,7 +105,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
69
105
|
default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
|
|
70
106
|
"documento. Responde con {labels_str}, y nada más.",
|
|
71
107
|
),
|
|
72
|
-
|
|
108
|
+
ESTONIAN: PromptConfig(
|
|
73
109
|
default_prompt_label_mapping=dict(
|
|
74
110
|
positive="positiivne", neutral="neutraalne", negative="negatiivne"
|
|
75
111
|
),
|
|
@@ -80,7 +116,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
80
116
|
"meelestatuse järgi. Võimalikud vastused: {labels_str}. Muud vastused "
|
|
81
117
|
"ei ole lubatud.",
|
|
82
118
|
),
|
|
83
|
-
|
|
119
|
+
POLISH: PromptConfig(
|
|
84
120
|
default_prompt_label_mapping=dict(
|
|
85
121
|
positive="pozytywny", neutral="neutralny", negative="negatywny"
|
|
86
122
|
),
|
|
@@ -91,10 +127,10 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
91
127
|
default_prompt_template="Dokument: {text}\nSentyment: {label}",
|
|
92
128
|
default_instruction_prompt=(
|
|
93
129
|
"Dokument: {text}\n\nKlasyfikuj sentyment w dokumencie. "
|
|
94
|
-
"Odpowiedz
|
|
130
|
+
"Odpowiedz jednym słowem: {labels_str}."
|
|
95
131
|
),
|
|
96
132
|
),
|
|
97
|
-
|
|
133
|
+
PORTUGUESE: PromptConfig(
|
|
98
134
|
default_prompt_label_mapping=dict(
|
|
99
135
|
positive="positivo", neutral="neutro", negative="negativo"
|
|
100
136
|
),
|
|
@@ -104,7 +140,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
104
140
|
default_instruction_prompt="Documento: {text}\n\nClassifica o "
|
|
105
141
|
"sentimento do documento. Responde apenas com {labels_str}.",
|
|
106
142
|
),
|
|
107
|
-
|
|
143
|
+
FINNISH: PromptConfig(
|
|
108
144
|
default_prompt_label_mapping=dict(
|
|
109
145
|
positive="positiivinen", neutral="neutrali", negative="negatiivinen"
|
|
110
146
|
),
|
|
@@ -114,7 +150,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
114
150
|
default_instruction_prompt="Teksti: {text}\n\nLuokittele arvostelun tunnesävy. "
|
|
115
151
|
"Vastaa vain {labels_str}, ei muuta.",
|
|
116
152
|
),
|
|
117
|
-
|
|
153
|
+
FAROESE: PromptConfig(
|
|
118
154
|
default_prompt_label_mapping=dict(
|
|
119
155
|
positive="positivt", neutral="neutralt", negative="negativt"
|
|
120
156
|
),
|
|
@@ -124,7 +160,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
124
160
|
default_instruction_prompt="Skjal: {text}\n\nFlokka kensluna í skjalinum. "
|
|
125
161
|
"Svara við {labels_str}, og einki annað.",
|
|
126
162
|
),
|
|
127
|
-
|
|
163
|
+
FRENCH: PromptConfig(
|
|
128
164
|
default_prompt_label_mapping=dict(
|
|
129
165
|
positive="positif", neutral="neutre", negative="négatif"
|
|
130
166
|
),
|
|
@@ -134,7 +170,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
134
170
|
default_instruction_prompt="Document: {text}\n\nClassez le sentiment dans le "
|
|
135
171
|
"document. Répondez par {labels_str}, et rien d'autre.",
|
|
136
172
|
),
|
|
137
|
-
|
|
173
|
+
ICELANDIC: PromptConfig(
|
|
138
174
|
default_prompt_label_mapping=dict(
|
|
139
175
|
positive="jákvætt", neutral="hlutlaust", negative="neikvætt"
|
|
140
176
|
),
|
|
@@ -144,7 +180,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
144
180
|
default_instruction_prompt="Textabrot: {text}\n\nGreindu lyndið í "
|
|
145
181
|
"textabrotinu. Svaraðu með {labels_str}, og ekkert annað.",
|
|
146
182
|
),
|
|
147
|
-
|
|
183
|
+
ITALIAN: PromptConfig(
|
|
148
184
|
default_prompt_label_mapping=dict(
|
|
149
185
|
positive="positivo", neutral="neutro", negative="negativo"
|
|
150
186
|
),
|
|
@@ -154,7 +190,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
154
190
|
default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
|
|
155
191
|
"documento. Rispondere con {labels_str}, e nient'altro.",
|
|
156
192
|
),
|
|
157
|
-
|
|
193
|
+
LITHUANIAN: PromptConfig(
|
|
158
194
|
default_prompt_label_mapping=dict(
|
|
159
195
|
positive="teigiamas", neutral="neutralus", negative="neigiamas"
|
|
160
196
|
),
|
|
@@ -164,7 +200,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
164
200
|
default_instruction_prompt="Dokumentas: {text}\n\nKlasifikuokite nuotaiką "
|
|
165
201
|
"dokumente. Atsakykite su {labels_str}, ir nieko kito.",
|
|
166
202
|
),
|
|
167
|
-
|
|
203
|
+
LATVIAN: PromptConfig(
|
|
168
204
|
default_prompt_label_mapping=dict(
|
|
169
205
|
positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
|
|
170
206
|
),
|
|
@@ -174,7 +210,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
174
210
|
default_instruction_prompt="Dokuments: {text}\n\nKlasificējiet noskaņojumu "
|
|
175
211
|
"dokumentā. Atbildiet ar {labels_str}, un neko citu.",
|
|
176
212
|
),
|
|
177
|
-
|
|
213
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
178
214
|
default_prompt_label_mapping=dict(
|
|
179
215
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
180
216
|
),
|
|
@@ -184,7 +220,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
184
220
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
185
221
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
186
222
|
),
|
|
187
|
-
|
|
223
|
+
DUTCH: PromptConfig(
|
|
188
224
|
default_prompt_label_mapping=dict(
|
|
189
225
|
positive="positief", neutral="neutraal", negative="negatief"
|
|
190
226
|
),
|
|
@@ -194,7 +230,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
194
230
|
default_instruction_prompt="Document: {text}\n\nClassificeer het sentiment in "
|
|
195
231
|
"het document. Antwoord met {labels_str}, en verder niets.",
|
|
196
232
|
),
|
|
197
|
-
|
|
233
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
198
234
|
default_prompt_label_mapping=dict(
|
|
199
235
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
200
236
|
),
|
|
@@ -204,7 +240,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
204
240
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
205
241
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
206
242
|
),
|
|
207
|
-
|
|
243
|
+
NORWEGIAN: PromptConfig(
|
|
208
244
|
default_prompt_label_mapping=dict(
|
|
209
245
|
positive="positiv", neutral="nøytral", negative="negativ"
|
|
210
246
|
),
|
|
@@ -214,7 +250,27 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
214
250
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
215
251
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
216
252
|
),
|
|
217
|
-
|
|
253
|
+
SLOVAK: PromptConfig(
|
|
254
|
+
default_prompt_label_mapping=dict(
|
|
255
|
+
positive="pozitívne", neutral="neutrálne", negative="negatívne"
|
|
256
|
+
),
|
|
257
|
+
default_prompt_prefix="Nižšie sú dokumenty a ich sentiment, ktorý môže byť "
|
|
258
|
+
"{labels_str}.",
|
|
259
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
260
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte pocit v "
|
|
261
|
+
"dokumente. Odpovedzte so {labels_str}, a nič iné.",
|
|
262
|
+
),
|
|
263
|
+
SERBIAN: PromptConfig(
|
|
264
|
+
default_prompt_label_mapping=dict(
|
|
265
|
+
positive="pozitivan", neutral="neutralan", negative="negativan"
|
|
266
|
+
),
|
|
267
|
+
default_prompt_prefix="U nastavku su dokumenti i njihov sentiment, koji može "
|
|
268
|
+
"biti {labels_str}.",
|
|
269
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
270
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte sentiment u "
|
|
271
|
+
"dokumentu. Odgovorite sa {labels_str}, i ništa drugo.",
|
|
272
|
+
),
|
|
273
|
+
SWEDISH: PromptConfig(
|
|
218
274
|
default_prompt_label_mapping=dict(
|
|
219
275
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
220
276
|
),
|
|
@@ -224,4 +280,18 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
224
280
|
default_instruction_prompt="Dokument: {text}\n\nKlassificera känslan i "
|
|
225
281
|
"dokumentet. Svara med {labels_str}, och inget annat.",
|
|
226
282
|
),
|
|
283
|
+
UKRAINIAN: PromptConfig(
|
|
284
|
+
default_prompt_label_mapping=dict(
|
|
285
|
+
positive="позитивний", neutral="нейтральний", negative="негативний"
|
|
286
|
+
),
|
|
287
|
+
default_prompt_prefix=(
|
|
288
|
+
"Нижче наведені документи і їх настрій, який може бути {labels_str}."
|
|
289
|
+
),
|
|
290
|
+
default_prompt_template="Документ: {text}\nНастрій: {label}",
|
|
291
|
+
default_instruction_prompt=(
|
|
292
|
+
"Документ: {text}\n\n"
|
|
293
|
+
"Класифікуйте настрій у документі. "
|
|
294
|
+
"Відповідайте {labels_str}, і нічого більше."
|
|
295
|
+
),
|
|
296
|
+
),
|
|
227
297
|
}
|