EuroEval: euroeval-16.4.0-py3-none-any.whl → euroeval-16.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +6 -0
- euroeval/benchmark_config_factory.py +51 -46
- euroeval/benchmark_modules/base.py +6 -5
- euroeval/benchmark_modules/hf.py +2 -9
- euroeval/benchmark_modules/litellm.py +14 -12
- euroeval/benchmark_modules/vllm.py +17 -10
- euroeval/benchmarker.py +61 -44
- euroeval/caching_utils.py +1 -1
- euroeval/cli.py +86 -8
- euroeval/constants.py +3 -0
- euroeval/data_loading.py +78 -30
- euroeval/data_models.py +326 -326
- euroeval/dataset_configs/__init__.py +10 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +25 -29
- euroeval/dataset_configs/danish.py +51 -88
- euroeval/dataset_configs/dutch.py +48 -86
- euroeval/dataset_configs/english.py +45 -76
- euroeval/dataset_configs/estonian.py +36 -38
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -68
- euroeval/dataset_configs/french.py +39 -74
- euroeval/dataset_configs/german.py +45 -81
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -78
- euroeval/dataset_configs/latvian.py +28 -34
- euroeval/dataset_configs/lithuanian.py +22 -26
- euroeval/dataset_configs/norwegian.py +72 -114
- euroeval/dataset_configs/polish.py +33 -60
- euroeval/dataset_configs/portuguese.py +33 -65
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +19 -24
- euroeval/dataset_configs/spanish.py +42 -76
- euroeval/dataset_configs/swedish.py +48 -84
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +3 -2
- euroeval/generation.py +5 -4
- euroeval/generation_utils.py +6 -5
- euroeval/languages.py +395 -323
- euroeval/metrics/huggingface.py +14 -3
- euroeval/metrics/llm_as_a_judge.py +1 -1
- euroeval/model_cache.py +6 -5
- euroeval/model_loading.py +1 -1
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +82 -43
- euroeval/prompt_templates/multiple_choice.py +81 -41
- euroeval/prompt_templates/named_entity_recognition.py +125 -44
- euroeval/prompt_templates/reading_comprehension.py +92 -43
- euroeval/prompt_templates/sentiment_classification.py +91 -43
- euroeval/prompt_templates/summarization.py +64 -39
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +4 -3
- euroeval/speed_benchmark.py +2 -1
- euroeval/task_group_utils/multiple_choice_classification.py +2 -1
- euroeval/task_group_utils/question_answering.py +24 -13
- euroeval/task_group_utils/sequence_classification.py +5 -4
- euroeval/task_group_utils/text_to_text.py +2 -1
- euroeval/task_group_utils/token_classification.py +11 -8
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +19 -10
- euroeval/types.py +10 -9
- euroeval/utils.py +6 -3
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +194 -37
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.4.0.dist-info/RECORD +0 -75
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,34 +4,45 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FINNISH,
|
|
14
|
+
FRENCH,
|
|
15
|
+
GERMAN,
|
|
16
|
+
GREEK,
|
|
17
|
+
ICELANDIC,
|
|
18
|
+
ITALIAN,
|
|
19
|
+
LATVIAN,
|
|
20
|
+
LITHUANIAN,
|
|
21
|
+
NORWEGIAN,
|
|
22
|
+
NORWEGIAN_BOKMÅL,
|
|
23
|
+
NORWEGIAN_NYNORSK,
|
|
24
|
+
POLISH,
|
|
25
|
+
PORTUGUESE,
|
|
26
|
+
SERBIAN,
|
|
27
|
+
SLOVAK,
|
|
28
|
+
SPANISH,
|
|
29
|
+
SWEDISH,
|
|
30
|
+
UKRAINIAN,
|
|
27
31
|
)
|
|
28
32
|
|
|
29
33
|
if t.TYPE_CHECKING:
|
|
30
|
-
from ..
|
|
34
|
+
from ..languages import Language
|
|
31
35
|
|
|
32
36
|
# TODO: Missing Faroese
|
|
33
37
|
MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
34
|
-
|
|
38
|
+
BULGARIAN: PromptConfig(
|
|
39
|
+
default_prompt_prefix="Следват въпроси с множествен избор (с отговори).",
|
|
40
|
+
default_prompt_template="Въпрос: {text}\nОтговор: {label}",
|
|
41
|
+
default_instruction_prompt="Въпрос: {text}\n\nОтговорете на горния въпрос "
|
|
42
|
+
"като отговорите с {labels_str}, и нищо друго.",
|
|
43
|
+
default_prompt_label_mapping="auto",
|
|
44
|
+
),
|
|
45
|
+
CZECH: PromptConfig(
|
|
35
46
|
default_prompt_prefix=(
|
|
36
47
|
"Následující jsou otázky s výběrem z více možností (s odpověďmi)."
|
|
37
48
|
),
|
|
@@ -42,14 +53,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
42
53
|
),
|
|
43
54
|
default_prompt_label_mapping="auto",
|
|
44
55
|
),
|
|
45
|
-
|
|
56
|
+
DANISH: PromptConfig(
|
|
46
57
|
default_prompt_prefix="Følgende er multiple choice spørgsmål (med svar).",
|
|
47
58
|
default_prompt_template="Spørgsmål: {text}\nSvar: {label}",
|
|
48
59
|
default_instruction_prompt="Spørgsmål: {text}\n\nBesvar ovenstående spørgsmål "
|
|
49
60
|
"ved at svare med {labels_str}, og intet andet.",
|
|
50
61
|
default_prompt_label_mapping="auto",
|
|
51
62
|
),
|
|
52
|
-
|
|
63
|
+
GERMAN: PromptConfig(
|
|
53
64
|
default_prompt_prefix="Die folgenden Fragen sind Multiple-Choice-Fragen "
|
|
54
65
|
"(mit Antworten).",
|
|
55
66
|
default_prompt_template="Frage: {text}\nAntwort: {label}",
|
|
@@ -57,7 +68,15 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
57
68
|
"mit {labels_str}, und nichts anderes.",
|
|
58
69
|
default_prompt_label_mapping="auto",
|
|
59
70
|
),
|
|
60
|
-
|
|
71
|
+
GREEK: PromptConfig(
|
|
72
|
+
default_prompt_prefix="Ακολουθούν ερωτήσεις πολλαπλών επιλογών "
|
|
73
|
+
"(με απαντήσεις).",
|
|
74
|
+
default_prompt_template="Ερώτηση: {text}\nΑπάντηση: {label}",
|
|
75
|
+
default_instruction_prompt="Ερώτηση: {text}\n\nΑπαντήστε στην παραπάνω ερώτηση "
|
|
76
|
+
"χρησιμοποιώντας {labels_str}, και τίποτα άλλο.",
|
|
77
|
+
default_prompt_label_mapping="auto",
|
|
78
|
+
),
|
|
79
|
+
ENGLISH: PromptConfig(
|
|
61
80
|
default_prompt_prefix="The following are multiple choice questions (with "
|
|
62
81
|
"answers).",
|
|
63
82
|
default_prompt_template="Question: {text}\nAnswer: {label}",
|
|
@@ -65,7 +84,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
65
84
|
"replying with {labels_str}, and nothing else.",
|
|
66
85
|
default_prompt_label_mapping="auto",
|
|
67
86
|
),
|
|
68
|
-
|
|
87
|
+
SPANISH: PromptConfig(
|
|
69
88
|
default_prompt_prefix="Las siguientes son preguntas de opción múltiple "
|
|
70
89
|
"(con respuestas).",
|
|
71
90
|
default_prompt_template="Pregunta: {text}\nRespuesta: {label}",
|
|
@@ -73,7 +92,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
73
92
|
"usando solo {labels_str}, y nada más.",
|
|
74
93
|
default_prompt_label_mapping="auto",
|
|
75
94
|
),
|
|
76
|
-
|
|
95
|
+
ESTONIAN: PromptConfig(
|
|
77
96
|
default_prompt_prefix="Järgnevad on vastusevariantidega küsimused (koos "
|
|
78
97
|
"vastustega).",
|
|
79
98
|
default_prompt_template="Küsimus: {text}\nVastus: {label}",
|
|
@@ -81,7 +100,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
81
100
|
"ainult {labels_str}, ja mitte millegi muuga.",
|
|
82
101
|
default_prompt_label_mapping="auto",
|
|
83
102
|
),
|
|
84
|
-
|
|
103
|
+
PORTUGUESE: PromptConfig(
|
|
85
104
|
default_prompt_prefix="As seguintes são perguntas de escolha múltipla "
|
|
86
105
|
"(com respostas).",
|
|
87
106
|
default_prompt_template="Pergunta: {text}\nResposta: {label}",
|
|
@@ -89,14 +108,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
89
108
|
"acima usando só {labels_str}, e nada mais.",
|
|
90
109
|
default_prompt_label_mapping="auto",
|
|
91
110
|
),
|
|
92
|
-
|
|
111
|
+
FINNISH: PromptConfig(
|
|
93
112
|
default_prompt_prefix="Seuraavat ovat monivalintakysymyksiä (vastauksineen).",
|
|
94
113
|
default_prompt_template="Kysymys: {text}\nVastaus: {label}",
|
|
95
114
|
default_instruction_prompt="Kysymys: {text}\n\nVastaa yllä olevaan kysymykseen "
|
|
96
115
|
"käyttämällä {labels_str}, äläkä mitään muuta.",
|
|
97
116
|
default_prompt_label_mapping="auto",
|
|
98
117
|
),
|
|
99
|
-
|
|
118
|
+
FRENCH: PromptConfig(
|
|
100
119
|
default_prompt_prefix="Les questions suivantes sont des questions à choix "
|
|
101
120
|
"multiples (avec réponses).",
|
|
102
121
|
default_prompt_template="Question: {text}\nRéponse: {label}",
|
|
@@ -104,14 +123,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
104
123
|
"ci-dessus par {labels_str}, et rien d'autre.",
|
|
105
124
|
default_prompt_label_mapping="auto",
|
|
106
125
|
),
|
|
107
|
-
|
|
126
|
+
ICELANDIC: PromptConfig(
|
|
108
127
|
default_prompt_prefix="Eftirfarandi eru fjölvalsspurningar (með svörum).",
|
|
109
128
|
default_prompt_template="Spurningar: {text}\nSvara: {label}",
|
|
110
129
|
default_instruction_prompt="Spurningar: {text}\n\nSvaraðu eftirfarandi "
|
|
111
130
|
"spurningum með {labels_str}, og engu öðru.",
|
|
112
131
|
default_prompt_label_mapping="auto",
|
|
113
132
|
),
|
|
114
|
-
|
|
133
|
+
ITALIAN: PromptConfig(
|
|
115
134
|
default_prompt_prefix="Le seguenti sono domande a scelta multipla "
|
|
116
135
|
"(con relative risposte).",
|
|
117
136
|
default_prompt_template="Domanda: {text}\nRisposta: {label}",
|
|
@@ -119,7 +138,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
119
138
|
"precedente con {labels_str}, e nient'altro.",
|
|
120
139
|
default_prompt_label_mapping="auto",
|
|
121
140
|
),
|
|
122
|
-
|
|
141
|
+
LITHUANIAN: PromptConfig(
|
|
123
142
|
default_prompt_prefix="Toliau pateikti daugiavariančiai klausimai "
|
|
124
143
|
"(su atsakymais).",
|
|
125
144
|
default_prompt_template="Klausimas: {text}\nAtsakymas: {label}",
|
|
@@ -127,7 +146,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
127
146
|
"pateiktą klausimą atsakydami {labels_str}, ir nieko daugiau.",
|
|
128
147
|
default_prompt_label_mapping="auto",
|
|
129
148
|
),
|
|
130
|
-
|
|
149
|
+
LATVIAN: PromptConfig(
|
|
131
150
|
default_prompt_prefix="Tālāk seko jautājumi ar vairākām atbilžu izvēlēm "
|
|
132
151
|
"(ar atbildēm).",
|
|
133
152
|
default_prompt_template="Jautājums: {text}\nAtbilde: {label}",
|
|
@@ -135,35 +154,35 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
135
154
|
"jautājumu, atbildot ar {labels_str}, un nekas cits.",
|
|
136
155
|
default_prompt_label_mapping="auto",
|
|
137
156
|
),
|
|
138
|
-
|
|
157
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
139
158
|
default_prompt_prefix="Følgende er flervalgsspørsmål (med svar).",
|
|
140
159
|
default_prompt_template="Spørsmål: {text}\nSvar: {label}",
|
|
141
160
|
default_instruction_prompt="Spørsmål: {text}\n\nBesvar følgende spørsmål med "
|
|
142
161
|
"{labels_str}, og ikke noe annet.",
|
|
143
162
|
default_prompt_label_mapping="auto",
|
|
144
163
|
),
|
|
145
|
-
|
|
164
|
+
DUTCH: PromptConfig(
|
|
146
165
|
default_prompt_prefix="Hieronder staan meerkeuzevragen (met antwoorden).",
|
|
147
166
|
default_prompt_template="Vraag: {text}\nAntwoord: {label}",
|
|
148
167
|
default_instruction_prompt="Vraag: {text}\n\nBeantwoord de bovenstaande vraag "
|
|
149
168
|
"met {labels_str}, en niets anders.",
|
|
150
169
|
default_prompt_label_mapping="auto",
|
|
151
170
|
),
|
|
152
|
-
|
|
171
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
153
172
|
default_prompt_prefix="Følgende er flervalgsspørsmål (med svar).",
|
|
154
173
|
default_prompt_template="Spørsmål: {text}\nSvar: {label}",
|
|
155
174
|
default_instruction_prompt="Spørsmål: {text}\n\nBesvar følgende spørsmål med "
|
|
156
175
|
"{labels_str}, og ikke noe annet.",
|
|
157
176
|
default_prompt_label_mapping="auto",
|
|
158
177
|
),
|
|
159
|
-
|
|
178
|
+
NORWEGIAN: PromptConfig(
|
|
160
179
|
default_prompt_prefix="Følgende er flervalgsspørsmål (med svar).",
|
|
161
180
|
default_prompt_template="Spørsmål: {text}\nSvar: {label}",
|
|
162
181
|
default_instruction_prompt="Spørsmål: {text}\n\nBesvar følgende spørsmål med "
|
|
163
182
|
"{labels_str}, og ikke noe annet.",
|
|
164
183
|
default_prompt_label_mapping="auto",
|
|
165
184
|
),
|
|
166
|
-
|
|
185
|
+
POLISH: PromptConfig(
|
|
167
186
|
default_prompt_prefix="Poniżej znajdują się pytania wielokrotnego wyboru "
|
|
168
187
|
"(z odpowiedziami).",
|
|
169
188
|
default_prompt_template="Pytanie: {text}\nOdpowiedź: {label}",
|
|
@@ -171,7 +190,7 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
171
190
|
"używając {labels_str} i niczego więcej.",
|
|
172
191
|
default_prompt_label_mapping="auto",
|
|
173
192
|
),
|
|
174
|
-
|
|
193
|
+
SLOVAK: PromptConfig(
|
|
175
194
|
default_prompt_prefix=(
|
|
176
195
|
"Nasledujú otázky s viacerými možnosťami (s odpoveďami)."
|
|
177
196
|
),
|
|
@@ -182,11 +201,32 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
182
201
|
),
|
|
183
202
|
default_prompt_label_mapping="auto",
|
|
184
203
|
),
|
|
185
|
-
|
|
204
|
+
SERBIAN: PromptConfig(
|
|
205
|
+
default_prompt_prefix=("Slede pitanja višestrukog izbora (sa odgovorima)."),
|
|
206
|
+
default_prompt_template="Pitanje: {text}\nOdgovor: {label}",
|
|
207
|
+
default_instruction_prompt=(
|
|
208
|
+
"Pitanje: {text}\n\n"
|
|
209
|
+
"Odgovorite na navedeno pitanje koristeći {labels_str}, i ništa drugo."
|
|
210
|
+
),
|
|
211
|
+
default_prompt_label_mapping="auto",
|
|
212
|
+
),
|
|
213
|
+
SWEDISH: PromptConfig(
|
|
186
214
|
default_prompt_prefix="Följande är flervalsfrågor (med svar).",
|
|
187
215
|
default_prompt_template="Fråga: {text}\nSvar: {label}",
|
|
188
216
|
default_instruction_prompt="Fråga: {text}\n\nBesvara följande fråga med "
|
|
189
217
|
"{labels_str}, och inget annat.",
|
|
190
218
|
default_prompt_label_mapping="auto",
|
|
191
219
|
),
|
|
220
|
+
UKRAINIAN: PromptConfig(
|
|
221
|
+
default_prompt_prefix=(
|
|
222
|
+
"Нижче наведено питання з кількома варіантами відповідей (з відповідями)."
|
|
223
|
+
),
|
|
224
|
+
default_prompt_template=("Питання: {text}\nВідповідь: {label}"),
|
|
225
|
+
default_instruction_prompt=(
|
|
226
|
+
"Питання: {text}\n\n"
|
|
227
|
+
"Дайте відповідь на наведене вище питання, використовуючи "
|
|
228
|
+
"{labels_str}, і нічого іншого."
|
|
229
|
+
),
|
|
230
|
+
default_prompt_label_mapping="auto",
|
|
231
|
+
),
|
|
192
232
|
}
|
|
@@ -4,35 +4,58 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SERBIAN,
|
|
28
|
+
SLOVAK,
|
|
29
|
+
SPANISH,
|
|
30
|
+
SWEDISH,
|
|
31
|
+
UKRAINIAN,
|
|
28
32
|
)
|
|
29
33
|
|
|
30
34
|
if t.TYPE_CHECKING:
|
|
31
|
-
from ..
|
|
35
|
+
from ..languages import Language
|
|
32
36
|
|
|
33
37
|
|
|
34
38
|
NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
35
|
-
|
|
39
|
+
BULGARIAN: PromptConfig(
|
|
40
|
+
default_prompt_label_mapping={
|
|
41
|
+
"b-per": "лице",
|
|
42
|
+
"i-per": "лице",
|
|
43
|
+
"b-loc": "място",
|
|
44
|
+
"i-loc": "място",
|
|
45
|
+
"b-org": "организация",
|
|
46
|
+
"i-org": "организация",
|
|
47
|
+
"b-misc": "разни",
|
|
48
|
+
"i-misc": "разни",
|
|
49
|
+
},
|
|
50
|
+
default_prompt_prefix="По-долу са изречения и JSON речници с именуваните "
|
|
51
|
+
"обекти, които се срещат в дадените изречения.",
|
|
52
|
+
default_prompt_template="Изречение: {text}\nИменувани обекти: {label}",
|
|
53
|
+
default_instruction_prompt="Изречение: {text}\n\nИдентифицирайте именуваните "
|
|
54
|
+
"обекти в изречението. Трябва да изведете това като JSON речник с ключовете "
|
|
55
|
+
"{labels_str}. Стойностите трябва да бъдат списъци на именуваните обекти от "
|
|
56
|
+
"този тип, точно както се появяват в изречението.",
|
|
57
|
+
),
|
|
58
|
+
CZECH: PromptConfig(
|
|
36
59
|
default_prompt_label_mapping={
|
|
37
60
|
"b-per": "osoba",
|
|
38
61
|
"i-per": "osoba",
|
|
@@ -51,7 +74,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
51
74
|
"Hodnoty by měly být seznamy pojmenovaných entit tohoto typu, přesně tak, "
|
|
52
75
|
"jak se objevují ve větě.",
|
|
53
76
|
),
|
|
54
|
-
|
|
77
|
+
DANISH: PromptConfig(
|
|
55
78
|
default_prompt_label_mapping={
|
|
56
79
|
"b-per": "person",
|
|
57
80
|
"i-per": "person",
|
|
@@ -70,7 +93,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
70
93
|
"{labels_str}. Værdierne skal være lister over de navngivne enheder af den "
|
|
71
94
|
"type, præcis som de forekommer i sætningen.",
|
|
72
95
|
),
|
|
73
|
-
|
|
96
|
+
GERMAN: PromptConfig(
|
|
74
97
|
default_prompt_label_mapping={
|
|
75
98
|
"b-per": "person",
|
|
76
99
|
"i-per": "person",
|
|
@@ -89,7 +112,26 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
89
112
|
"Schlüsseln {labels_str} ausgeben. Die Werte sollten Listen der "
|
|
90
113
|
"benannten Entitäten dieses Typs sein, genau wie sie im Satz erscheinen.",
|
|
91
114
|
),
|
|
92
|
-
|
|
115
|
+
GREEK: PromptConfig(
|
|
116
|
+
default_prompt_label_mapping={
|
|
117
|
+
"b-per": "πρόσωπο",
|
|
118
|
+
"i-per": "πρόσωπο",
|
|
119
|
+
"b-loc": "τοποθεσία",
|
|
120
|
+
"i-loc": "τοποθεσία",
|
|
121
|
+
"b-org": "οργανισμός",
|
|
122
|
+
"i-org": "οργανισμός",
|
|
123
|
+
"b-misc": "διάφορα",
|
|
124
|
+
"i-misc": "διάφορα",
|
|
125
|
+
},
|
|
126
|
+
default_prompt_prefix="Ακολουθούν προτάσεις και λεξικά JSON με τις "
|
|
127
|
+
"ονομαστικές οντότητες που εμφανίζονται στην δεδομένη πρόταση.",
|
|
128
|
+
default_prompt_template="Πρόταση: {text}\nΟνομαστικές οντότητες: {label}",
|
|
129
|
+
default_instruction_prompt="Πρόταση: {text}\n\nΑναγνωρίστε τις ονομαστικές "
|
|
130
|
+
"οντότητες στην πρόταση. Θα πρέπει να παράγετε αυτό ως λεξικό JSON με "
|
|
131
|
+
"κλειδιά {labels_str}. Οι τιμές πρέπει να είναι λίστες των ονομαστικών "
|
|
132
|
+
"οντοτήτων αυτού του τύπου, ακριβώς όπως εμφανίζονται στην πρόταση.",
|
|
133
|
+
),
|
|
134
|
+
ENGLISH: PromptConfig(
|
|
93
135
|
default_prompt_label_mapping={
|
|
94
136
|
"b-per": "person",
|
|
95
137
|
"i-per": "person",
|
|
@@ -108,7 +150,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
108
150
|
"{labels_str}. The values should be lists of the named entities of that "
|
|
109
151
|
"type, exactly as they appear in the sentence.",
|
|
110
152
|
),
|
|
111
|
-
|
|
153
|
+
SPANISH: PromptConfig(
|
|
112
154
|
default_prompt_label_mapping={
|
|
113
155
|
"b-per": "persona",
|
|
114
156
|
"i-per": "persona",
|
|
@@ -127,7 +169,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
127
169
|
"claves {labels_str}. Los valores deben ser listas de las "
|
|
128
170
|
"entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
|
|
129
171
|
),
|
|
130
|
-
|
|
172
|
+
ESTONIAN: PromptConfig(
|
|
131
173
|
default_prompt_label_mapping={
|
|
132
174
|
"b-per": "inimene",
|
|
133
175
|
"i-per": "inimene",
|
|
@@ -146,7 +188,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
146
188
|
"mille võtmed on {labels_str}. Väärtused peaksid olema kindlat tüüpi nimetatud "
|
|
147
189
|
"üksuste loendid, täpselt nii nagu need lauses esinevad.",
|
|
148
190
|
),
|
|
149
|
-
|
|
191
|
+
PORTUGUESE: PromptConfig(
|
|
150
192
|
default_prompt_label_mapping={
|
|
151
193
|
"b-per": "pessoa",
|
|
152
194
|
"i-per": "pessoa",
|
|
@@ -165,7 +207,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
165
207
|
"{labels_str}. Os valores devem ser listas contendo as entidades "
|
|
166
208
|
"mencionadas desse tipo, tal como ocorrem na frase.",
|
|
167
209
|
),
|
|
168
|
-
|
|
210
|
+
FINNISH: PromptConfig(
|
|
169
211
|
default_prompt_label_mapping={
|
|
170
212
|
"b-per": "henkilö",
|
|
171
213
|
"i-per": "henkilö",
|
|
@@ -184,7 +226,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
184
226
|
"Arvojen tulee olla listoja kyseisen tyypin nimetyistä entiteeteistä "
|
|
185
227
|
"täsmälleen siinä muodossa kuin ne esiintyvät lauseessa.",
|
|
186
228
|
),
|
|
187
|
-
|
|
229
|
+
FAROESE: PromptConfig(
|
|
188
230
|
default_prompt_label_mapping={
|
|
189
231
|
"b-per": "persónur",
|
|
190
232
|
"i-per": "persónur",
|
|
@@ -203,7 +245,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
203
245
|
"{labels_str}. Gildin ættu að vera listi yfir nevndu einingarnar af "
|
|
204
246
|
"þeirri gerð, nákvæmlega eins og þær koma fram í setningunni.",
|
|
205
247
|
),
|
|
206
|
-
|
|
248
|
+
FRENCH: PromptConfig(
|
|
207
249
|
default_prompt_label_mapping={
|
|
208
250
|
"b-per": "personne",
|
|
209
251
|
"i-per": "personne",
|
|
@@ -224,7 +266,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
224
266
|
"entités nommées de ce type, exactement comme elles apparaissent dans "
|
|
225
267
|
"la phrase.",
|
|
226
268
|
),
|
|
227
|
-
|
|
269
|
+
ICELANDIC: PromptConfig(
|
|
228
270
|
default_prompt_label_mapping={
|
|
229
271
|
"b-per": "einstaklingur",
|
|
230
272
|
"i-per": "einstaklingur",
|
|
@@ -244,7 +286,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
244
286
|
"einingarnar af þeirri gerð, nákvæmlega eins og þær koma fram í "
|
|
245
287
|
"setningunni.",
|
|
246
288
|
),
|
|
247
|
-
|
|
289
|
+
ITALIAN: PromptConfig(
|
|
248
290
|
default_prompt_label_mapping={
|
|
249
291
|
"b-per": "persona",
|
|
250
292
|
"i-per": "persona",
|
|
@@ -263,7 +305,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
263
305
|
"{labels_str}. I valori devono essere elenchi di entità "
|
|
264
306
|
"nominate di quel tipo, esattamente come appaiono nella frase.",
|
|
265
307
|
),
|
|
266
|
-
|
|
308
|
+
LITHUANIAN: PromptConfig(
|
|
267
309
|
default_prompt_label_mapping={
|
|
268
310
|
"b-per": "asmuo",
|
|
269
311
|
"i-per": "asmuo",
|
|
@@ -282,7 +324,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
282
324
|
"{labels_str}. Reikšmės turi būti to tipo vardinių vienetų sąrašai, "
|
|
283
325
|
"tiksliai taip, kaip jie rodomi sakinyje.",
|
|
284
326
|
),
|
|
285
|
-
|
|
327
|
+
LATVIAN: PromptConfig(
|
|
286
328
|
default_prompt_label_mapping={
|
|
287
329
|
"b-per": "persona",
|
|
288
330
|
"i-per": "persona",
|
|
@@ -302,7 +344,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
302
344
|
"{labels_str}. Vērtībām jābūt šī tipa nosaukto objektu sarakstiem, "
|
|
303
345
|
"tieši tā, kā tie parādās teikumā.",
|
|
304
346
|
),
|
|
305
|
-
|
|
347
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
306
348
|
default_prompt_label_mapping={
|
|
307
349
|
"b-per": "person",
|
|
308
350
|
"i-per": "person",
|
|
@@ -321,7 +363,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
321
363
|
"{labels_str}. Verdiene skal være lister over de navngitte enhetene av den "
|
|
322
364
|
"typen, akkurat som de vises i frasen.",
|
|
323
365
|
),
|
|
324
|
-
|
|
366
|
+
DUTCH: PromptConfig(
|
|
325
367
|
default_prompt_label_mapping={
|
|
326
368
|
"b-per": "persoon",
|
|
327
369
|
"i-per": "persoon",
|
|
@@ -340,7 +382,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
340
382
|
"{labels_str}. De waarden moeten lijsten zijn van de "
|
|
341
383
|
"genoemde entiteiten van dat type, precies zoals ze voorkomen in de zin.",
|
|
342
384
|
),
|
|
343
|
-
|
|
385
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
344
386
|
default_prompt_label_mapping={
|
|
345
387
|
"b-per": "person",
|
|
346
388
|
"i-per": "person",
|
|
@@ -359,7 +401,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
359
401
|
"Verdiene skal være lister over de navngitte enhetene "
|
|
360
402
|
"av den typen, akkurat som de vises i frasen.",
|
|
361
403
|
),
|
|
362
|
-
|
|
404
|
+
NORWEGIAN: PromptConfig(
|
|
363
405
|
default_prompt_label_mapping={
|
|
364
406
|
"b-per": "person",
|
|
365
407
|
"i-per": "person",
|
|
@@ -378,7 +420,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
378
420
|
"Verdiene skal være lister over de navngitte enhetene "
|
|
379
421
|
"av den typen, akkurat som de vises i frasen.",
|
|
380
422
|
),
|
|
381
|
-
|
|
423
|
+
POLISH: PromptConfig(
|
|
382
424
|
default_prompt_label_mapping={
|
|
383
425
|
"b-per": "osoba",
|
|
384
426
|
"i-per": "osoba",
|
|
@@ -397,7 +439,7 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
397
439
|
"{labels_str}. Wartości odpowiadające kluczom powinny być listami jednostek "
|
|
398
440
|
"nazewniczych danego typu, dokładnie tak, jak pojawiają się w zdaniu.",
|
|
399
441
|
),
|
|
400
|
-
|
|
442
|
+
SLOVAK: PromptConfig(
|
|
401
443
|
default_prompt_label_mapping={
|
|
402
444
|
"b-per": "osoba",
|
|
403
445
|
"i-per": "osoba",
|
|
@@ -416,7 +458,26 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
416
458
|
"{labels_str}. Hodnoty by mali byť zoznamy pomenovaných entít danej "
|
|
417
459
|
"kategórie, presne tak, ako sa vyskytujú vo vete.",
|
|
418
460
|
),
|
|
419
|
-
|
|
461
|
+
SERBIAN: PromptConfig(
|
|
462
|
+
default_prompt_label_mapping={
|
|
463
|
+
"b-per": "osoba",
|
|
464
|
+
"i-per": "osoba",
|
|
465
|
+
"b-loc": "mesto",
|
|
466
|
+
"i-loc": "mesto",
|
|
467
|
+
"b-org": "organizacija",
|
|
468
|
+
"i-org": "organizacija",
|
|
469
|
+
"b-misc": "razno",
|
|
470
|
+
"i-misc": "razno",
|
|
471
|
+
},
|
|
472
|
+
default_prompt_prefix="Sledeće su rečenice i JSON rečnici sa imenovanim "
|
|
473
|
+
"entitetima koji se pojavljuju u datoj rečenici.",
|
|
474
|
+
default_prompt_template="Rečenica: {text}\nImenovani entiteti: {label}",
|
|
475
|
+
default_instruction_prompt="Rečenica: {text}\n\nIdentifikujte imenovane "
|
|
476
|
+
"entitete u rečenici. Trebalo bi da ovo ispišete kao JSON rečnik sa ključevima "
|
|
477
|
+
"{labels_str}. Vrednosti treba da budu liste imenovanih entiteta te "
|
|
478
|
+
"kategorije, tačno onako kako se pojavljuju u rečenici.",
|
|
479
|
+
),
|
|
480
|
+
SWEDISH: PromptConfig(
|
|
420
481
|
default_prompt_label_mapping={
|
|
421
482
|
"b-per": "person",
|
|
422
483
|
"i-per": "person",
|
|
@@ -432,7 +493,27 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
432
493
|
default_prompt_template="Mening: {text}\nNamngivna entiteter: {label}",
|
|
433
494
|
default_instruction_prompt="Mening: {text}\n\nIdentifiera de namngivna "
|
|
434
495
|
"enheterna i meningen. Du ska outputta detta som en JSON-ordbok med nycklarna "
|
|
435
|
-
"{labels_str}. Värdena ska vara listor över de namngivna
|
|
496
|
+
"{labels_str}. Värdena ska vara listor över de namngivna enheterna av den "
|
|
436
497
|
"typen, precis som de förekommer i meningen.",
|
|
437
498
|
),
|
|
499
|
+
UKRAINIAN: PromptConfig(
|
|
500
|
+
default_prompt_label_mapping={
|
|
501
|
+
"b-per": "особа",
|
|
502
|
+
"i-per": "особа",
|
|
503
|
+
"b-loc": "місце",
|
|
504
|
+
"i-loc": "місце",
|
|
505
|
+
"b-org": "організація",
|
|
506
|
+
"i-org": "організація",
|
|
507
|
+
"b-misc": "різне",
|
|
508
|
+
"i-misc": "різне",
|
|
509
|
+
},
|
|
510
|
+
default_prompt_prefix="Нижче наведені речення та JSON-словники з іменованими "
|
|
511
|
+
"сутностями, які присутні у даному реченні.",
|
|
512
|
+
default_prompt_template="Речення: {text}\nІменовані сутності: {label}",
|
|
513
|
+
default_instruction_prompt="Речення: {text}\n\n"
|
|
514
|
+
"Ідентифікуйте іменовані сутності у "
|
|
515
|
+
"реченні. Ви повинні вивести це як JSON-словник з ключами {labels_str}. "
|
|
516
|
+
"Значення мають бути списками іменованих сутностей цього типу, точно "
|
|
517
|
+
"такими, як вони з'являються у реченні.",
|
|
518
|
+
),
|
|
438
519
|
}
|