EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +32 -14
- euroeval/benchmark_config_factory.py +92 -180
- euroeval/benchmark_modules/base.py +49 -39
- euroeval/benchmark_modules/fresh.py +35 -21
- euroeval/benchmark_modules/hf.py +280 -244
- euroeval/benchmark_modules/litellm.py +752 -312
- euroeval/benchmark_modules/vllm.py +570 -268
- euroeval/benchmarker.py +651 -528
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +49 -38
- euroeval/constants.py +44 -25
- euroeval/data_loading.py +111 -55
- euroeval/data_models.py +490 -323
- euroeval/dataset_configs/__init__.py +26 -4
- euroeval/dataset_configs/bosnian.py +39 -0
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/croatian.py +56 -0
- euroeval/dataset_configs/czech.py +75 -0
- euroeval/dataset_configs/danish.py +78 -50
- euroeval/dataset_configs/dutch.py +74 -44
- euroeval/dataset_configs/english.py +71 -36
- euroeval/dataset_configs/estonian.py +111 -0
- euroeval/dataset_configs/faroese.py +25 -18
- euroeval/dataset_configs/finnish.py +63 -26
- euroeval/dataset_configs/french.py +65 -32
- euroeval/dataset_configs/german.py +77 -36
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +68 -57
- euroeval/dataset_configs/italian.py +68 -36
- euroeval/dataset_configs/latvian.py +87 -0
- euroeval/dataset_configs/lithuanian.py +64 -0
- euroeval/dataset_configs/norwegian.py +98 -72
- euroeval/dataset_configs/polish.py +96 -0
- euroeval/dataset_configs/portuguese.py +63 -40
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +55 -0
- euroeval/dataset_configs/slovene.py +56 -0
- euroeval/dataset_configs/spanish.py +68 -34
- euroeval/dataset_configs/swedish.py +82 -41
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/enums.py +12 -6
- euroeval/exceptions.py +21 -1
- euroeval/finetuning.py +34 -26
- euroeval/generation.py +76 -41
- euroeval/generation_utils.py +169 -34
- euroeval/languages.py +1020 -188
- euroeval/logging_utils.py +268 -0
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +85 -0
- euroeval/metrics/huggingface.py +216 -0
- euroeval/metrics/llm_as_a_judge.py +260 -0
- euroeval/metrics/pipeline.py +289 -0
- euroeval/metrics/speed.py +48 -0
- euroeval/model_cache.py +40 -21
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +157 -22
- euroeval/prompt_templates/multiple_choice.py +159 -17
- euroeval/prompt_templates/named_entity_recognition.py +318 -21
- euroeval/prompt_templates/reading_comprehension.py +207 -16
- euroeval/prompt_templates/sentiment_classification.py +205 -22
- euroeval/prompt_templates/summarization.py +122 -22
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +20 -9
- euroeval/speed_benchmark.py +11 -12
- euroeval/task_group_utils/multiple_choice_classification.py +21 -12
- euroeval/task_group_utils/question_answering.py +101 -73
- euroeval/task_group_utils/sequence_classification.py +144 -61
- euroeval/task_group_utils/text_to_text.py +33 -12
- euroeval/task_group_utils/token_classification.py +86 -89
- euroeval/tasks.py +75 -16
- euroeval/tokenisation_utils.py +603 -0
- euroeval/types.py +17 -11
- euroeval/utils.py +332 -137
- euroeval-16.7.1.dist-info/METADATA +623 -0
- euroeval-16.7.1.dist-info/RECORD +84 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -737
- euroeval/metrics.py +0 -452
- euroeval/tokenization_utils.py +0 -498
- euroeval-15.12.0.dist-info/METADATA +0 -285
- euroeval-15.12.0.dist-info/RECORD +0 -63
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
- {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,130 @@
|
|
|
1
1
|
"""Templates for the Named Entity Recognition task."""
|
|
2
2
|
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
3
5
|
from ..data_models import PromptConfig
|
|
4
|
-
from ..languages import
|
|
6
|
+
from ..languages import (
|
|
7
|
+
BOSNIAN,
|
|
8
|
+
BULGARIAN,
|
|
9
|
+
CROATIAN,
|
|
10
|
+
CZECH,
|
|
11
|
+
DANISH,
|
|
12
|
+
DUTCH,
|
|
13
|
+
ENGLISH,
|
|
14
|
+
ESTONIAN,
|
|
15
|
+
FAROESE,
|
|
16
|
+
FINNISH,
|
|
17
|
+
FRENCH,
|
|
18
|
+
GERMAN,
|
|
19
|
+
GREEK,
|
|
20
|
+
ICELANDIC,
|
|
21
|
+
ITALIAN,
|
|
22
|
+
LATVIAN,
|
|
23
|
+
LITHUANIAN,
|
|
24
|
+
NORWEGIAN,
|
|
25
|
+
NORWEGIAN_BOKMÅL,
|
|
26
|
+
NORWEGIAN_NYNORSK,
|
|
27
|
+
POLISH,
|
|
28
|
+
PORTUGUESE,
|
|
29
|
+
SERBIAN,
|
|
30
|
+
SLOVAK,
|
|
31
|
+
SLOVENE,
|
|
32
|
+
SPANISH,
|
|
33
|
+
SWEDISH,
|
|
34
|
+
UKRAINIAN,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
if t.TYPE_CHECKING:
|
|
38
|
+
from ..languages import Language
|
|
39
|
+
|
|
5
40
|
|
|
6
|
-
NER_TEMPLATES = {
|
|
7
|
-
|
|
41
|
+
NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
42
|
+
BOSNIAN: PromptConfig(
|
|
43
|
+
default_prompt_label_mapping={
|
|
44
|
+
"b-per": "osoba",
|
|
45
|
+
"i-per": "osoba",
|
|
46
|
+
"b-loc": "mjesto",
|
|
47
|
+
"i-loc": "mjesto",
|
|
48
|
+
"b-org": "organizacija",
|
|
49
|
+
"i-org": "organizacija",
|
|
50
|
+
"b-misc": "razno",
|
|
51
|
+
"i-misc": "razno",
|
|
52
|
+
},
|
|
53
|
+
default_prompt_prefix=(
|
|
54
|
+
"Slijede rečenice i JSON riječnici s imenovanim entitetima "
|
|
55
|
+
"koji se pojavljuju u rečenicama."
|
|
56
|
+
),
|
|
57
|
+
default_prompt_template=("Rečenica: {text}\nImenovani entiteti: {label}"),
|
|
58
|
+
default_instruction_prompt=(
|
|
59
|
+
"Rečenica: {text}\n\n"
|
|
60
|
+
"Identificirajte imenovane entitete u rečenici. Prikažite ih kao "
|
|
61
|
+
"JSON riječnik s ključevima {labels_str}. Vrijednosti trebaju biti "
|
|
62
|
+
"popisi imenovanih entiteta navedenog tipa, točno kako se pojavljuju "
|
|
63
|
+
"u rečenici."
|
|
64
|
+
),
|
|
65
|
+
),
|
|
66
|
+
BULGARIAN: PromptConfig(
|
|
67
|
+
default_prompt_label_mapping={
|
|
68
|
+
"b-per": "лице",
|
|
69
|
+
"i-per": "лице",
|
|
70
|
+
"b-loc": "място",
|
|
71
|
+
"i-loc": "място",
|
|
72
|
+
"b-org": "организация",
|
|
73
|
+
"i-org": "организация",
|
|
74
|
+
"b-misc": "разни",
|
|
75
|
+
"i-misc": "разни",
|
|
76
|
+
},
|
|
77
|
+
default_prompt_prefix="По-долу са изречения и JSON речници с именуваните "
|
|
78
|
+
"обекти, които се срещат в дадените изречения.",
|
|
79
|
+
default_prompt_template="Изречение: {text}\nИменувани обекти: {label}",
|
|
80
|
+
default_instruction_prompt="Изречение: {text}\n\nИдентифицирайте именуваните "
|
|
81
|
+
"обекти в изречението. Трябва да изведете това като JSON речник с ключовете "
|
|
82
|
+
"{labels_str}. Стойностите трябва да бъдат списъци на именуваните обекти от "
|
|
83
|
+
"този тип, точно както се появяват в изречението.",
|
|
84
|
+
),
|
|
85
|
+
CROATIAN: PromptConfig(
|
|
86
|
+
default_prompt_label_mapping={
|
|
87
|
+
"b-per": "osoba",
|
|
88
|
+
"i-per": "osoba",
|
|
89
|
+
"b-loc": "mjesto",
|
|
90
|
+
"i-loc": "mjesto",
|
|
91
|
+
"b-org": "organizacija",
|
|
92
|
+
"i-org": "organizacija",
|
|
93
|
+
"b-misc": "razno",
|
|
94
|
+
"i-misc": "razno",
|
|
95
|
+
},
|
|
96
|
+
default_prompt_prefix=(
|
|
97
|
+
"Sljedeće su rečenice i JSON rječnici s imenicama koje se pojavljuju u "
|
|
98
|
+
"rečenicama."
|
|
99
|
+
),
|
|
100
|
+
default_prompt_template=("Rečenica: {text}\nImenovane entiteti: {label}"),
|
|
101
|
+
default_instruction_prompt=(
|
|
102
|
+
"Rečenica: {text}\n\n"
|
|
103
|
+
"Identificirajte imenovane entitete u rečenici. Prikažite ih kao JSON "
|
|
104
|
+
"rječnik s ključevima {labels_str}. Vrijednosti trebaju biti popisi "
|
|
105
|
+
"imenovanih entiteta navedenog tipa, točno kako se pojavljuju u rečenici."
|
|
106
|
+
),
|
|
107
|
+
),
|
|
108
|
+
CZECH: PromptConfig(
|
|
109
|
+
default_prompt_label_mapping={
|
|
110
|
+
"b-per": "osoba",
|
|
111
|
+
"i-per": "osoba",
|
|
112
|
+
"b-loc": "místo",
|
|
113
|
+
"i-loc": "místo",
|
|
114
|
+
"b-org": "organizace",
|
|
115
|
+
"i-org": "organizace",
|
|
116
|
+
"b-misc": "různé",
|
|
117
|
+
"i-misc": "různé",
|
|
118
|
+
},
|
|
119
|
+
default_prompt_prefix="Následující jsou věty a JSON slovníky s pojmenovanými "
|
|
120
|
+
"entitami, které se v dané větě vyskytují.",
|
|
121
|
+
default_prompt_template="Věta: {text}\nPojmenované entity: {label}",
|
|
122
|
+
default_instruction_prompt="Věta: {text}\n\nIdentifikujte pojmenované entity "
|
|
123
|
+
"ve větě. Měli byste to vypsat jako JSON slovník s klíči {labels_str}. "
|
|
124
|
+
"Hodnoty by měly být seznamy pojmenovaných entit tohoto typu, přesně tak, "
|
|
125
|
+
"jak se objevují ve větě.",
|
|
126
|
+
),
|
|
127
|
+
DANISH: PromptConfig(
|
|
8
128
|
default_prompt_label_mapping={
|
|
9
129
|
"b-per": "person",
|
|
10
130
|
"i-per": "person",
|
|
@@ -23,7 +143,7 @@ NER_TEMPLATES = {
|
|
|
23
143
|
"{labels_str}. Værdierne skal være lister over de navngivne enheder af den "
|
|
24
144
|
"type, præcis som de forekommer i sætningen.",
|
|
25
145
|
),
|
|
26
|
-
|
|
146
|
+
GERMAN: PromptConfig(
|
|
27
147
|
default_prompt_label_mapping={
|
|
28
148
|
"b-per": "person",
|
|
29
149
|
"i-per": "person",
|
|
@@ -42,7 +162,26 @@ NER_TEMPLATES = {
|
|
|
42
162
|
"Schlüsseln {labels_str} ausgeben. Die Werte sollten Listen der "
|
|
43
163
|
"benannten Entitäten dieses Typs sein, genau wie sie im Satz erscheinen.",
|
|
44
164
|
),
|
|
45
|
-
|
|
165
|
+
GREEK: PromptConfig(
|
|
166
|
+
default_prompt_label_mapping={
|
|
167
|
+
"b-per": "πρόσωπο",
|
|
168
|
+
"i-per": "πρόσωπο",
|
|
169
|
+
"b-loc": "τοποθεσία",
|
|
170
|
+
"i-loc": "τοποθεσία",
|
|
171
|
+
"b-org": "οργανισμός",
|
|
172
|
+
"i-org": "οργανισμός",
|
|
173
|
+
"b-misc": "διάφορα",
|
|
174
|
+
"i-misc": "διάφορα",
|
|
175
|
+
},
|
|
176
|
+
default_prompt_prefix="Ακολουθούν προτάσεις και λεξικά JSON με τις "
|
|
177
|
+
"ονομαστικές οντότητες που εμφανίζονται στην δεδομένη πρόταση.",
|
|
178
|
+
default_prompt_template="Πρόταση: {text}\nΟνομαστικές οντότητες: {label}",
|
|
179
|
+
default_instruction_prompt="Πρόταση: {text}\n\nΑναγνωρίστε τις ονομαστικές "
|
|
180
|
+
"οντότητες στην πρόταση. Θα πρέπει να παράγετε αυτό ως λεξικό JSON με "
|
|
181
|
+
"κλειδιά {labels_str}. Οι τιμές πρέπει να είναι λίστες των ονομαστικών "
|
|
182
|
+
"οντοτήτων αυτού του τύπου, ακριβώς όπως εμφανίζονται στην πρόταση.",
|
|
183
|
+
),
|
|
184
|
+
ENGLISH: PromptConfig(
|
|
46
185
|
default_prompt_label_mapping={
|
|
47
186
|
"b-per": "person",
|
|
48
187
|
"i-per": "person",
|
|
@@ -61,7 +200,7 @@ NER_TEMPLATES = {
|
|
|
61
200
|
"{labels_str}. The values should be lists of the named entities of that "
|
|
62
201
|
"type, exactly as they appear in the sentence.",
|
|
63
202
|
),
|
|
64
|
-
|
|
203
|
+
SPANISH: PromptConfig(
|
|
65
204
|
default_prompt_label_mapping={
|
|
66
205
|
"b-per": "persona",
|
|
67
206
|
"i-per": "persona",
|
|
@@ -80,7 +219,26 @@ NER_TEMPLATES = {
|
|
|
80
219
|
"claves {labels_str}. Los valores deben ser listas de las "
|
|
81
220
|
"entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
|
|
82
221
|
),
|
|
83
|
-
|
|
222
|
+
ESTONIAN: PromptConfig(
|
|
223
|
+
default_prompt_label_mapping={
|
|
224
|
+
"b-per": "inimene",
|
|
225
|
+
"i-per": "inimene",
|
|
226
|
+
"b-loc": "asukoht",
|
|
227
|
+
"i-loc": "asukoht",
|
|
228
|
+
"b-org": "organisatsioon",
|
|
229
|
+
"i-org": "organisatsioon",
|
|
230
|
+
"b-misc": "muu",
|
|
231
|
+
"i-misc": "muu",
|
|
232
|
+
},
|
|
233
|
+
default_prompt_prefix="Allpool on laused ja JSON-sõnastikud, mis sisaldavad "
|
|
234
|
+
"antud lauses esinevaid nimetatud üksuseid.",
|
|
235
|
+
default_prompt_template="Lause: {text}\nNimetatud üksused: {label}",
|
|
236
|
+
default_instruction_prompt="Lause: {text}\n\nTuvasta lauses "
|
|
237
|
+
"nimetatud üksused. Väljund peaks olema JSON-sõnastik, "
|
|
238
|
+
"mille võtmed on {labels_str}. Väärtused peaksid olema kindlat tüüpi nimetatud "
|
|
239
|
+
"üksuste loendid, täpselt nii nagu need lauses esinevad.",
|
|
240
|
+
),
|
|
241
|
+
PORTUGUESE: PromptConfig(
|
|
84
242
|
default_prompt_label_mapping={
|
|
85
243
|
"b-per": "pessoa",
|
|
86
244
|
"i-per": "pessoa",
|
|
@@ -99,7 +257,7 @@ NER_TEMPLATES = {
|
|
|
99
257
|
"{labels_str}. Os valores devem ser listas contendo as entidades "
|
|
100
258
|
"mencionadas desse tipo, tal como ocorrem na frase.",
|
|
101
259
|
),
|
|
102
|
-
|
|
260
|
+
FINNISH: PromptConfig(
|
|
103
261
|
default_prompt_label_mapping={
|
|
104
262
|
"b-per": "henkilö",
|
|
105
263
|
"i-per": "henkilö",
|
|
@@ -118,7 +276,7 @@ NER_TEMPLATES = {
|
|
|
118
276
|
"Arvojen tulee olla listoja kyseisen tyypin nimetyistä entiteeteistä "
|
|
119
277
|
"täsmälleen siinä muodossa kuin ne esiintyvät lauseessa.",
|
|
120
278
|
),
|
|
121
|
-
|
|
279
|
+
FAROESE: PromptConfig(
|
|
122
280
|
default_prompt_label_mapping={
|
|
123
281
|
"b-per": "persónur",
|
|
124
282
|
"i-per": "persónur",
|
|
@@ -132,12 +290,12 @@ NER_TEMPLATES = {
|
|
|
132
290
|
default_prompt_prefix="Her eru nakrir setningar og nakrar JSON orðabøkur við "
|
|
133
291
|
"nevndar eindir, sum eru í setningunum.",
|
|
134
292
|
default_prompt_template="Setningur: {text}\nNevndar eindir: {label}",
|
|
135
|
-
default_instruction_prompt="Setningur: {text}\n\
|
|
293
|
+
default_instruction_prompt="Setningur: {text}\n\nGreindu nevndu einingarnar í "
|
|
136
294
|
"setningunni. Þú ættir að skila þessu sem JSON orðabók með lyklunum "
|
|
137
295
|
"{labels_str}. Gildin ættu að vera listi yfir nevndu einingarnar af "
|
|
138
296
|
"þeirri gerð, nákvæmlega eins og þær koma fram í setningunni.",
|
|
139
297
|
),
|
|
140
|
-
|
|
298
|
+
FRENCH: PromptConfig(
|
|
141
299
|
default_prompt_label_mapping={
|
|
142
300
|
"b-per": "personne",
|
|
143
301
|
"i-per": "personne",
|
|
@@ -158,7 +316,7 @@ NER_TEMPLATES = {
|
|
|
158
316
|
"entités nommées de ce type, exactement comme elles apparaissent dans "
|
|
159
317
|
"la phrase.",
|
|
160
318
|
),
|
|
161
|
-
|
|
319
|
+
ICELANDIC: PromptConfig(
|
|
162
320
|
default_prompt_label_mapping={
|
|
163
321
|
"b-per": "einstaklingur",
|
|
164
322
|
"i-per": "einstaklingur",
|
|
@@ -171,14 +329,14 @@ NER_TEMPLATES = {
|
|
|
171
329
|
},
|
|
172
330
|
default_prompt_prefix="Eftirfarandi eru setningar ásamt JSON lyklum með "
|
|
173
331
|
"nefndum einingum sem koma fyrir í setningunum.",
|
|
174
|
-
default_prompt_template="Setning: {text}\
|
|
175
|
-
default_instruction_prompt="Setning: {text}\n\
|
|
332
|
+
default_prompt_template="Setning: {text}\nNafneiningar: {label}",
|
|
333
|
+
default_instruction_prompt="Setning: {text}\n\nGreindu nefndu einingarnar í "
|
|
176
334
|
"setningunni. Þú ættir að skila þessu sem JSON orðabók með lyklunum "
|
|
177
335
|
"{labels_str}. Gildin ættu að vera listi yfir nefndu "
|
|
178
336
|
"einingarnar af þeirri gerð, nákvæmlega eins og þær koma fram í "
|
|
179
337
|
"setningunni.",
|
|
180
338
|
),
|
|
181
|
-
|
|
339
|
+
ITALIAN: PromptConfig(
|
|
182
340
|
default_prompt_label_mapping={
|
|
183
341
|
"b-per": "persona",
|
|
184
342
|
"i-per": "persona",
|
|
@@ -197,7 +355,46 @@ NER_TEMPLATES = {
|
|
|
197
355
|
"{labels_str}. I valori devono essere elenchi di entità "
|
|
198
356
|
"nominate di quel tipo, esattamente come appaiono nella frase.",
|
|
199
357
|
),
|
|
200
|
-
|
|
358
|
+
LITHUANIAN: PromptConfig(
|
|
359
|
+
default_prompt_label_mapping={
|
|
360
|
+
"b-per": "asmuo",
|
|
361
|
+
"i-per": "asmuo",
|
|
362
|
+
"b-loc": "vieta",
|
|
363
|
+
"i-loc": "vieta",
|
|
364
|
+
"b-org": "organizacija",
|
|
365
|
+
"i-org": "organizacija",
|
|
366
|
+
"b-misc": "kita",
|
|
367
|
+
"i-misc": "kita",
|
|
368
|
+
},
|
|
369
|
+
default_prompt_prefix="Toliau pateikti sakiniai ir JSON žodynai su vardiniais "
|
|
370
|
+
"vienetais, kurie pateikiame sakinyje.",
|
|
371
|
+
default_prompt_template="Sakinys: {text}\nVardiniai vienetai: {label}",
|
|
372
|
+
default_instruction_prompt="Sakinys: {text}\n\nIdentifikuokite vardinius "
|
|
373
|
+
"vienetus sakinyje. Turėtumėte pateikti tai kaip JSON žodyną su raktais "
|
|
374
|
+
"{labels_str}. Reikšmės turi būti to tipo vardinių vienetų sąrašai, "
|
|
375
|
+
"tiksliai taip, kaip jie rodomi sakinyje.",
|
|
376
|
+
),
|
|
377
|
+
LATVIAN: PromptConfig(
|
|
378
|
+
default_prompt_label_mapping={
|
|
379
|
+
"b-per": "persona",
|
|
380
|
+
"i-per": "persona",
|
|
381
|
+
"b-loc": "vieta",
|
|
382
|
+
"i-loc": "vieta",
|
|
383
|
+
"b-org": "organizācija",
|
|
384
|
+
"i-org": "organizācija",
|
|
385
|
+
"b-misc": "dažādi",
|
|
386
|
+
"i-misc": "dažādi",
|
|
387
|
+
},
|
|
388
|
+
default_prompt_prefix="Tālāk ir teikumi un JSON vārdnīcas ar nosauktajiem "
|
|
389
|
+
"objektiem, kas parādās dotajā teikumā.",
|
|
390
|
+
default_prompt_template="Teikums: {text}\nNosauktie objekti: {label}",
|
|
391
|
+
default_instruction_prompt="Teikums: {text}\n\n"
|
|
392
|
+
"Identificējiet nosauktos objektus "
|
|
393
|
+
"teikumā. Jums jāizvada šī informācija kā JSON vārdnīcu ar atslēgām "
|
|
394
|
+
"{labels_str}. Vērtībām jābūt šī tipa nosaukto objektu sarakstiem, "
|
|
395
|
+
"tieši tā, kā tie parādās teikumā.",
|
|
396
|
+
),
|
|
397
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
201
398
|
default_prompt_label_mapping={
|
|
202
399
|
"b-per": "person",
|
|
203
400
|
"i-per": "person",
|
|
@@ -216,7 +413,7 @@ NER_TEMPLATES = {
|
|
|
216
413
|
"{labels_str}. Verdiene skal være lister over de navngitte enhetene av den "
|
|
217
414
|
"typen, akkurat som de vises i frasen.",
|
|
218
415
|
),
|
|
219
|
-
|
|
416
|
+
DUTCH: PromptConfig(
|
|
220
417
|
default_prompt_label_mapping={
|
|
221
418
|
"b-per": "persoon",
|
|
222
419
|
"i-per": "persoon",
|
|
@@ -235,7 +432,7 @@ NER_TEMPLATES = {
|
|
|
235
432
|
"{labels_str}. De waarden moeten lijsten zijn van de "
|
|
236
433
|
"genoemde entiteiten van dat type, precies zoals ze voorkomen in de zin.",
|
|
237
434
|
),
|
|
238
|
-
|
|
435
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
239
436
|
default_prompt_label_mapping={
|
|
240
437
|
"b-per": "person",
|
|
241
438
|
"i-per": "person",
|
|
@@ -254,7 +451,7 @@ NER_TEMPLATES = {
|
|
|
254
451
|
"Verdiene skal være lister over de navngitte enhetene "
|
|
255
452
|
"av den typen, akkurat som de vises i frasen.",
|
|
256
453
|
),
|
|
257
|
-
|
|
454
|
+
NORWEGIAN: PromptConfig(
|
|
258
455
|
default_prompt_label_mapping={
|
|
259
456
|
"b-per": "person",
|
|
260
457
|
"i-per": "person",
|
|
@@ -273,7 +470,87 @@ NER_TEMPLATES = {
|
|
|
273
470
|
"Verdiene skal være lister over de navngitte enhetene "
|
|
274
471
|
"av den typen, akkurat som de vises i frasen.",
|
|
275
472
|
),
|
|
276
|
-
|
|
473
|
+
POLISH: PromptConfig(
|
|
474
|
+
default_prompt_label_mapping={
|
|
475
|
+
"b-per": "osoba",
|
|
476
|
+
"i-per": "osoba",
|
|
477
|
+
"b-loc": "miejsce",
|
|
478
|
+
"i-loc": "miejsce",
|
|
479
|
+
"b-org": "organizacja",
|
|
480
|
+
"i-org": "organizacja",
|
|
481
|
+
"b-misc": "inne",
|
|
482
|
+
"i-misc": "inne",
|
|
483
|
+
},
|
|
484
|
+
default_prompt_prefix="Poniżej znajdują się zdania i słowniki JSON "
|
|
485
|
+
"z jednostkami nazewniczymi, które występują w danym zdaniu.",
|
|
486
|
+
default_prompt_template="Zdanie: {text}\nJednostki nazewnicze: {label}",
|
|
487
|
+
default_instruction_prompt="Zdanie: {text}\n\nZidentyfikuj jednostki "
|
|
488
|
+
"nazewnicze w zdaniu. Wypisz je jako słownik JSON z kluczami "
|
|
489
|
+
"{labels_str}. Wartości odpowiadające kluczom powinny być listami jednostek "
|
|
490
|
+
"nazewniczych danego typu, dokładnie tak, jak pojawiają się w zdaniu.",
|
|
491
|
+
),
|
|
492
|
+
SLOVAK: PromptConfig(
|
|
493
|
+
default_prompt_label_mapping={
|
|
494
|
+
"b-per": "osoba",
|
|
495
|
+
"i-per": "osoba",
|
|
496
|
+
"b-loc": "miesto",
|
|
497
|
+
"i-loc": "miesto",
|
|
498
|
+
"b-org": "organizácia",
|
|
499
|
+
"i-org": "organizácia",
|
|
500
|
+
"b-misc": "rôzne",
|
|
501
|
+
"i-misc": "rôzne",
|
|
502
|
+
},
|
|
503
|
+
default_prompt_prefix="Nasledujúce sú vety a JSON-objekty s pomenovanými "
|
|
504
|
+
"entitami, ktoré sa nachádzajú v danej vete.",
|
|
505
|
+
default_prompt_template="Veta: {text}\nPomenované entity: {label}",
|
|
506
|
+
default_instruction_prompt="Veta: {text}\n\nIdentifikujte pomenované "
|
|
507
|
+
"entity vo vete. Výstup by mal byť vo forme JSON-objektu s kľúčmi "
|
|
508
|
+
"{labels_str}. Hodnoty by mali byť zoznamy pomenovaných entít danej "
|
|
509
|
+
"kategórie, presne tak, ako sa vyskytujú vo vete.",
|
|
510
|
+
),
|
|
511
|
+
SLOVENE: PromptConfig(
|
|
512
|
+
default_prompt_label_mapping={
|
|
513
|
+
"b-per": "oseba",
|
|
514
|
+
"i-per": "oseba",
|
|
515
|
+
"b-loc": "kraj",
|
|
516
|
+
"i-loc": "kraj",
|
|
517
|
+
"b-org": "organizacija",
|
|
518
|
+
"i-org": "organizacija",
|
|
519
|
+
"b-misc": "razno",
|
|
520
|
+
"i-misc": "razno",
|
|
521
|
+
},
|
|
522
|
+
default_prompt_prefix=(
|
|
523
|
+
"Naslednje so povedi in JSON slovarji z poimenovanimi "
|
|
524
|
+
"entitetami, ki se pojavijo v dani povedi."
|
|
525
|
+
),
|
|
526
|
+
default_prompt_template=("Poved: {text}\nPoimenovane entitete: {label}"),
|
|
527
|
+
default_instruction_prompt=(
|
|
528
|
+
"Poved: {text}\n\nIdentificirajte poimenovane entitete v povedi. "
|
|
529
|
+
"To morate izpisati kot JSON slovar s ključi {labels_str}. "
|
|
530
|
+
"Vrednosti morajo biti seznami poimenovanih entitet te kategorije, "
|
|
531
|
+
"tako kot se pojavijo v povedi."
|
|
532
|
+
),
|
|
533
|
+
),
|
|
534
|
+
SERBIAN: PromptConfig(
|
|
535
|
+
default_prompt_label_mapping={
|
|
536
|
+
"b-per": "osoba",
|
|
537
|
+
"i-per": "osoba",
|
|
538
|
+
"b-loc": "mesto",
|
|
539
|
+
"i-loc": "mesto",
|
|
540
|
+
"b-org": "organizacija",
|
|
541
|
+
"i-org": "organizacija",
|
|
542
|
+
"b-misc": "razno",
|
|
543
|
+
"i-misc": "razno",
|
|
544
|
+
},
|
|
545
|
+
default_prompt_prefix="Sledeće su rečenice i JSON rečnici sa imenovanim "
|
|
546
|
+
"entitetima koji se pojavljuju u datoj rečenici.",
|
|
547
|
+
default_prompt_template="Rečenica: {text}\nImenovani entiteti: {label}",
|
|
548
|
+
default_instruction_prompt="Rečenica: {text}\n\nIdentifikujte imenovane "
|
|
549
|
+
"entitete u rečenici. Trebalo bi da ovo ispišete kao JSON rečnik sa ključevima "
|
|
550
|
+
"{labels_str}. Vrednosti treba da budu liste imenovanih entiteta te "
|
|
551
|
+
"kategorije, tačno onako kako se pojavljuju u rečenici.",
|
|
552
|
+
),
|
|
553
|
+
SWEDISH: PromptConfig(
|
|
277
554
|
default_prompt_label_mapping={
|
|
278
555
|
"b-per": "person",
|
|
279
556
|
"i-per": "person",
|
|
@@ -289,7 +566,27 @@ NER_TEMPLATES = {
|
|
|
289
566
|
default_prompt_template="Mening: {text}\nNamngivna entiteter: {label}",
|
|
290
567
|
default_instruction_prompt="Mening: {text}\n\nIdentifiera de namngivna "
|
|
291
568
|
"enheterna i meningen. Du ska outputta detta som en JSON-ordbok med nycklarna "
|
|
292
|
-
"{labels_str}. Värdena ska vara listor över de namngivna
|
|
569
|
+
"{labels_str}. Värdena ska vara listor över de namngivna enheterna av den "
|
|
293
570
|
"typen, precis som de förekommer i meningen.",
|
|
294
571
|
),
|
|
572
|
+
UKRAINIAN: PromptConfig(
|
|
573
|
+
default_prompt_label_mapping={
|
|
574
|
+
"b-per": "особа",
|
|
575
|
+
"i-per": "особа",
|
|
576
|
+
"b-loc": "місце",
|
|
577
|
+
"i-loc": "місце",
|
|
578
|
+
"b-org": "організація",
|
|
579
|
+
"i-org": "організація",
|
|
580
|
+
"b-misc": "різне",
|
|
581
|
+
"i-misc": "різне",
|
|
582
|
+
},
|
|
583
|
+
default_prompt_prefix="Нижче наведені речення та JSON-словники з іменованими "
|
|
584
|
+
"сутностями, які присутні у даному реченні.",
|
|
585
|
+
default_prompt_template="Речення: {text}\nІменовані сутності: {label}",
|
|
586
|
+
default_instruction_prompt="Речення: {text}\n\n"
|
|
587
|
+
"Ідентифікуйте іменовані сутності у "
|
|
588
|
+
"реченні. Ви повинні вивести це як JSON-словник з ключами {labels_str}. "
|
|
589
|
+
"Значення мають бути списками іменованих сутностей цього типу, точно "
|
|
590
|
+
"такими, як вони з'являються у реченні.",
|
|
591
|
+
),
|
|
295
592
|
}
|