EuroEval 16.4.0__py3-none-any.whl → 16.5.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +6 -0
- euroeval/benchmark_config_factory.py +51 -46
- euroeval/benchmark_modules/base.py +6 -5
- euroeval/benchmark_modules/hf.py +2 -9
- euroeval/benchmark_modules/litellm.py +14 -12
- euroeval/benchmark_modules/vllm.py +17 -10
- euroeval/benchmarker.py +61 -44
- euroeval/caching_utils.py +1 -1
- euroeval/cli.py +86 -8
- euroeval/constants.py +3 -0
- euroeval/data_loading.py +78 -30
- euroeval/data_models.py +326 -326
- euroeval/dataset_configs/__init__.py +10 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +25 -29
- euroeval/dataset_configs/danish.py +51 -88
- euroeval/dataset_configs/dutch.py +48 -86
- euroeval/dataset_configs/english.py +45 -76
- euroeval/dataset_configs/estonian.py +36 -38
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -68
- euroeval/dataset_configs/french.py +39 -74
- euroeval/dataset_configs/german.py +45 -81
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -78
- euroeval/dataset_configs/latvian.py +28 -34
- euroeval/dataset_configs/lithuanian.py +22 -26
- euroeval/dataset_configs/norwegian.py +72 -114
- euroeval/dataset_configs/polish.py +33 -60
- euroeval/dataset_configs/portuguese.py +33 -65
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +19 -24
- euroeval/dataset_configs/spanish.py +42 -76
- euroeval/dataset_configs/swedish.py +48 -84
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +3 -2
- euroeval/generation.py +5 -4
- euroeval/generation_utils.py +6 -5
- euroeval/languages.py +395 -323
- euroeval/metrics/huggingface.py +14 -3
- euroeval/metrics/llm_as_a_judge.py +1 -1
- euroeval/model_cache.py +6 -5
- euroeval/model_loading.py +1 -1
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +82 -43
- euroeval/prompt_templates/multiple_choice.py +81 -41
- euroeval/prompt_templates/named_entity_recognition.py +125 -44
- euroeval/prompt_templates/reading_comprehension.py +92 -43
- euroeval/prompt_templates/sentiment_classification.py +91 -43
- euroeval/prompt_templates/summarization.py +64 -39
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +4 -3
- euroeval/speed_benchmark.py +2 -1
- euroeval/task_group_utils/multiple_choice_classification.py +2 -1
- euroeval/task_group_utils/question_answering.py +24 -13
- euroeval/task_group_utils/sequence_classification.py +5 -4
- euroeval/task_group_utils/text_to_text.py +2 -1
- euroeval/task_group_utils/token_classification.py +11 -8
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +19 -10
- euroeval/types.py +10 -9
- euroeval/utils.py +6 -3
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +194 -37
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.4.0.dist-info/RECORD +0 -75
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
euroeval/metrics/huggingface.py
CHANGED
|
@@ -121,7 +121,10 @@ class HuggingFaceMetric(Metric):
|
|
|
121
121
|
if self.metric is None:
|
|
122
122
|
self.download(cache_dir=benchmark_config.cache_dir)
|
|
123
123
|
|
|
124
|
-
assert self.metric is not None
|
|
124
|
+
assert self.metric is not None, (
|
|
125
|
+
"Metric has not been downloaded. Please call download() before using the "
|
|
126
|
+
"__call__ method."
|
|
127
|
+
)
|
|
125
128
|
|
|
126
129
|
with no_terminal_output(disable=benchmark_config.verbose):
|
|
127
130
|
results = self.metric.compute(
|
|
@@ -140,13 +143,21 @@ class HuggingFaceMetric(Metric):
|
|
|
140
143
|
if isinstance(score, np.floating):
|
|
141
144
|
score = float(score)
|
|
142
145
|
|
|
146
|
+
self.close()
|
|
143
147
|
return score
|
|
144
148
|
|
|
149
|
+
def close(self) -> None:
|
|
150
|
+
"""Close any resources held by the metric."""
|
|
151
|
+
if self.metric is not None:
|
|
152
|
+
if self.metric.filelock is not None:
|
|
153
|
+
self.metric.filelock.release(force=True)
|
|
154
|
+
if self.metric.writer is not None:
|
|
155
|
+
self.metric.writer.finalize(close_stream=True)
|
|
156
|
+
|
|
145
157
|
def __del__(self) -> None:
|
|
146
158
|
"""Clean up the metric from memory."""
|
|
147
159
|
if self.metric is not None:
|
|
148
|
-
|
|
149
|
-
self.metric.writer.close()
|
|
160
|
+
self.close()
|
|
150
161
|
del self.metric
|
|
151
162
|
|
|
152
163
|
|
|
@@ -9,7 +9,6 @@ from pydantic import BaseModel, Field
|
|
|
9
9
|
|
|
10
10
|
from ..exceptions import InvalidBenchmark
|
|
11
11
|
from ..logging_utils import log
|
|
12
|
-
from ..model_cache import ModelCache
|
|
13
12
|
from ..utils import extract_json_dict_from_string
|
|
14
13
|
from .base import Metric
|
|
15
14
|
|
|
@@ -111,6 +110,7 @@ class LLMAsAJudgeMetric(Metric):
|
|
|
111
110
|
"""
|
|
112
111
|
# Importing here to avoid circular imports
|
|
113
112
|
from ..benchmark_modules import LiteLLMModel
|
|
113
|
+
from ..model_cache import ModelCache
|
|
114
114
|
|
|
115
115
|
if not predictions or not references:
|
|
116
116
|
return None
|
euroeval/model_cache.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""ModelCache class for caching model outputs."""
|
|
2
2
|
|
|
3
|
+
import collections.abc as c
|
|
3
4
|
import hashlib
|
|
4
5
|
import json
|
|
5
6
|
import logging
|
|
@@ -94,7 +95,7 @@ class ModelCache:
|
|
|
94
95
|
with self.cache_path.open("w") as f:
|
|
95
96
|
json.dump(dict(), f)
|
|
96
97
|
|
|
97
|
-
def _hash_key(self, key: str |
|
|
98
|
+
def _hash_key(self, key: str | c.Sequence[dict[str, str]]) -> str:
|
|
98
99
|
"""Hash the key to use as an index in the cache.
|
|
99
100
|
|
|
100
101
|
Args:
|
|
@@ -107,7 +108,7 @@ class ModelCache:
|
|
|
107
108
|
return hashlib.md5(string=str(key).encode()).hexdigest()
|
|
108
109
|
|
|
109
110
|
def __getitem__(
|
|
110
|
-
self, key: str |
|
|
111
|
+
self, key: str | c.Sequence[dict[str, str]]
|
|
111
112
|
) -> SingleGenerativeModelOutput:
|
|
112
113
|
"""Get an item from the cache.
|
|
113
114
|
|
|
@@ -122,7 +123,7 @@ class ModelCache:
|
|
|
122
123
|
return self.cache[hashed_key]
|
|
123
124
|
|
|
124
125
|
def __setitem__(
|
|
125
|
-
self, key: str |
|
|
126
|
+
self, key: str | c.Sequence[dict[str, str]], value: SingleGenerativeModelOutput
|
|
126
127
|
) -> None:
|
|
127
128
|
"""Set an item in the cache.
|
|
128
129
|
|
|
@@ -140,7 +141,7 @@ class ModelCache:
|
|
|
140
141
|
self.cache_path.unlink()
|
|
141
142
|
del self.cache
|
|
142
143
|
|
|
143
|
-
def __contains__(self, key: str |
|
|
144
|
+
def __contains__(self, key: str | c.Sequence[dict[str, str]]) -> bool:
|
|
144
145
|
"""Check if a key is in the cache.
|
|
145
146
|
|
|
146
147
|
Args:
|
|
@@ -258,7 +259,7 @@ def load_cached_model_outputs(
|
|
|
258
259
|
The model output containing the cached sequences.
|
|
259
260
|
"""
|
|
260
261
|
input_column = "messages" if "messages" in cached_dataset.column_names else "text"
|
|
261
|
-
cached_model_outputs:
|
|
262
|
+
cached_model_outputs: c.Sequence[SingleGenerativeModelOutput] = [
|
|
262
263
|
cache[prompt] for prompt in cached_dataset[input_column]
|
|
263
264
|
]
|
|
264
265
|
|
euroeval/model_loading.py
CHANGED
|
@@ -35,7 +35,7 @@ def load_model(
|
|
|
35
35
|
Returns:
|
|
36
36
|
The model.
|
|
37
37
|
"""
|
|
38
|
-
log_once(f"
|
|
38
|
+
log_once(f"\nLoading the model {model_config.model_id}...")
|
|
39
39
|
|
|
40
40
|
# The order matters; the first model type that matches will be used. For this
|
|
41
41
|
# reason, they have been ordered in terms of the most common model types.
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""The different prompt templates used in EuroEval."""
|
|
2
2
|
|
|
3
|
+
from .classification import CLASSIFICATION_TEMPLATES
|
|
3
4
|
from .linguistic_acceptability import LA_TEMPLATES
|
|
4
5
|
from .multiple_choice import MULTIPLE_CHOICE_TEMPLATES
|
|
5
6
|
from .named_entity_recognition import NER_TEMPLATES
|
|
6
7
|
from .reading_comprehension import RC_TEMPLATES
|
|
7
8
|
from .sentiment_classification import SENT_TEMPLATES
|
|
8
9
|
from .summarization import SUMM_TEMPLATES
|
|
10
|
+
from .token_classification import TOKEN_CLASSIFICATION_TEMPLATES
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Templates for the classification task."""
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from ..data_models import PromptConfig
|
|
6
|
+
from ..languages import (
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SLOVAK,
|
|
28
|
+
SPANISH,
|
|
29
|
+
SWEDISH,
|
|
30
|
+
UKRAINIAN,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
if t.TYPE_CHECKING:
|
|
34
|
+
from ..languages import Language
|
|
35
|
+
|
|
36
|
+
CLASSIFICATION_TEMPLATES: dict["Language", PromptConfig] = {
|
|
37
|
+
ENGLISH: PromptConfig(
|
|
38
|
+
default_prompt_prefix="The following are texts and their labels.",
|
|
39
|
+
default_prompt_template="Text: {text}\nLabel: {label}",
|
|
40
|
+
default_instruction_prompt="Here is a text:\n'{text}'.\n\nClassify the text "
|
|
41
|
+
"into the categories {labels_str}, and reply with only the label.",
|
|
42
|
+
default_prompt_label_mapping="auto",
|
|
43
|
+
),
|
|
44
|
+
BULGARIAN: PromptConfig(
|
|
45
|
+
default_prompt_prefix="Следват текстове и техните етикети.",
|
|
46
|
+
default_prompt_template="Текст: {text}\nЕтикет: {label}",
|
|
47
|
+
default_instruction_prompt="Ето един текст:\n'{text}'.\n\nКласифицирайте "
|
|
48
|
+
"текста в категориите {labels_str} и отговорете само с етикета.",
|
|
49
|
+
default_prompt_label_mapping="auto",
|
|
50
|
+
),
|
|
51
|
+
CZECH: PromptConfig(
|
|
52
|
+
default_prompt_prefix="Následují texty a jejich štítky.",
|
|
53
|
+
default_prompt_template="Text: {text}\nŠtítek: {label}",
|
|
54
|
+
default_instruction_prompt="Zde je text:\n'{text}'.\n\nKlasifikujte text do "
|
|
55
|
+
"kategorií {labels_str} a odpovězte pouze štítkem.",
|
|
56
|
+
default_prompt_label_mapping="auto",
|
|
57
|
+
),
|
|
58
|
+
DANISH: PromptConfig(
|
|
59
|
+
default_prompt_prefix="Følgende er tekster og deres etiketter.",
|
|
60
|
+
default_prompt_template="Tekst: {text}\nEtiket: {label}",
|
|
61
|
+
default_instruction_prompt="Her er en tekst:\n'{text}'.\n\nKlassificer teksten "
|
|
62
|
+
"i kategorierne {labels_str}, og svar kun med etiketten.",
|
|
63
|
+
default_prompt_label_mapping="auto",
|
|
64
|
+
),
|
|
65
|
+
GERMAN: PromptConfig(
|
|
66
|
+
default_prompt_prefix="Im Folgenden sind Texte und ihre Labels aufgeführt.",
|
|
67
|
+
default_prompt_template="Text: {text}\nLabel: {label}",
|
|
68
|
+
default_instruction_prompt="Hier ist ein Text:\n'{text}'.\n\nKlassifiziere den "
|
|
69
|
+
"Text in die Kategorien {labels_str} und antworte nur mit dem Label.",
|
|
70
|
+
default_prompt_label_mapping="auto",
|
|
71
|
+
),
|
|
72
|
+
GREEK: PromptConfig(
|
|
73
|
+
default_prompt_prefix="Ακολουθούν κείμενα και οι ετικέτες τους.",
|
|
74
|
+
default_prompt_template="Κείμενο: {text}\nΕτικέτα: {label}",
|
|
75
|
+
default_instruction_prompt="Εδώ είναι ένα κείμενο:\n'{text}'.\n\n"
|
|
76
|
+
"Κατηγοριοποιήστε το κείμενο στις κατηγορίες {labels_str} και απαντήστε μόνο "
|
|
77
|
+
"με την ετικέτα.",
|
|
78
|
+
default_prompt_label_mapping="auto",
|
|
79
|
+
),
|
|
80
|
+
SPANISH: PromptConfig(
|
|
81
|
+
default_prompt_prefix="A continuación se presentan textos y sus etiquetas.",
|
|
82
|
+
default_prompt_template="Texto: {text}\nEtiqueta: {label}",
|
|
83
|
+
default_instruction_prompt="Aquí hay un texto:\n'{text}'.\n\nClasifica el "
|
|
84
|
+
"texto en las categorías {labels_str} y responde solo con la etiqueta.",
|
|
85
|
+
default_prompt_label_mapping="auto",
|
|
86
|
+
),
|
|
87
|
+
ESTONIAN: PromptConfig(
|
|
88
|
+
default_prompt_prefix="Järgnevad on tekstid ja nende sildid.",
|
|
89
|
+
default_prompt_template="Tekst: {text}\nSilt: {label}",
|
|
90
|
+
default_instruction_prompt="Siin on tekst:\n'{text}'.\n\nKlassifitseeri tekst "
|
|
91
|
+
"kategooriatesse {labels_str} ja vasta ainult sildiga.",
|
|
92
|
+
default_prompt_label_mapping="auto",
|
|
93
|
+
),
|
|
94
|
+
FINNISH: PromptConfig(
|
|
95
|
+
default_prompt_prefix="Seuraavassa on tekstejä ja niiden tunnisteita.",
|
|
96
|
+
default_prompt_template="Teksti: {text}\nTunniste: {label}",
|
|
97
|
+
default_instruction_prompt="Tässä on teksti:\n'{text}'.\n\nLuokittele teksti "
|
|
98
|
+
"kategorioihin {labels_str} ja vastaa vain tunnisteella.",
|
|
99
|
+
default_prompt_label_mapping="auto",
|
|
100
|
+
),
|
|
101
|
+
FAROESE: PromptConfig(
|
|
102
|
+
default_prompt_prefix="Hér eru tekster og teirra etikettir.",
|
|
103
|
+
default_prompt_template="Tekstur: {text}\nEtikettur: {label}",
|
|
104
|
+
default_instruction_prompt="Her er ein tekstur:\n'{text}'.\n\nFlokka teksturin "
|
|
105
|
+
"í bólkar {labels_str} og svara bert við etikettinum.",
|
|
106
|
+
default_prompt_label_mapping="auto",
|
|
107
|
+
),
|
|
108
|
+
FRENCH: PromptConfig(
|
|
109
|
+
default_prompt_prefix="Voici des textes et leurs étiquettes.",
|
|
110
|
+
default_prompt_template="Texte : {text}\nÉtiquette : {label}",
|
|
111
|
+
default_instruction_prompt="Voici un texte :\n'{text}'.\n\nClassifiez le texte "
|
|
112
|
+
"dans les catégories {labels_str} et répondez uniquement avec l'étiquette.",
|
|
113
|
+
default_prompt_label_mapping="auto",
|
|
114
|
+
),
|
|
115
|
+
ICELANDIC: PromptConfig(
|
|
116
|
+
default_prompt_prefix="Hér fyrir neðan eru textar og merkingar þeirra.",
|
|
117
|
+
default_prompt_template="Texti: {text}\nMerking: {label}",
|
|
118
|
+
default_instruction_prompt="Hér er texti:\n'{text}'.\n\nFlokkaðu textann "
|
|
119
|
+
"í flokkana {labels_str} og svaraðu aðeins með merkingenni.",
|
|
120
|
+
default_prompt_label_mapping="auto",
|
|
121
|
+
),
|
|
122
|
+
ITALIAN: PromptConfig(
|
|
123
|
+
default_prompt_prefix="Di seguito sono riportati testi e le loro etichette.",
|
|
124
|
+
default_prompt_template="Testo: {text}\nEtichetta: {label}",
|
|
125
|
+
default_instruction_prompt="Ecco un testo:\n'{text}'.\n\nClassifica il testo "
|
|
126
|
+
"nelle categorie {labels_str} e rispondi solo con l'etichetta.",
|
|
127
|
+
default_prompt_label_mapping="auto",
|
|
128
|
+
),
|
|
129
|
+
LITHUANIAN: PromptConfig(
|
|
130
|
+
default_prompt_prefix="Toliau pateikiami tekstai ir jų etiketės.",
|
|
131
|
+
default_prompt_template="Tekstas: {text}\nEtiketė: {label}",
|
|
132
|
+
default_instruction_prompt="Štai tekstas:\n'{text}'.\n\nKlasifikuokite tekstą "
|
|
133
|
+
"į kategorijas {labels_str} ir atsakykite tik etiketę.",
|
|
134
|
+
default_prompt_label_mapping="auto",
|
|
135
|
+
),
|
|
136
|
+
LATVIAN: PromptConfig(
|
|
137
|
+
default_prompt_prefix="Turpmāk ir teksti un to etiķetes.",
|
|
138
|
+
default_prompt_template="Teksts: {text}\nEtiķete: {label}",
|
|
139
|
+
default_instruction_prompt="Šeit ir teksts:\n'{text}'.\n\nKlasificējiet tekstu "
|
|
140
|
+
"kategorijās {labels_str} un atbildiet tikai ar etiķeti.",
|
|
141
|
+
default_prompt_label_mapping="auto",
|
|
142
|
+
),
|
|
143
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
144
|
+
default_prompt_prefix="Følgende er tekster og deres etiketter.",
|
|
145
|
+
default_prompt_template="Tekst: {text}\nEtikett: {label}",
|
|
146
|
+
default_instruction_prompt="Her er en tekst:\n'{text}'.\n\nKlassifiser teksten "
|
|
147
|
+
"i kategoriene {labels_str}, og svar kun med etiketten.",
|
|
148
|
+
default_prompt_label_mapping="auto",
|
|
149
|
+
),
|
|
150
|
+
DUTCH: PromptConfig(
|
|
151
|
+
default_prompt_prefix="Hieronder volgen teksten en hun labels.",
|
|
152
|
+
default_prompt_template="Tekst: {text}\nLabel: {label}",
|
|
153
|
+
default_instruction_prompt="Hier is een tekst:\n'{text}'.\n\nClassificeer de "
|
|
154
|
+
"tekst in de categorieën {labels_str} en antwoord alleen met het label.",
|
|
155
|
+
default_prompt_label_mapping="auto",
|
|
156
|
+
),
|
|
157
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
158
|
+
default_prompt_prefix="Følgjande er tekstar og deira etikettar.",
|
|
159
|
+
default_prompt_template="Tekst: {text}\nEtikett: {label}",
|
|
160
|
+
default_instruction_prompt="Her er ein tekst:\n'{text}'.\n\nKlassifiser "
|
|
161
|
+
"teksten i kategoriane {labels_str}, og svar berre med etiketten.",
|
|
162
|
+
default_prompt_label_mapping="auto",
|
|
163
|
+
),
|
|
164
|
+
NORWEGIAN: PromptConfig(
|
|
165
|
+
default_prompt_prefix="Følgende er tekster og deres etiketter.",
|
|
166
|
+
default_prompt_template="Tekst: {text}\nEtikett: {label}",
|
|
167
|
+
default_instruction_prompt="Her er en tekst:\n'{text}'.\n\nKlassifiser teksten "
|
|
168
|
+
"i kategoriene {labels_str}, og svar kun med etiketten.",
|
|
169
|
+
default_prompt_label_mapping="auto",
|
|
170
|
+
),
|
|
171
|
+
POLISH: PromptConfig(
|
|
172
|
+
default_prompt_prefix="Poniżej znajdują się teksty i ich etykiety.",
|
|
173
|
+
default_prompt_template="Tekst: {text}\nEtykieta: {label}",
|
|
174
|
+
default_instruction_prompt="Oto tekst:\n'{text}'.\n\nSklasyfikuj tekst do "
|
|
175
|
+
"kategorii {labels_str} i odpowiedz tylko etykietą.",
|
|
176
|
+
default_prompt_label_mapping="auto",
|
|
177
|
+
),
|
|
178
|
+
PORTUGUESE: PromptConfig(
|
|
179
|
+
default_prompt_prefix="A seguir estão textos e seus rótulos.",
|
|
180
|
+
default_prompt_template="Texto: {text}\nRótulo: {label}",
|
|
181
|
+
default_instruction_prompt="Aqui está um texto:\n'{text}'.\n\nClassifique o "
|
|
182
|
+
"texto nas categorias {labels_str} e responda apenas com o rótulo.",
|
|
183
|
+
default_prompt_label_mapping="auto",
|
|
184
|
+
),
|
|
185
|
+
SLOVAK: PromptConfig(
|
|
186
|
+
default_prompt_prefix="Nasledujú texty a ich štítky.",
|
|
187
|
+
default_prompt_template="Text: {text}\nŠtítok: {label}",
|
|
188
|
+
default_instruction_prompt="Tu je text:\n'{text}'.\n\nKlasifikujte text do "
|
|
189
|
+
"kategorií {labels_str} a odpovedzte iba štítkom.",
|
|
190
|
+
default_prompt_label_mapping="auto",
|
|
191
|
+
),
|
|
192
|
+
SWEDISH: PromptConfig(
|
|
193
|
+
default_prompt_prefix="Följande är texter och deras etiketter.",
|
|
194
|
+
default_prompt_template="Text: {text}\nEtikett: {label}",
|
|
195
|
+
default_instruction_prompt="Här är en text:\n'{text}'.\n\nKlassificera texten "
|
|
196
|
+
"i kategorierna {labels_str} och svara endast med etiketten.",
|
|
197
|
+
default_prompt_label_mapping="auto",
|
|
198
|
+
),
|
|
199
|
+
UKRAINIAN: PromptConfig(
|
|
200
|
+
default_prompt_prefix="Нижче наведено тексти та їхні позначки.",
|
|
201
|
+
default_prompt_template="Текст: {text}\nПозначка: {label}",
|
|
202
|
+
default_instruction_prompt="Ось текст:\n'{text}'.\n\nКласифікуйте текст у "
|
|
203
|
+
"категорії {labels_str} і відповідайте лише позначкою.",
|
|
204
|
+
default_prompt_label_mapping="auto",
|
|
205
|
+
),
|
|
206
|
+
}
|
|
@@ -4,48 +4,59 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
7
|
+
BULGARIAN,
|
|
8
|
+
CZECH,
|
|
9
|
+
DANISH,
|
|
10
|
+
DUTCH,
|
|
11
|
+
ENGLISH,
|
|
12
|
+
ESTONIAN,
|
|
13
|
+
FAROESE,
|
|
14
|
+
FINNISH,
|
|
15
|
+
FRENCH,
|
|
16
|
+
GERMAN,
|
|
17
|
+
GREEK,
|
|
18
|
+
ICELANDIC,
|
|
19
|
+
ITALIAN,
|
|
20
|
+
LATVIAN,
|
|
21
|
+
LITHUANIAN,
|
|
22
|
+
NORWEGIAN,
|
|
23
|
+
NORWEGIAN_BOKMÅL,
|
|
24
|
+
NORWEGIAN_NYNORSK,
|
|
25
|
+
POLISH,
|
|
26
|
+
PORTUGUESE,
|
|
27
|
+
SERBIAN,
|
|
28
|
+
SLOVAK,
|
|
29
|
+
SPANISH,
|
|
30
|
+
SWEDISH,
|
|
31
|
+
UKRAINIAN,
|
|
28
32
|
)
|
|
29
33
|
|
|
30
34
|
if t.TYPE_CHECKING:
|
|
31
|
-
from ..
|
|
35
|
+
from ..languages import Language
|
|
32
36
|
|
|
33
37
|
LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
34
|
-
|
|
38
|
+
BULGARIAN: PromptConfig(
|
|
39
|
+
default_prompt_label_mapping=dict(correct="да", incorrect="не"),
|
|
40
|
+
default_prompt_prefix="Следват изречения и дали са граматически правилни.",
|
|
41
|
+
default_prompt_template="Изречение: {text}\nГраматически правилно: {label}",
|
|
42
|
+
default_instruction_prompt="Изречение: {text}\n\nОпределете дали изречението е "
|
|
43
|
+
"граматически правилно или не. Отговорете с {labels_str}, и нищо друго.",
|
|
44
|
+
),
|
|
45
|
+
CZECH: PromptConfig(
|
|
35
46
|
default_prompt_label_mapping=dict(correct="ano", incorrect="ne"),
|
|
36
47
|
default_prompt_prefix="Následující jsou věty a zda jsou gramaticky správné.",
|
|
37
48
|
default_prompt_template="Věta: {text}\nGramaticky správná: {label}",
|
|
38
49
|
default_instruction_prompt="Věta: {text}\n\nUrčete, zda je věta gramaticky "
|
|
39
50
|
"správná nebo ne. Odpovězte {labels_str}, a nic jiné.",
|
|
40
51
|
),
|
|
41
|
-
|
|
52
|
+
DANISH: PromptConfig(
|
|
42
53
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nej"),
|
|
43
54
|
default_prompt_prefix="Følgende er sætninger og om de er grammatisk korrekte.",
|
|
44
55
|
default_prompt_template="Sætning: {text}\nGrammatisk korrekt: {label}",
|
|
45
56
|
default_instruction_prompt="Sætning: {text}\n\nBestem om sætningen er "
|
|
46
57
|
"grammatisk korrekt eller ej. Svar kun med {labels_str}, og intet andet.",
|
|
47
58
|
),
|
|
48
|
-
|
|
59
|
+
GERMAN: PromptConfig(
|
|
49
60
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nein"),
|
|
50
61
|
default_prompt_prefix="Die folgenden Sätze und ob sie grammatikalisch korrekt "
|
|
51
62
|
"sind.",
|
|
@@ -54,7 +65,15 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
54
65
|
"grammatikalisch korrekt ist oder nicht. Antworten Sie mit {labels_str}, und "
|
|
55
66
|
"nichts anderes.",
|
|
56
67
|
),
|
|
57
|
-
|
|
68
|
+
GREEK: PromptConfig(
|
|
69
|
+
default_prompt_label_mapping=dict(correct="ναι", incorrect="όχι"),
|
|
70
|
+
default_prompt_prefix="Οι ακόλουθες είναι προτάσεις και εάν είναι "
|
|
71
|
+
"γραμματικά σωστές.",
|
|
72
|
+
default_prompt_template="Πρόταση: {text}\nΓραμματικά σωστή: {label}",
|
|
73
|
+
default_instruction_prompt="Πρόταση: {text}\n\nΠροσδιορίστε εάν η πρόταση "
|
|
74
|
+
"είναι γραμματικά σωστή ή όχι. Απαντήστε με {labels_str}, και τίποτα άλλο.",
|
|
75
|
+
),
|
|
76
|
+
ENGLISH: PromptConfig(
|
|
58
77
|
default_prompt_label_mapping=dict(correct="yes", incorrect="no"),
|
|
59
78
|
default_prompt_prefix="The following are sentences and whether they are "
|
|
60
79
|
"grammatically correct.",
|
|
@@ -62,7 +81,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
62
81
|
default_instruction_prompt="Sentence: {text}\n\nDetermine whether the sentence "
|
|
63
82
|
"is grammatically correct or not. Answer with {labels_str}, and nothing else.",
|
|
64
83
|
),
|
|
65
|
-
|
|
84
|
+
SPANISH: PromptConfig(
|
|
66
85
|
default_prompt_label_mapping=dict(correct="sí", incorrect="no"),
|
|
67
86
|
default_prompt_prefix="Lo siguiente son textos y si son gramaticalmente "
|
|
68
87
|
"correctos.",
|
|
@@ -70,7 +89,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
70
89
|
default_instruction_prompt="Texto: {text}\n\nDetermina si el texto es "
|
|
71
90
|
"gramaticalmente correcto o no. Responde con {labels_str}, y nada más.",
|
|
72
91
|
),
|
|
73
|
-
|
|
92
|
+
ESTONIAN: PromptConfig(
|
|
74
93
|
default_prompt_label_mapping=dict(correct="jah", incorrect="ei"),
|
|
75
94
|
default_prompt_prefix="Järgnevad on laused ja kas need on grammatiliselt "
|
|
76
95
|
"õiged.",
|
|
@@ -78,7 +97,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
78
97
|
default_instruction_prompt="Lause: {text}\n\nOtsusta, kas lause on "
|
|
79
98
|
"grammatiliselt õige või mitte. Vasta {labels_str}, ja mitte midagi muud.",
|
|
80
99
|
),
|
|
81
|
-
|
|
100
|
+
POLISH: PromptConfig(
|
|
82
101
|
default_prompt_label_mapping=dict(correct="tak", incorrect="nie"),
|
|
83
102
|
default_prompt_prefix="Poniżej znajdują się teksty i informacja, czy są "
|
|
84
103
|
"gramatycznie poprawne.",
|
|
@@ -86,7 +105,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
86
105
|
default_instruction_prompt="Tekst: {text}\n\nOkreśl, czy tekst jest "
|
|
87
106
|
"gramatycznie poprawny. Odpowiedz używając wyłącznie {labels_str}.",
|
|
88
107
|
),
|
|
89
|
-
|
|
108
|
+
PORTUGUESE: PromptConfig(
|
|
90
109
|
default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
|
|
91
110
|
default_prompt_prefix="Seguem-se abaixo textos e se são "
|
|
92
111
|
"gramaticalmente correctos",
|
|
@@ -94,7 +113,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
94
113
|
default_instruction_prompt="Texto: {text}\n\nDetermina se o texto é "
|
|
95
114
|
"gramaticalmente correcto ou não. Responde com {labels_str}, e nada mais.",
|
|
96
115
|
),
|
|
97
|
-
|
|
116
|
+
FINNISH: PromptConfig(
|
|
98
117
|
default_prompt_label_mapping=dict(correct="kyllä", incorrect="ei"),
|
|
99
118
|
default_prompt_prefix="Seuraavat ovat lauseita ja ovatko ne "
|
|
100
119
|
"kieliopillisesti oikein.",
|
|
@@ -102,7 +121,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
102
121
|
default_instruction_prompt="Lause: {text}\n\nMääritä onko lause "
|
|
103
122
|
"oikein vai ei. Vastaa {labels_str}, ja ei mitään muuta.",
|
|
104
123
|
),
|
|
105
|
-
|
|
124
|
+
FAROESE: PromptConfig(
|
|
106
125
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nei"),
|
|
107
126
|
default_prompt_prefix="Hetta eru nakrir setningar og um teir eru mállæruliga "
|
|
108
127
|
"rættir.",
|
|
@@ -110,7 +129,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
110
129
|
default_instruction_prompt="Setningur: {text}\n\nGreindu hvort setningurin er "
|
|
111
130
|
"mállæruliga rættur ella ikki. Svara við {labels_str}, og einki annað.",
|
|
112
131
|
),
|
|
113
|
-
|
|
132
|
+
FRENCH: PromptConfig(
|
|
114
133
|
default_prompt_label_mapping=dict(correct="oui", incorrect="non"),
|
|
115
134
|
default_prompt_prefix="Les phrases suivantes indiquent si elles sont "
|
|
116
135
|
"grammaticalement correctes.",
|
|
@@ -119,7 +138,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
119
138
|
default_instruction_prompt="Phrase: {text}\n\nDéterminez si la phrase est "
|
|
120
139
|
"grammaticalement correcte ou non. Répondez par {labels_str}, et rien d'autre.",
|
|
121
140
|
),
|
|
122
|
-
|
|
141
|
+
ICELANDIC: PromptConfig(
|
|
123
142
|
default_prompt_label_mapping=dict(correct="já", incorrect="nei"),
|
|
124
143
|
default_prompt_prefix="Hér fyrir neðan eru setningar ásamt mati á því hvort "
|
|
125
144
|
"þær eru málfræðilega réttar.",
|
|
@@ -128,7 +147,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
128
147
|
"málfræðilega rétt. Svaraðu með 'já' ef setningin er rétt og 'nei' ef hún "
|
|
129
148
|
"er það ekki.",
|
|
130
149
|
),
|
|
131
|
-
|
|
150
|
+
ITALIAN: PromptConfig(
|
|
132
151
|
default_prompt_label_mapping=dict(correct="si", incorrect="no"),
|
|
133
152
|
default_prompt_prefix="Di seguito sono riportate le frasi e la loro "
|
|
134
153
|
"correttezza grammaticale.",
|
|
@@ -136,7 +155,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
136
155
|
default_instruction_prompt="Frase: {text}\n\nStabilite se la frase è "
|
|
137
156
|
"grammaticalmente corretta o meno. Rispondere con {labels_str}, e nient'altro.",
|
|
138
157
|
),
|
|
139
|
-
|
|
158
|
+
LITHUANIAN: PromptConfig(
|
|
140
159
|
default_prompt_label_mapping=dict(correct="taip", incorrect="ne"),
|
|
141
160
|
default_prompt_prefix="Toliau pateikti sakiniai ir ar jie yra gramatiškai "
|
|
142
161
|
"teisingi.",
|
|
@@ -144,14 +163,14 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
144
163
|
default_instruction_prompt="Sakinys: {text}\n\nNustatykite, ar sakinys yra "
|
|
145
164
|
"gramatiškai teisingas, ar ne. Atsakykite su {labels_str}, ir nieko kito.",
|
|
146
165
|
),
|
|
147
|
-
|
|
166
|
+
LATVIAN: PromptConfig(
|
|
148
167
|
default_prompt_label_mapping=dict(correct="jā", incorrect="nē"),
|
|
149
168
|
default_prompt_prefix="Šie ir teikumi un to gramatiskie pareizumi.",
|
|
150
169
|
default_prompt_template="Teikums: {text}\nGramatiski pareizs: {label}",
|
|
151
170
|
default_instruction_prompt="Teikums: {text}\n\nNoteiciet, vai teikums ir "
|
|
152
171
|
"gramatiski pareizs vai nē. Atbildiet ar {labels_str}, un neko citu.",
|
|
153
172
|
),
|
|
154
|
-
|
|
173
|
+
NORWEGIAN_BOKMÅL: PromptConfig(
|
|
155
174
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nei"),
|
|
156
175
|
default_prompt_prefix="Følgende er setninger og hvorvidt de er grammatisk "
|
|
157
176
|
"korrekte.",
|
|
@@ -159,7 +178,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
159
178
|
default_instruction_prompt="Setning: {text}\n\nBestem om setningen er "
|
|
160
179
|
"grammatisk korrekt eller ikke. Svar med {labels_str}, og ikke noe annet.",
|
|
161
180
|
),
|
|
162
|
-
|
|
181
|
+
DUTCH: PromptConfig(
|
|
163
182
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nee"),
|
|
164
183
|
default_prompt_prefix="Hieronder staan zinnen en of ze grammaticaal correct "
|
|
165
184
|
"zijn.",
|
|
@@ -167,7 +186,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
167
186
|
default_instruction_prompt="Zin: {text}\n\nBepaal of de zin grammaticaal "
|
|
168
187
|
"correct is of niet. Antwoord met {labels_str}, en verder niets.",
|
|
169
188
|
),
|
|
170
|
-
|
|
189
|
+
NORWEGIAN_NYNORSK: PromptConfig(
|
|
171
190
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nei"),
|
|
172
191
|
default_prompt_prefix="Følgende er setninger og hvorvidt de er grammatisk "
|
|
173
192
|
"korrekte.",
|
|
@@ -175,7 +194,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
175
194
|
default_instruction_prompt="Setning: {text}\n\nBestem om setningen er "
|
|
176
195
|
"grammatisk korrekt eller ikke. Svar med {labels_str}, og ikke noe annet.",
|
|
177
196
|
),
|
|
178
|
-
|
|
197
|
+
NORWEGIAN: PromptConfig(
|
|
179
198
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nei"),
|
|
180
199
|
default_prompt_prefix="Følgende er setninger og hvorvidt de er grammatisk "
|
|
181
200
|
"korrekte.",
|
|
@@ -183,7 +202,7 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
183
202
|
default_instruction_prompt="Setning: {text}\n\nBestem om setningen er "
|
|
184
203
|
"grammatisk korrekt eller ikke. Svar med {labels_str}, og ikke noe annet.",
|
|
185
204
|
),
|
|
186
|
-
|
|
205
|
+
SLOVAK: PromptConfig(
|
|
187
206
|
default_prompt_label_mapping=dict(correct="áno", incorrect="nie"),
|
|
188
207
|
default_prompt_prefix="Nasledujú vety a či sú gramaticky správne.",
|
|
189
208
|
default_prompt_template="Veta: {text}\nGramaticky správna: {label}",
|
|
@@ -192,7 +211,16 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
192
211
|
"Odpovedzte so {labels_str}, a nič iné."
|
|
193
212
|
),
|
|
194
213
|
),
|
|
195
|
-
|
|
214
|
+
SERBIAN: PromptConfig(
|
|
215
|
+
default_prompt_label_mapping=dict(correct="da", incorrect="ne"),
|
|
216
|
+
default_prompt_prefix="U nastavku su rečenice i da li su gramatički ispravne.",
|
|
217
|
+
default_prompt_template="Rečenica: {text}\nGramatički ispravna: {label}",
|
|
218
|
+
default_instruction_prompt=(
|
|
219
|
+
"Rečenica: {text}\n\nOdredite da li je rečenica gramatički ispravna ili "
|
|
220
|
+
"ne. Odgovorite sa {labels_str}, i ništa drugo."
|
|
221
|
+
),
|
|
222
|
+
),
|
|
223
|
+
SWEDISH: PromptConfig(
|
|
196
224
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nej"),
|
|
197
225
|
default_prompt_prefix="Följande är meningar och huruvida de är grammatiskt "
|
|
198
226
|
"korrekta.",
|
|
@@ -200,4 +228,15 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
200
228
|
default_instruction_prompt="Mening: {text}\n\nBestäm om meningen är "
|
|
201
229
|
"grammatiskt korrekt eller inte. Svara med {labels_str}, och inget annat.",
|
|
202
230
|
),
|
|
231
|
+
UKRAINIAN: PromptConfig(
|
|
232
|
+
default_prompt_label_mapping=dict(correct="так", incorrect="ні"),
|
|
233
|
+
default_prompt_prefix=(
|
|
234
|
+
"Нижче наведені речення і їхня граматична правильність."
|
|
235
|
+
),
|
|
236
|
+
default_prompt_template=("Речення: {text}\nГраматично правильно: {label}"),
|
|
237
|
+
default_instruction_prompt=(
|
|
238
|
+
"Речення: {text}\n\nВизначте, чи речення граматично правильне чи ні. "
|
|
239
|
+
"Відповідайте {labels_str}, і нічого більше."
|
|
240
|
+
),
|
|
241
|
+
),
|
|
203
242
|
}
|