EuroEval 16.2.2__py3-none-any.whl → 16.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +7 -4
- euroeval/benchmark_config_factory.py +0 -4
- euroeval/benchmark_modules/base.py +3 -16
- euroeval/benchmark_modules/fresh.py +5 -2
- euroeval/benchmark_modules/hf.py +107 -66
- euroeval/benchmark_modules/litellm.py +103 -55
- euroeval/benchmark_modules/vllm.py +155 -82
- euroeval/benchmarker.py +184 -129
- euroeval/caching_utils.py +79 -0
- euroeval/callbacks.py +5 -7
- euroeval/cli.py +1 -1
- euroeval/constants.py +9 -0
- euroeval/data_loading.py +14 -11
- euroeval/data_models.py +12 -4
- euroeval/dataset_configs/__init__.py +3 -0
- euroeval/dataset_configs/czech.py +79 -0
- euroeval/dataset_configs/danish.py +10 -13
- euroeval/dataset_configs/dutch.py +0 -3
- euroeval/dataset_configs/english.py +0 -3
- euroeval/dataset_configs/estonian.py +11 -1
- euroeval/dataset_configs/finnish.py +0 -3
- euroeval/dataset_configs/french.py +0 -3
- euroeval/dataset_configs/german.py +0 -3
- euroeval/dataset_configs/italian.py +0 -3
- euroeval/dataset_configs/latvian.py +2 -4
- euroeval/dataset_configs/lithuanian.py +68 -0
- euroeval/dataset_configs/norwegian.py +0 -3
- euroeval/dataset_configs/polish.py +0 -3
- euroeval/dataset_configs/portuguese.py +0 -3
- euroeval/dataset_configs/slovak.py +60 -0
- euroeval/dataset_configs/spanish.py +0 -3
- euroeval/dataset_configs/swedish.py +10 -15
- euroeval/finetuning.py +21 -15
- euroeval/generation.py +10 -10
- euroeval/generation_utils.py +2 -3
- euroeval/logging_utils.py +250 -0
- euroeval/metrics/base.py +0 -3
- euroeval/metrics/huggingface.py +10 -6
- euroeval/metrics/llm_as_a_judge.py +5 -3
- euroeval/metrics/pipeline.py +22 -9
- euroeval/metrics/speed.py +0 -3
- euroeval/model_cache.py +11 -14
- euroeval/model_config.py +4 -5
- euroeval/model_loading.py +3 -0
- euroeval/prompt_templates/linguistic_acceptability.py +30 -3
- euroeval/prompt_templates/multiple_choice.py +34 -1
- euroeval/prompt_templates/named_entity_recognition.py +71 -11
- euroeval/prompt_templates/reading_comprehension.py +41 -3
- euroeval/prompt_templates/sentiment_classification.py +34 -1
- euroeval/prompt_templates/summarization.py +26 -6
- euroeval/scores.py +7 -7
- euroeval/speed_benchmark.py +3 -5
- euroeval/task_group_utils/multiple_choice_classification.py +0 -3
- euroeval/task_group_utils/question_answering.py +0 -3
- euroeval/task_group_utils/sequence_classification.py +43 -31
- euroeval/task_group_utils/text_to_text.py +17 -8
- euroeval/task_group_utils/token_classification.py +10 -9
- euroeval/tokenisation_utils.py +22 -20
- euroeval/utils.py +30 -147
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/METADATA +182 -61
- euroeval-16.4.0.dist-info/RECORD +75 -0
- euroeval-16.2.2.dist-info/RECORD +0 -70
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0
euroeval/metrics/speed.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Inference speed metric."""
|
|
2
2
|
|
|
3
3
|
import collections.abc as c
|
|
4
|
-
import logging
|
|
5
4
|
import typing as t
|
|
6
5
|
|
|
7
6
|
from .base import Metric
|
|
@@ -11,8 +10,6 @@ if t.TYPE_CHECKING:
|
|
|
11
10
|
|
|
12
11
|
from ..data_models import BenchmarkConfig, DatasetConfig
|
|
13
12
|
|
|
14
|
-
logger: logging.Logger = logging.getLogger("euroeval")
|
|
15
|
-
|
|
16
13
|
|
|
17
14
|
class SpeedMetric(Metric):
|
|
18
15
|
"""Speed metric."""
|
euroeval/model_cache.py
CHANGED
|
@@ -8,11 +8,9 @@ import typing as t
|
|
|
8
8
|
from collections import defaultdict
|
|
9
9
|
from dataclasses import asdict
|
|
10
10
|
|
|
11
|
-
from tqdm.auto import tqdm
|
|
12
|
-
|
|
13
11
|
from .constants import NUM_GENERATION_TOKENS_FOR_CLASSIFICATION
|
|
14
12
|
from .data_models import GenerativeModelOutput, SingleGenerativeModelOutput
|
|
15
|
-
from .
|
|
13
|
+
from .logging_utils import get_pbar, log, log_once
|
|
16
14
|
|
|
17
15
|
if t.TYPE_CHECKING:
|
|
18
16
|
from pathlib import Path
|
|
@@ -20,9 +18,6 @@ if t.TYPE_CHECKING:
|
|
|
20
18
|
from datasets import Dataset
|
|
21
19
|
|
|
22
20
|
|
|
23
|
-
logger = logging.getLogger("euroeval")
|
|
24
|
-
|
|
25
|
-
|
|
26
21
|
class ModelCache:
|
|
27
22
|
"""A cache for model outputs.
|
|
28
23
|
|
|
@@ -65,9 +60,10 @@ class ModelCache:
|
|
|
65
60
|
with self.cache_path.open() as f:
|
|
66
61
|
json_cache = json.load(f)
|
|
67
62
|
except json.JSONDecodeError:
|
|
68
|
-
|
|
63
|
+
log(
|
|
69
64
|
f"Failed to load the cache from {self.cache_path}. The cache will be "
|
|
70
|
-
f"re-initialised."
|
|
65
|
+
f"re-initialised.",
|
|
66
|
+
level=logging.WARNING,
|
|
71
67
|
)
|
|
72
68
|
json_cache = dict()
|
|
73
69
|
with self.cache_path.open("w") as f:
|
|
@@ -89,9 +85,10 @@ class ModelCache:
|
|
|
89
85
|
with self.cache_path.open("w") as f:
|
|
90
86
|
json.dump(dumpable_cache, f)
|
|
91
87
|
except KeyError:
|
|
92
|
-
|
|
88
|
+
log(
|
|
93
89
|
f"Failed to load the cache from {self.cache_path}. The cache will be "
|
|
94
|
-
f"re-initialised."
|
|
90
|
+
f"re-initialised.",
|
|
91
|
+
level=logging.WARNING,
|
|
95
92
|
)
|
|
96
93
|
self.cache = dict()
|
|
97
94
|
with self.cache_path.open("w") as f:
|
|
@@ -172,18 +169,18 @@ class ModelCache:
|
|
|
172
169
|
|
|
173
170
|
# Double check that the number of inputs and outputs match
|
|
174
171
|
if not len(model_inputs) == len(model_output.sequences):
|
|
175
|
-
|
|
172
|
+
log(
|
|
176
173
|
f"Number of model inputs ({len(model_inputs)}) does not match the "
|
|
177
174
|
f"number of model outputs ({len(model_output.sequences)}). We will not "
|
|
178
|
-
f"cache the model outputs."
|
|
175
|
+
f"cache the model outputs.",
|
|
176
|
+
level=logging.WARNING,
|
|
179
177
|
)
|
|
180
178
|
return
|
|
181
179
|
|
|
182
180
|
# Store the generated sequences in the cache, one by one
|
|
183
|
-
with
|
|
181
|
+
with get_pbar(
|
|
184
182
|
iterable=model_inputs,
|
|
185
183
|
desc="Caching model outputs",
|
|
186
|
-
leave=False,
|
|
187
184
|
disable=hasattr(sys, "_called_from_test"),
|
|
188
185
|
) as pbar:
|
|
189
186
|
for sample_idx, model_input in enumerate(pbar):
|
euroeval/model_config.py
CHANGED
|
@@ -5,14 +5,12 @@ import typing as t
|
|
|
5
5
|
|
|
6
6
|
from . import benchmark_modules
|
|
7
7
|
from .exceptions import InvalidModel, NeedsEnvironmentVariable, NeedsExtraInstalled
|
|
8
|
+
from .logging_utils import log
|
|
8
9
|
|
|
9
10
|
if t.TYPE_CHECKING:
|
|
10
11
|
from .data_models import BenchmarkConfig, ModelConfig
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
logger = logging.getLogger("euroeval")
|
|
14
|
-
|
|
15
|
-
|
|
16
14
|
def get_model_config(
|
|
17
15
|
model_id: str, benchmark_config: "BenchmarkConfig"
|
|
18
16
|
) -> "ModelConfig":
|
|
@@ -51,9 +49,10 @@ def get_model_config(
|
|
|
51
49
|
elif isinstance(exists_or_err, NeedsEnvironmentVariable):
|
|
52
50
|
needs_env_vars.append(exists_or_err.env_var)
|
|
53
51
|
elif exists_or_err is True:
|
|
54
|
-
|
|
52
|
+
log(
|
|
55
53
|
f"The model {model_id!r} was identified by the "
|
|
56
|
-
f"{benchmark_module.__name__} benchmark module."
|
|
54
|
+
f"{benchmark_module.__name__} benchmark module.",
|
|
55
|
+
logging.DEBUG,
|
|
57
56
|
)
|
|
58
57
|
model_config = benchmark_module.get_model_config(
|
|
59
58
|
model_id=model_id, benchmark_config=benchmark_config
|
euroeval/model_loading.py
CHANGED
|
@@ -10,6 +10,7 @@ from .benchmark_modules import (
|
|
|
10
10
|
)
|
|
11
11
|
from .enums import InferenceBackend, ModelType
|
|
12
12
|
from .exceptions import InvalidModel
|
|
13
|
+
from .logging_utils import log_once
|
|
13
14
|
|
|
14
15
|
if t.TYPE_CHECKING:
|
|
15
16
|
from .benchmark_modules import BenchmarkModule
|
|
@@ -34,6 +35,8 @@ def load_model(
|
|
|
34
35
|
Returns:
|
|
35
36
|
The model.
|
|
36
37
|
"""
|
|
38
|
+
log_once(f"Loading the model {model_config.model_id}...")
|
|
39
|
+
|
|
37
40
|
# The order matters; the first model type that matches will be used. For this
|
|
38
41
|
# reason, they have been ordered in terms of the most common model types.
|
|
39
42
|
model_class: t.Type[BenchmarkModule]
|
|
euroeval/prompt_templates/linguistic_acceptability.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -14,6 +15,7 @@ from ..languages import (
|
|
|
14
15
|
FR,
|
|
15
16
|
IS,
|
|
16
17
|
IT,
|
|
18
|
+
LT,
|
|
17
19
|
LV,
|
|
18
20
|
NB,
|
|
19
21
|
NL,
|
|
@@ -21,6 +23,7 @@ from ..languages import (
|
|
|
21
23
|
NO,
|
|
22
24
|
PL,
|
|
23
25
|
PT,
|
|
26
|
+
SK,
|
|
24
27
|
SV,
|
|
25
28
|
)
|
|
26
29
|
|
|
@@ -28,6 +31,13 @@ if t.TYPE_CHECKING:
|
|
|
28
31
|
from ..data_models import Language
|
|
29
32
|
|
|
30
33
|
LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
34
|
+
CS: PromptConfig(
|
|
35
|
+
default_prompt_label_mapping=dict(correct="ano", incorrect="ne"),
|
|
36
|
+
default_prompt_prefix="Následující jsou věty a zda jsou gramaticky správné.",
|
|
37
|
+
default_prompt_template="Věta: {text}\nGramaticky správná: {label}",
|
|
38
|
+
default_instruction_prompt="Věta: {text}\n\nUrčete, zda je věta gramaticky "
|
|
39
|
+
"správná nebo ne. Odpovězte {labels_str}, a nic jiné.",
|
|
40
|
+
),
|
|
31
41
|
DA: PromptConfig(
|
|
32
42
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nej"),
|
|
33
43
|
default_prompt_prefix="Følgende er sætninger og om de er grammatisk korrekte.",
|
|
@@ -70,11 +80,11 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
70
80
|
),
|
|
71
81
|
PL: PromptConfig(
|
|
72
82
|
default_prompt_label_mapping=dict(correct="tak", incorrect="nie"),
|
|
73
|
-
default_prompt_prefix="Poniżej znajdują się teksty i czy są "
|
|
83
|
+
default_prompt_prefix="Poniżej znajdują się teksty i informacja, czy są "
|
|
74
84
|
"gramatycznie poprawne.",
|
|
75
85
|
default_prompt_template="Tekst: {text}\nGramatycznie poprawny: {label}",
|
|
76
|
-
default_instruction_prompt="Tekst: {text}\n\nOkreśl czy tekst jest "
|
|
77
|
-
"gramatycznie poprawny
|
|
86
|
+
default_instruction_prompt="Tekst: {text}\n\nOkreśl, czy tekst jest "
|
|
87
|
+
"gramatycznie poprawny. Odpowiedz używając wyłącznie {labels_str}.",
|
|
78
88
|
),
|
|
79
89
|
PT: PromptConfig(
|
|
80
90
|
default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
|
|
@@ -126,6 +136,14 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
126
136
|
default_instruction_prompt="Frase: {text}\n\nStabilite se la frase è "
|
|
127
137
|
"grammaticalmente corretta o meno. Rispondere con {labels_str}, e nient'altro.",
|
|
128
138
|
),
|
|
139
|
+
LT: PromptConfig(
|
|
140
|
+
default_prompt_label_mapping=dict(correct="taip", incorrect="ne"),
|
|
141
|
+
default_prompt_prefix="Toliau pateikti sakiniai ir ar jie yra gramatiškai "
|
|
142
|
+
"teisingi.",
|
|
143
|
+
default_prompt_template="Sakinys: {text}\nGramatiškai teisingas: {label}",
|
|
144
|
+
default_instruction_prompt="Sakinys: {text}\n\nNustatykite, ar sakinys yra "
|
|
145
|
+
"gramatiškai teisingas, ar ne. Atsakykite su {labels_str}, ir nieko kito.",
|
|
146
|
+
),
|
|
129
147
|
LV: PromptConfig(
|
|
130
148
|
default_prompt_label_mapping=dict(correct="jā", incorrect="nē"),
|
|
131
149
|
default_prompt_prefix="Šie ir teikumi un to gramatiskie pareizumi.",
|
|
@@ -165,6 +183,15 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
165
183
|
default_instruction_prompt="Setning: {text}\n\nBestem om setningen er "
|
|
166
184
|
"grammatisk korrekt eller ikke. Svar med {labels_str}, og ikke noe annet.",
|
|
167
185
|
),
|
|
186
|
+
SK: PromptConfig(
|
|
187
|
+
default_prompt_label_mapping=dict(correct="áno", incorrect="nie"),
|
|
188
|
+
default_prompt_prefix="Nasledujú vety a či sú gramaticky správne.",
|
|
189
|
+
default_prompt_template="Veta: {text}\nGramaticky správna: {label}",
|
|
190
|
+
default_instruction_prompt=(
|
|
191
|
+
"Veta: {text}\n\nUrčite, či je veta gramaticky správna alebo nie. "
|
|
192
|
+
"Odpovedzte so {labels_str}, a nič iné."
|
|
193
|
+
),
|
|
194
|
+
),
|
|
168
195
|
SV: PromptConfig(
|
|
169
196
|
default_prompt_label_mapping=dict(correct="ja", incorrect="nej"),
|
|
170
197
|
default_prompt_prefix="Följande är meningar och huruvida de är grammatiskt "
|
|
euroeval/prompt_templates/multiple_choice.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -13,6 +14,7 @@ from ..languages import (
|
|
|
13
14
|
FR,
|
|
14
15
|
IS,
|
|
15
16
|
IT,
|
|
17
|
+
LT,
|
|
16
18
|
LV,
|
|
17
19
|
NB,
|
|
18
20
|
NL,
|
|
@@ -20,6 +22,7 @@ from ..languages import (
|
|
|
20
22
|
NO,
|
|
21
23
|
PL,
|
|
22
24
|
PT,
|
|
25
|
+
SK,
|
|
23
26
|
SV,
|
|
24
27
|
)
|
|
25
28
|
|
|
@@ -28,6 +31,17 @@ if t.TYPE_CHECKING:
|
|
|
28
31
|
|
|
29
32
|
# TODO: Missing Faroese
|
|
30
33
|
MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
34
|
+
CS: PromptConfig(
|
|
35
|
+
default_prompt_prefix=(
|
|
36
|
+
"Následující jsou otázky s výběrem z více možností (s odpověďmi)."
|
|
37
|
+
),
|
|
38
|
+
default_prompt_template="Otázka: {text}\nOdpověď: {label}",
|
|
39
|
+
default_instruction_prompt=(
|
|
40
|
+
"Otázka: {text}\n\nOdpovězte na výše uvedenou otázku "
|
|
41
|
+
"pomocí {labels_str}, a nic jiného."
|
|
42
|
+
),
|
|
43
|
+
default_prompt_label_mapping="auto",
|
|
44
|
+
),
|
|
31
45
|
DA: PromptConfig(
|
|
32
46
|
default_prompt_prefix="Følgende er multiple choice spørgsmål (med svar).",
|
|
33
47
|
default_prompt_template="Spørgsmål: {text}\nSvar: {label}",
|
|
@@ -105,6 +119,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
105
119
|
"precedente con {labels_str}, e nient'altro.",
|
|
106
120
|
default_prompt_label_mapping="auto",
|
|
107
121
|
),
|
|
122
|
+
LT: PromptConfig(
|
|
123
|
+
default_prompt_prefix="Toliau pateikti daugiavariančiai klausimai "
|
|
124
|
+
"(su atsakymais).",
|
|
125
|
+
default_prompt_template="Klausimas: {text}\nAtsakymas: {label}",
|
|
126
|
+
default_instruction_prompt="Klausimas: {text}\n\nAtsakykite į aukščiau "
|
|
127
|
+
"pateiktą klausimą atsakydami {labels_str}, ir nieko daugiau.",
|
|
128
|
+
default_prompt_label_mapping="auto",
|
|
129
|
+
),
|
|
108
130
|
LV: PromptConfig(
|
|
109
131
|
default_prompt_prefix="Tālāk seko jautājumi ar vairākām atbilžu izvēlēm "
|
|
110
132
|
"(ar atbildēm).",
|
|
@@ -146,7 +168,18 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
146
168
|
"(z odpowiedziami).",
|
|
147
169
|
default_prompt_template="Pytanie: {text}\nOdpowiedź: {label}",
|
|
148
170
|
default_instruction_prompt="Pytanie: {text}\n\nOdpowiedz na powyższe pytanie, "
|
|
149
|
-
"
|
|
171
|
+
"używając {labels_str} i niczego więcej.",
|
|
172
|
+
default_prompt_label_mapping="auto",
|
|
173
|
+
),
|
|
174
|
+
SK: PromptConfig(
|
|
175
|
+
default_prompt_prefix=(
|
|
176
|
+
"Nasledujú otázky s viacerými možnosťami (s odpoveďami)."
|
|
177
|
+
),
|
|
178
|
+
default_prompt_template="Otázka: {text}\nOdpoveď: {label}",
|
|
179
|
+
default_instruction_prompt=(
|
|
180
|
+
"Otázka: {text}\n\n"
|
|
181
|
+
"Odpovedzte na nasledujúcu otázku použitím {labels_str}, a nič iné."
|
|
182
|
+
),
|
|
150
183
|
default_prompt_label_mapping="auto",
|
|
151
184
|
),
|
|
152
185
|
SV: PromptConfig(
|
|
euroeval/prompt_templates/named_entity_recognition.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -14,6 +15,7 @@ from ..languages import (
|
|
|
14
15
|
FR,
|
|
15
16
|
IS,
|
|
16
17
|
IT,
|
|
18
|
+
LT,
|
|
17
19
|
LV,
|
|
18
20
|
NB,
|
|
19
21
|
NL,
|
|
@@ -21,6 +23,7 @@ from ..languages import (
|
|
|
21
23
|
NO,
|
|
22
24
|
PL,
|
|
23
25
|
PT,
|
|
26
|
+
SK,
|
|
24
27
|
SV,
|
|
25
28
|
)
|
|
26
29
|
|
|
@@ -29,6 +32,25 @@ if t.TYPE_CHECKING:
|
|
|
29
32
|
|
|
30
33
|
|
|
31
34
|
NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
35
|
+
CS: PromptConfig(
|
|
36
|
+
default_prompt_label_mapping={
|
|
37
|
+
"b-per": "osoba",
|
|
38
|
+
"i-per": "osoba",
|
|
39
|
+
"b-loc": "místo",
|
|
40
|
+
"i-loc": "místo",
|
|
41
|
+
"b-org": "organizace",
|
|
42
|
+
"i-org": "organizace",
|
|
43
|
+
"b-misc": "různé",
|
|
44
|
+
"i-misc": "různé",
|
|
45
|
+
},
|
|
46
|
+
default_prompt_prefix="Následující jsou věty a JSON slovníky s pojmenovanými "
|
|
47
|
+
"entitami, které se v dané větě vyskytují.",
|
|
48
|
+
default_prompt_template="Věta: {text}\nPojmenované entity: {label}",
|
|
49
|
+
default_instruction_prompt="Věta: {text}\n\nIdentifikujte pojmenované entity "
|
|
50
|
+
"ve větě. Měli byste to vypsat jako JSON slovník s klíči {labels_str}. "
|
|
51
|
+
"Hodnoty by měly být seznamy pojmenovaných entit tohoto typu, přesně tak, "
|
|
52
|
+
"jak se objevují ve větě.",
|
|
53
|
+
),
|
|
32
54
|
DA: PromptConfig(
|
|
33
55
|
default_prompt_label_mapping={
|
|
34
56
|
"b-per": "person",
|
|
@@ -241,6 +263,25 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
241
263
|
"{labels_str}. I valori devono essere elenchi di entità "
|
|
242
264
|
"nominate di quel tipo, esattamente come appaiono nella frase.",
|
|
243
265
|
),
|
|
266
|
+
LT: PromptConfig(
|
|
267
|
+
default_prompt_label_mapping={
|
|
268
|
+
"b-per": "asmuo",
|
|
269
|
+
"i-per": "asmuo",
|
|
270
|
+
"b-loc": "vieta",
|
|
271
|
+
"i-loc": "vieta",
|
|
272
|
+
"b-org": "organizacija",
|
|
273
|
+
"i-org": "organizacija",
|
|
274
|
+
"b-misc": "kita",
|
|
275
|
+
"i-misc": "kita",
|
|
276
|
+
},
|
|
277
|
+
default_prompt_prefix="Toliau pateikti sakiniai ir JSON žodynai su vardiniais "
|
|
278
|
+
"vienetais, kurie pateikiame sakinyje.",
|
|
279
|
+
default_prompt_template="Sakinys: {text}\nVardiniai vienetai: {label}",
|
|
280
|
+
default_instruction_prompt="Sakinys: {text}\n\nIdentifikuokite vardinius "
|
|
281
|
+
"vienetus sakinyje. Turėtumėte pateikti tai kaip JSON žodyną su raktais "
|
|
282
|
+
"{labels_str}. Reikšmės turi būti to tipo vardinių vienetų sąrašai, "
|
|
283
|
+
"tiksliai taip, kaip jie rodomi sakinyje.",
|
|
284
|
+
),
|
|
244
285
|
LV: PromptConfig(
|
|
245
286
|
default_prompt_label_mapping={
|
|
246
287
|
"b-per": "persona",
|
|
@@ -341,20 +382,39 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
341
382
|
default_prompt_label_mapping={
|
|
342
383
|
"b-per": "osoba",
|
|
343
384
|
"i-per": "osoba",
|
|
344
|
-
"b-loc": "
|
|
345
|
-
"i-loc": "
|
|
385
|
+
"b-loc": "miejsce",
|
|
386
|
+
"i-loc": "miejsce",
|
|
346
387
|
"b-org": "organizacja",
|
|
347
388
|
"i-org": "organizacja",
|
|
348
|
-
"b-misc": "
|
|
349
|
-
"i-misc": "
|
|
389
|
+
"b-misc": "inne",
|
|
390
|
+
"i-misc": "inne",
|
|
391
|
+
},
|
|
392
|
+
default_prompt_prefix="Poniżej znajdują się zdania i słowniki JSON "
|
|
393
|
+
"z jednostkami nazewniczymi, które występują w danym zdaniu.",
|
|
394
|
+
default_prompt_template="Zdanie: {text}\nJednostki nazewnicze: {label}",
|
|
395
|
+
default_instruction_prompt="Zdanie: {text}\n\nZidentyfikuj jednostki "
|
|
396
|
+
"nazewnicze w zdaniu. Wypisz je jako słownik JSON z kluczami "
|
|
397
|
+
"{labels_str}. Wartości odpowiadające kluczom powinny być listami jednostek "
|
|
398
|
+
"nazewniczych danego typu, dokładnie tak, jak pojawiają się w zdaniu.",
|
|
399
|
+
),
|
|
400
|
+
SK: PromptConfig(
|
|
401
|
+
default_prompt_label_mapping={
|
|
402
|
+
"b-per": "osoba",
|
|
403
|
+
"i-per": "osoba",
|
|
404
|
+
"b-loc": "miesto",
|
|
405
|
+
"i-loc": "miesto",
|
|
406
|
+
"b-org": "organizácia",
|
|
407
|
+
"i-org": "organizácia",
|
|
408
|
+
"b-misc": "rôzne",
|
|
409
|
+
"i-misc": "rôzne",
|
|
350
410
|
},
|
|
351
|
-
default_prompt_prefix="
|
|
352
|
-
"
|
|
353
|
-
default_prompt_template="
|
|
354
|
-
default_instruction_prompt="
|
|
355
|
-
"
|
|
356
|
-
"{labels_str}.
|
|
357
|
-
"
|
|
411
|
+
default_prompt_prefix="Nasledujúce sú vety a JSON-objekty s pomenovanými "
|
|
412
|
+
"entitami, ktoré sa nachádzajú v danej vete.",
|
|
413
|
+
default_prompt_template="Veta: {text}\nPomenované entity: {label}",
|
|
414
|
+
default_instruction_prompt="Veta: {text}\n\nIdentifikujte pomenované "
|
|
415
|
+
"entity vo vete. Výstup by mal byť vo forme JSON-objektu s kľúčmi "
|
|
416
|
+
"{labels_str}. Hodnoty by mali byť zoznamy pomenovaných entít danej "
|
|
417
|
+
"kategórie, presne tak, ako sa vyskytujú vo vete.",
|
|
358
418
|
),
|
|
359
419
|
SV: PromptConfig(
|
|
360
420
|
default_prompt_label_mapping={
|
|
euroeval/prompt_templates/reading_comprehension.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -14,6 +15,7 @@ from ..languages import (
|
|
|
14
15
|
FR,
|
|
15
16
|
IS,
|
|
16
17
|
IT,
|
|
18
|
+
LT,
|
|
17
19
|
LV,
|
|
18
20
|
NB,
|
|
19
21
|
NL,
|
|
@@ -21,6 +23,7 @@ from ..languages import (
|
|
|
21
23
|
NO,
|
|
22
24
|
PL,
|
|
23
25
|
PT,
|
|
26
|
+
SK,
|
|
24
27
|
SV,
|
|
25
28
|
)
|
|
26
29
|
|
|
@@ -28,6 +31,19 @@ if t.TYPE_CHECKING:
|
|
|
28
31
|
from ..data_models import Language
|
|
29
32
|
|
|
30
33
|
RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
34
|
+
CS: PromptConfig(
|
|
35
|
+
default_prompt_prefix="Následující texty obsahují otázky a odpovědi.",
|
|
36
|
+
default_prompt_template=(
|
|
37
|
+
"Text: {text}\nOtázka: {question}\nOdpověď maximálně 3 slovy: {label}"
|
|
38
|
+
),
|
|
39
|
+
default_instruction_prompt=(
|
|
40
|
+
"Text: {text}\n\n"
|
|
41
|
+
"Odpovězte na následující otázku k výše uvedenému textu "
|
|
42
|
+
"maximálně 3 slovy.\n\n"
|
|
43
|
+
"Otázka: {question}"
|
|
44
|
+
),
|
|
45
|
+
default_prompt_label_mapping=dict(),
|
|
46
|
+
),
|
|
31
47
|
DA: PromptConfig(
|
|
32
48
|
default_prompt_prefix="Følgende er tekster med tilhørende spørgsmål og svar.",
|
|
33
49
|
default_prompt_template="Tekst: {text}\nSpørgsmål: {question}\nSvar med maks. "
|
|
@@ -116,6 +132,15 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
116
132
|
"sul in un massimo di 3 parole.\n\nDomanda: {question}",
|
|
117
133
|
default_prompt_label_mapping=dict(),
|
|
118
134
|
),
|
|
135
|
+
LT: PromptConfig(
|
|
136
|
+
default_prompt_prefix="Toliau pateikti tekstai su atitinkamais klausimais ir "
|
|
137
|
+
"atsakymais.",
|
|
138
|
+
default_prompt_template="Tekstas: {text}\nKlausimas: {question}\nAtsakykite ne "
|
|
139
|
+
"daugiau kaip 3 žodžiais: {label}",
|
|
140
|
+
default_instruction_prompt="Tekstas: {text}\n\nAtsakykite į šį klausimą apie "
|
|
141
|
+
"aukščiau pateiktą tekstą ne daugiau kaip 3 žodžiais.\n\nKlausimas: {question}",
|
|
142
|
+
default_prompt_label_mapping=dict(),
|
|
143
|
+
),
|
|
119
144
|
LV: PromptConfig(
|
|
120
145
|
default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
|
|
121
146
|
"atbildēm.",
|
|
@@ -162,10 +187,11 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
162
187
|
default_prompt_prefix=(
|
|
163
188
|
"Poniżej znajdują się teksty z towarzyszącymi pytaniami i odpowiedziami."
|
|
164
189
|
),
|
|
165
|
-
default_prompt_template="Tekst: {text}\nPytanie: {question}\nOdpowiedź
|
|
166
|
-
"maksymalnie 3
|
|
190
|
+
default_prompt_template="Tekst: {text}\nPytanie: {question}\nOdpowiedź z "
|
|
191
|
+
"użyciem maksymalnie 3 słów: {label}",
|
|
167
192
|
default_instruction_prompt="Tekst: {text}\n\nOdpowiedz na następujące pytanie "
|
|
168
|
-
"dotyczące powyższego tekstu
|
|
193
|
+
"dotyczące powyższego tekstu, używając maksymalnie 3 słów.\n\nPytanie: "
|
|
194
|
+
"{question}",
|
|
169
195
|
default_prompt_label_mapping=dict(),
|
|
170
196
|
),
|
|
171
197
|
PT: PromptConfig(
|
|
@@ -177,6 +203,18 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
177
203
|
"sobre o texto acima num máximo de 3 palavras.\n\nPergunta: {question}",
|
|
178
204
|
default_prompt_label_mapping=dict(),
|
|
179
205
|
),
|
|
206
|
+
SK: PromptConfig(
|
|
207
|
+
default_prompt_prefix=("Nasledujú texty s pridruženými otázkami a odpoveďami."),
|
|
208
|
+
default_prompt_template=(
|
|
209
|
+
"Text: {text}\nOtázka: {question}\nOdpoveď na maximálne 3 slová: {label}"
|
|
210
|
+
),
|
|
211
|
+
default_instruction_prompt=(
|
|
212
|
+
"Text: {text}\n\n"
|
|
213
|
+
"Odpovedzte na nasledujúcu otázku týkajúcu sa textu uvedeného vyššie "
|
|
214
|
+
"maximálne 3 slovami.\n\nOtázka: {question}"
|
|
215
|
+
),
|
|
216
|
+
default_prompt_label_mapping=dict(),
|
|
217
|
+
),
|
|
180
218
|
SV: PromptConfig(
|
|
181
219
|
default_prompt_prefix="Nedan följer texter med tillhörande frågor och svar.",
|
|
182
220
|
default_prompt_template="Text: {text}\nFråga: {question}\nSvar på max 3 ord: "
|
|
euroeval/prompt_templates/sentiment_classification.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -14,6 +15,7 @@ from ..languages import (
|
|
|
14
15
|
FR,
|
|
15
16
|
IS,
|
|
16
17
|
IT,
|
|
18
|
+
LT,
|
|
17
19
|
LV,
|
|
18
20
|
NB,
|
|
19
21
|
NL,
|
|
@@ -21,6 +23,7 @@ from ..languages import (
|
|
|
21
23
|
NO,
|
|
22
24
|
PL,
|
|
23
25
|
PT,
|
|
26
|
+
SK,
|
|
24
27
|
SV,
|
|
25
28
|
)
|
|
26
29
|
|
|
@@ -38,6 +41,16 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
38
41
|
default_instruction_prompt="Dokument: {text}\n\nKlassificer sentimentet i "
|
|
39
42
|
"dokumentet. Svar kun med {labels_str}, og intet andet.",
|
|
40
43
|
),
|
|
44
|
+
CS: PromptConfig(
|
|
45
|
+
default_prompt_label_mapping=dict(
|
|
46
|
+
positive="pozitivní", neutral="neutrální", negative="negativní"
|
|
47
|
+
),
|
|
48
|
+
default_prompt_prefix="Následují dokumenty a jejich sentiment, který může být "
|
|
49
|
+
"{labels_str}.",
|
|
50
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
51
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte sentiment v "
|
|
52
|
+
"dokumentu. Odpovězte pouze s {labels_str}, a nic jiného.",
|
|
53
|
+
),
|
|
41
54
|
DE: PromptConfig(
|
|
42
55
|
default_prompt_label_mapping=dict(
|
|
43
56
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
@@ -90,7 +103,7 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
90
103
|
default_prompt_template="Dokument: {text}\nSentyment: {label}",
|
|
91
104
|
default_instruction_prompt=(
|
|
92
105
|
"Dokument: {text}\n\nKlasyfikuj sentyment w dokumencie. "
|
|
93
|
-
"Odpowiedz
|
|
106
|
+
"Odpowiedz jednym słowem: {labels_str}."
|
|
94
107
|
),
|
|
95
108
|
),
|
|
96
109
|
PT: PromptConfig(
|
|
@@ -153,6 +166,16 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
153
166
|
default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
|
|
154
167
|
"documento. Rispondere con {labels_str}, e nient'altro.",
|
|
155
168
|
),
|
|
169
|
+
LT: PromptConfig(
|
|
170
|
+
default_prompt_label_mapping=dict(
|
|
171
|
+
positive="teigiamas", neutral="neutralus", negative="neigiamas"
|
|
172
|
+
),
|
|
173
|
+
default_prompt_prefix="Toliau pateikti dokumentai ir jų nuotaika, kuri "
|
|
174
|
+
"gali būti {labels_str}.",
|
|
175
|
+
default_prompt_template="Dokumentas: {text}\nNuotaika: {label}",
|
|
176
|
+
default_instruction_prompt="Dokumentas: {text}\n\nKlasifikuokite nuotaiką "
|
|
177
|
+
"dokumente. Atsakykite su {labels_str}, ir nieko kito.",
|
|
178
|
+
),
|
|
156
179
|
LV: PromptConfig(
|
|
157
180
|
default_prompt_label_mapping=dict(
|
|
158
181
|
positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
|
|
@@ -203,6 +226,16 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
203
226
|
default_instruction_prompt="Dokument: {text}\n\nKlassifiser følelsen i "
|
|
204
227
|
"teksten. Svar med {labels_str}, og ikke noe annet.",
|
|
205
228
|
),
|
|
229
|
+
SK: PromptConfig(
|
|
230
|
+
default_prompt_label_mapping=dict(
|
|
231
|
+
positive="pozitívne", neutral="neutrálne", negative="negatívne"
|
|
232
|
+
),
|
|
233
|
+
default_prompt_prefix="Nižšie sú dokumenty a ich sentiment, ktorý môže byť "
|
|
234
|
+
"{labels_str}.",
|
|
235
|
+
default_prompt_template="Dokument: {text}\nSentiment: {label}",
|
|
236
|
+
default_instruction_prompt="Dokument: {text}\n\nKlasifikujte pocit v "
|
|
237
|
+
"dokumente. Odpovedzte so {labels_str}, a nič iné.",
|
|
238
|
+
),
|
|
206
239
|
SV: PromptConfig(
|
|
207
240
|
default_prompt_label_mapping=dict(
|
|
208
241
|
positive="positiv", neutral="neutral", negative="negativ"
|
|
euroeval/prompt_templates/summarization.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing as t
|
|
|
4
4
|
|
|
5
5
|
from ..data_models import PromptConfig
|
|
6
6
|
from ..languages import (
|
|
7
|
+
CS,
|
|
7
8
|
DA,
|
|
8
9
|
DE,
|
|
9
10
|
EN,
|
|
@@ -13,6 +14,7 @@ from ..languages import (
|
|
|
13
14
|
FR,
|
|
14
15
|
IS,
|
|
15
16
|
IT,
|
|
17
|
+
LT,
|
|
16
18
|
LV,
|
|
17
19
|
NB,
|
|
18
20
|
NL,
|
|
@@ -28,6 +30,14 @@ if t.TYPE_CHECKING:
|
|
|
28
30
|
|
|
29
31
|
# TODO: Missing Faroese
|
|
30
32
|
SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
33
|
+
CS: PromptConfig(
|
|
34
|
+
default_prompt_prefix=("Následující jsou dokumenty s přiloženými souhrny."),
|
|
35
|
+
default_prompt_template=("Dokument: {text}\nSouhrn: {target_text}"),
|
|
36
|
+
default_instruction_prompt=(
|
|
37
|
+
"Dokument: {text}\n\nNapište souhrn výše uvedeného dokumentu."
|
|
38
|
+
),
|
|
39
|
+
default_prompt_label_mapping=dict(),
|
|
40
|
+
),
|
|
31
41
|
DA: PromptConfig(
|
|
32
42
|
default_prompt_prefix="Følgende er dokumenter med tilhørende resuméer.",
|
|
33
43
|
default_prompt_template="Dokument: {text}\nResumé: {target_text}",
|
|
@@ -96,11 +106,14 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
96
106
|
),
|
|
97
107
|
default_prompt_label_mapping=dict(),
|
|
98
108
|
),
|
|
99
|
-
|
|
100
|
-
default_prompt_prefix=
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
"
|
|
109
|
+
LT: PromptConfig(
|
|
110
|
+
default_prompt_prefix=(
|
|
111
|
+
"Žemiau pateikiami dokumentai su pridėtomis santraukomis."
|
|
112
|
+
),
|
|
113
|
+
default_prompt_template=("Dokumentas: {text}\nSantrauka: {target_text}"),
|
|
114
|
+
default_instruction_prompt=(
|
|
115
|
+
"Dokumentas: {text}\n\nParašykite aukščiau pateikto dokumento santrauką."
|
|
116
|
+
),
|
|
104
117
|
default_prompt_label_mapping=dict(),
|
|
105
118
|
),
|
|
106
119
|
IT: PromptConfig(
|
|
@@ -111,6 +124,13 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
111
124
|
"documento di cui sopra.",
|
|
112
125
|
default_prompt_label_mapping=dict(),
|
|
113
126
|
),
|
|
127
|
+
IS: PromptConfig(
|
|
128
|
+
default_prompt_prefix="Eftirfarandi eru skjöl með meðfylgjandi samantektum.",
|
|
129
|
+
default_prompt_template="Skjal: {text}\nSamantekt: {target_text}",
|
|
130
|
+
default_instruction_prompt="Skjal: {text}\n\nSkrifaðu samantekt á ofangreindu "
|
|
131
|
+
"skjali.",
|
|
132
|
+
default_prompt_label_mapping=dict(),
|
|
133
|
+
),
|
|
114
134
|
NB: PromptConfig(
|
|
115
135
|
default_prompt_prefix="Nedenfor følger dokumenter med tilhørende sammendrag.",
|
|
116
136
|
default_prompt_template="Dokument: {text}\nSammendrag: {target_text}",
|
|
@@ -142,7 +162,7 @@ SUMM_TEMPLATES: dict["Language", PromptConfig] = {
|
|
|
142
162
|
),
|
|
143
163
|
PL: PromptConfig(
|
|
144
164
|
default_prompt_prefix="Poniżej znajdują się artykuły z towarzyszącymi "
|
|
145
|
-
"streszczeniami.",
|
|
165
|
+
"im streszczeniami.",
|
|
146
166
|
default_prompt_template="Artykuł: {text}\nStreszczenie: {target_text}",
|
|
147
167
|
default_instruction_prompt="Artykuł: {text}\n\nNapisz streszczenie "
|
|
148
168
|
"powyższego artykułu.",
|