EuroEval euroeval-16.2.2-py3-none-any.whl → euroeval-16.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +4 -2
- euroeval/benchmark_modules/fresh.py +3 -1
- euroeval/benchmark_modules/hf.py +8 -4
- euroeval/benchmark_modules/litellm.py +5 -17
- euroeval/benchmark_modules/vllm.py +88 -23
- euroeval/benchmarker.py +110 -61
- euroeval/cli.py +1 -1
- euroeval/constants.py +3 -0
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/danish.py +0 -2
- euroeval/dataset_configs/dutch.py +0 -2
- euroeval/dataset_configs/english.py +0 -2
- euroeval/dataset_configs/finnish.py +0 -2
- euroeval/dataset_configs/french.py +0 -2
- euroeval/dataset_configs/german.py +0 -2
- euroeval/dataset_configs/italian.py +0 -2
- euroeval/dataset_configs/latvian.py +2 -3
- euroeval/dataset_configs/lithuanian.py +62 -0
- euroeval/dataset_configs/norwegian.py +0 -2
- euroeval/dataset_configs/polish.py +0 -2
- euroeval/dataset_configs/portuguese.py +0 -2
- euroeval/dataset_configs/spanish.py +0 -2
- euroeval/dataset_configs/swedish.py +0 -3
- euroeval/metrics/huggingface.py +1 -1
- euroeval/metrics/pipeline.py +5 -0
- euroeval/prompt_templates/linguistic_acceptability.py +9 -0
- euroeval/prompt_templates/multiple_choice.py +9 -0
- euroeval/prompt_templates/named_entity_recognition.py +20 -0
- euroeval/prompt_templates/reading_comprehension.py +10 -0
- euroeval/prompt_templates/sentiment_classification.py +11 -0
- euroeval/tokenisation_utils.py +8 -8
- euroeval/utils.py +1 -1
- {euroeval-16.2.2.dist-info → euroeval-16.3.0.dist-info}/METADATA +181 -60
- euroeval-16.3.0.dist-info/RECORD +71 -0
- euroeval-16.2.2.dist-info/RECORD +0 -70
- {euroeval-16.2.2.dist-info → euroeval-16.3.0.dist-info}/WHEEL +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.3.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.2.2.dist-info → euroeval-16.3.0.dist-info}/licenses/LICENSE +0 -0
euroeval/dataset_configs/finnish.py
CHANGED
@@ -1,7 +1,6 @@
 """All Finnish dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import FI
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -111,7 +110,6 @@ WINOGRANDE_FI_CONFIG = DatasetConfig(
     languages=[FI],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/french.py
CHANGED
@@ -1,7 +1,6 @@
 """All French dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import FR
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -123,7 +122,6 @@ WINOGRANDE_FR_CONFIG = DatasetConfig(
     languages=[FR],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/german.py
CHANGED
@@ -1,7 +1,6 @@
 """All German dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import DE
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -140,7 +139,6 @@ WINOGRANDE_DE_CONFIG = DatasetConfig(
     languages=[DE],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/italian.py
CHANGED
@@ -1,7 +1,6 @@
 """All Italian dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import IT
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -131,7 +130,6 @@ WINOGRANDE_IT_CONFIG = DatasetConfig(
     languages=[IT],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/latvian.py
CHANGED
@@ -1,7 +1,6 @@
 """All Latvian dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import LV
 from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM
 
@@ -25,7 +24,8 @@ SCALA_LV_CONFIG = DatasetConfig(
 
 FULLSTACK_NER_LV_CONFIG = DatasetConfig(
     name="fullstack-ner-lv",
-    pretty_name="the truncated version of the
+    pretty_name="the truncated version of the Latvian named entity recognition "
+    "dataset FullStack-NER-lv",
     huggingface_id="EuroEval/fullstack-ner-lv-mini",
     task=NER,
     languages=[LV],
@@ -90,6 +90,5 @@ WINOGRANDE_LV_CONFIG = DatasetConfig(
     languages=[LV],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
euroeval/dataset_configs/lithuanian.py
ADDED
@@ -0,0 +1,62 @@
+"""All Lithuanian dataset configurations used in EuroEval."""
+
+from ..data_models import DatasetConfig
+from ..languages import LT
+from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT
+
+### Official datasets ###
+
+LITHUANIAN_EMOTIONS_CONFIG = DatasetConfig(
+    name="lithuanian-emotions",
+    pretty_name="the truncated version of the Lithuanian sentiment "
+    "classification dataset Lithuanian Emotions",
+    huggingface_id="EuroEval/lithuanian-emotions-mini",
+    task=SENT,
+    languages=[LT],
+)
+
+SCALA_LT_CONFIG = DatasetConfig(
+    name="scala-lt",
+    pretty_name="the Lithuanian part of the linguistic acceptability dataset ScaLA",
+    huggingface_id="EuroEval/scala-lt",
+    task=LA,
+    languages=[LT],
+)
+
+WIKIANN_LT_CONFIG = DatasetConfig(
+    name="wikiann-lt",
+    pretty_name="the truncated version of the Lithuanian part of the named entity "
+    "recognition dataset WikiANN",
+    huggingface_id="EuroEval/wikiann-lt-mini",
+    task=NER,
+    languages=[LT],
+)
+
+MULTI_WIKI_QA_LT_CONFIG = DatasetConfig(
+    name="multi-wiki-qa-lt",
+    pretty_name="the truncated version of the Lithuanian part of the reading "
+    "comprehension dataset MultiWikiQA",
+    huggingface_id="EuroEval/multi-wiki-qa-lt-mini",
+    task=RC,
+    languages=[LT],
+)
+
+LT_HISTORY_CONFIG = DatasetConfig(
+    name="lt-history",
+    pretty_name="the Lithuanian knowledge dataset LT-History",
+    huggingface_id="EuroEval/lt-history",
+    task=KNOW,
+    languages=[LT],
+    splits=["train", "test"],
+)
+
+WINOGRANDE_LT_CONFIG = DatasetConfig(
+    name="winogrande-lt",
+    pretty_name="the Lithuanian common-sense reasoning dataset Winogrande-lt, "
+    "translated from the English Winogrande dataset",
+    huggingface_id="EuroEval/winogrande-lt",
+    task=COMMON_SENSE,
+    languages=[LT],
+    splits=["train", "test"],
+    _labels=["a", "b"],
+)
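The new Lithuanian configurations follow the same DatasetConfig pattern as the other languages, so the existing benchmark machinery picks them up by name. A minimal sketch of running two of them through the public Python API, assuming the documented Benchmarker interface (the model ID is a placeholder):

from euroeval import Benchmarker

# A minimal sketch, assuming the documented Benchmarker API; the model ID
# below is a placeholder, not a recommendation.
benchmarker = Benchmarker()
benchmarker.benchmark(
    model="<model-id>",
    dataset=["scala-lt", "multi-wiki-qa-lt"],  # two of the new Lithuanian datasets
)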
euroeval/dataset_configs/norwegian.py
CHANGED
@@ -1,7 +1,6 @@
 """All Norwegian dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import NB, NN, NO
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -226,7 +225,6 @@ WINOGRANDE_NO_CONFIG = DatasetConfig(
     languages=[NB, NN, NO],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/polish.py
CHANGED
@@ -1,7 +1,6 @@
 """All Polish dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import PL
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM
 
@@ -64,7 +63,6 @@ WINOGRANDE_PL_CONFIG = DatasetConfig(
     languages=[PL],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
 )
 
 EUROPEAN_VALUES_PL_CONFIG = DatasetConfig(
euroeval/dataset_configs/portuguese.py
CHANGED
@@ -1,7 +1,6 @@
 """All Portuguese dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import PT
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -101,7 +100,6 @@ WINOGRANDE_PT_CONFIG = DatasetConfig(
     languages=[PT],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/spanish.py
CHANGED
@@ -1,7 +1,6 @@
 """All Spanish dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import ES
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -129,7 +128,6 @@ WINOGRANDE_ES_CONFIG = DatasetConfig(
     languages=[ES],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
euroeval/dataset_configs/swedish.py
CHANGED
@@ -1,7 +1,6 @@
 """All Swedish dataset configurations used in EuroEval."""
 
 from ..data_models import DatasetConfig
-from ..enums import ModelType
 from ..languages import SV
 from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM
 
@@ -140,7 +139,6 @@ WINOGRANDE_SV_CONFIG = DatasetConfig(
     languages=[SV],
     splits=["train", "test"],
     _labels=["a", "b"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
 
@@ -177,6 +175,5 @@ SKOLPROV_CONFIG = DatasetConfig(
     task=KNOW,
     languages=[SV],
     splits=["train", "test"],
-    _allowed_model_types=[ModelType.GENERATIVE],
     unofficial=True,
 )
euroeval/metrics/huggingface.py
CHANGED
@@ -197,7 +197,7 @@ bert_score_metric = HuggingFaceMetric(
     huggingface_id="bertscore",
     results_key="f1",
     compute_kwargs=dict(
-        model_type="microsoft/mdeberta-v3-base", device="
+        model_type="microsoft/mdeberta-v3-base", device="auto", batch_size=1
     ),
 )
 
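For context, these compute_kwargs are forwarded to the Hugging Face evaluate implementation of BERTScore. A rough sketch of the equivalent direct call, assuming the standard evaluate interface (EuroEval presumably resolves device="auto" to a concrete device before the metric sees it; toy inputs for illustration):

import evaluate

# Sketch of the equivalent direct metric call, not EuroEval's internal code.
bertscore = evaluate.load("bertscore")
results = bertscore.compute(
    predictions=["a small example"],
    references=["a tiny example"],
    model_type="microsoft/mdeberta-v3-base",
    device="cpu",  # EuroEval passes "auto"; a concrete device is used here
    batch_size=1,
)
print(results["f1"])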
euroeval/metrics/pipeline.py
CHANGED
@@ -191,6 +191,11 @@ def european_values_preprocessing_fn(
             for idx, choice in idx_to_choice.items()
             if choice is not None
         }
+        if prediction not in idx_to_choice:
+            raise InvalidBenchmark(
+                f"The prediction {prediction} is not a valid index for the "
+                f"question with choices {idx_to_choice}."
+            )
         integer_prediction = idx_to_choice[prediction]
         integer_predictions.append(integer_prediction)
 
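The added guard replaces what would otherwise surface as a bare KeyError with an explicit InvalidBenchmark when the model predicts an index that has no matching answer choice. A minimal sketch of the behaviour, reusing the names from the hunk and assuming InvalidBenchmark is importable from euroeval.exceptions:

from euroeval.exceptions import InvalidBenchmark

idx_to_choice = {1: 10, 2: 20}  # hypothetical mapping from predicted index to choice
prediction = 3                  # a model output with no matching choice

# Without the guard, idx_to_choice[prediction] would raise a bare KeyError.
if prediction not in idx_to_choice:
    raise InvalidBenchmark(
        f"The prediction {prediction} is not a valid index for the "
        f"question with choices {idx_to_choice}."
    )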
euroeval/prompt_templates/linguistic_acceptability.py
CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
     FR,
     IS,
     IT,
+    LT,
     LV,
     NB,
     NL,
@@ -126,6 +127,14 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
         default_instruction_prompt="Frase: {text}\n\nStabilite se la frase è "
         "grammaticalmente corretta o meno. Rispondere con {labels_str}, e nient'altro.",
     ),
+    LT: PromptConfig(
+        default_prompt_label_mapping=dict(correct="taip", incorrect="ne"),
+        default_prompt_prefix="Toliau pateikti sakiniai ir ar jie yra gramatiškai "
+        "teisingi.",
+        default_prompt_template="Sakinys: {text}\nGramatiškai teisingas: {label}",
+        default_instruction_prompt="Sakinys: {text}\n\nNustatykite, ar sakinys yra "
+        "gramatiškai teisingas, ar ne. Atsakykite su {labels_str}, ir nieko kito.",
+    ),
     LV: PromptConfig(
         default_prompt_label_mapping=dict(correct="jā", incorrect="nē"),
         default_prompt_prefix="Šie ir teikumi un to gramatiskie pareizumi.",
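Each PromptConfig is a set of plain format strings. A sketch of how the new Lithuanian linguistic-acceptability template renders a few-shot example, assuming simple str.format substitution (EuroEval's actual prompt assembly is internal; the example sentence is hypothetical):

# Template and label mapping copied from the LT PromptConfig above.
template = "Sakinys: {text}\nGramatiškai teisingas: {label}"
label_mapping = dict(correct="taip", incorrect="ne")

print(template.format(
    text="Vilnius yra Lietuvos sostinė.",
    label=label_mapping["correct"],
))
# Sakinys: Vilnius yra Lietuvos sostinė.
# Gramatiškai teisingas: taip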
euroeval/prompt_templates/multiple_choice.py
CHANGED
@@ -13,6 +13,7 @@ from ..languages import (
     FR,
     IS,
     IT,
+    LT,
     LV,
     NB,
     NL,
@@ -105,6 +106,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
         "precedente con {labels_str}, e nient'altro.",
         default_prompt_label_mapping="auto",
     ),
+    LT: PromptConfig(
+        default_prompt_prefix="Toliau pateikti daugiavariančiai klausimai "
+        "(su atsakymais).",
+        default_prompt_template="Klausimas: {text}\nAtsakymas: {label}",
+        default_instruction_prompt="Klausimas: {text}\n\nAtsakykite į aukščiau "
+        "pateiktą klausimą atsakydami {labels_str}, ir nieko daugiau.",
+        default_prompt_label_mapping="auto",
+    ),
     LV: PromptConfig(
         default_prompt_prefix="Tālāk seko jautājumi ar vairākām atbilžu izvēlēm "
         "(ar atbildēm).",
euroeval/prompt_templates/named_entity_recognition.py
CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
     FR,
     IS,
     IT,
+    LT,
     LV,
     NB,
     NL,
@@ -241,6 +242,25 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
         "{labels_str}. I valori devono essere elenchi di entità "
         "nominate di quel tipo, esattamente come appaiono nella frase.",
     ),
+    LT: PromptConfig(
+        default_prompt_label_mapping={
+            "b-per": "asmuo",
+            "i-per": "asmuo",
+            "b-loc": "vieta",
+            "i-loc": "vieta",
+            "b-org": "organizacija",
+            "i-org": "organizacija",
+            "b-misc": "kita",
+            "i-misc": "kita",
+        },
+        default_prompt_prefix="Toliau pateikti sakiniai ir JSON žodynai su vardiniais "
+        "vienetais, kurie pateikiame sakinyje.",
+        default_prompt_template="Sakinys: {text}\nVardiniai vienetai: {label}",
+        default_instruction_prompt="Sakinys: {text}\n\nIdentifikuokite vardinius "
+        "vienetus sakinyje. Turėtumėte pateikti tai kaip JSON žodyną su raktais "
+        "{labels_str}. Reikšmės turi būti to tipo vardinių vienetų sąrašai, "
+        "tiksliai taip, kaip jie rodomi sakinyje.",
+    ),
     LV: PromptConfig(
         default_prompt_label_mapping={
             "b-per": "persona",
euroeval/prompt_templates/reading_comprehension.py
CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
     FR,
     IS,
     IT,
+    LT,
     LV,
     NB,
     NL,
@@ -116,6 +117,15 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
         "sul in un massimo di 3 parole.\n\nDomanda: {question}",
         default_prompt_label_mapping=dict(),
     ),
+    LT: PromptConfig(
+        default_prompt_prefix="Toliau pateikti tekstai su atitinkamais klausimais ir "
+        "atsakymais.",
+        default_prompt_template="Tekstas: {text}\nKlausimas: {question}\nAtsakykite ne "
+        "daugiau kaip 3 žodžiais: {label}",
+        default_instruction_prompt="Tekstas: {text}\n\nAtsakykite į šį klausimą apie "
+        "aukščiau pateiktą tekstą ne daugiau kaip 3 žodžiais.\n\nKlausimas: {question}",
+        default_prompt_label_mapping=dict(),
+    ),
     LV: PromptConfig(
         default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
         "atbildēm.",
euroeval/prompt_templates/sentiment_classification.py
CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
     FR,
     IS,
     IT,
+    LT,
     LV,
     NB,
     NL,
@@ -153,6 +154,16 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
         default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
         "documento. Rispondere con {labels_str}, e nient'altro.",
     ),
+    LT: PromptConfig(
+        default_prompt_label_mapping=dict(
+            positive="teigiamas", neutral="neutralus", negative="neigiamas"
+        ),
+        default_prompt_prefix="Toliau pateikti dokumentai ir jų nuotaika, kuri "
+        "gali būti {labels_str}.",
+        default_prompt_template="Dokumentas: {text}\nNuotaika: {label}",
+        default_instruction_prompt="Dokumentas: {text}\n\nKlasifikuokite nuotaiką "
+        "dokumente. Atsakykite su {labels_str}, ir nieko kito.",
+    ),
     LV: PromptConfig(
         default_prompt_label_mapping=dict(
             positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
euroeval/tokenisation_utils.py
CHANGED
@@ -521,7 +521,14 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
     Returns:
         Whether the tokeniser has a chat template.
     """
-    if hasattr(tokeniser, "chat_template"):
+    if isinstance(tokeniser, MistralCommonTokenizer):
+        log_once(
+            "The tokeniser is a Mistral tokeniser, so assuming that the model is "
+            "instruction tuned.",
+            level=logging.DEBUG,
+        )
+        return True
+    elif hasattr(tokeniser, "chat_template"):
         has_template = tokeniser.chat_template is not None
         if has_template:
             log_once(
@@ -530,13 +537,6 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
             level=logging.DEBUG,
         )
         return has_template
-    elif isinstance(tokeniser, MistralCommonTokenizer):
-        log_once(
-            "The tokeniser is a Mistral tokeniser, so assuming that the model is "
-            "instruction tuned.",
-            level=logging.DEBUG,
-        )
-        return True
     else:
         log_once(
             "We cannot find a chat template for the tokeniser, so assuming that the "
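The reordering puts the concrete type check before the generic attribute check. This matters if a MistralCommonTokenizer also exposes a chat_template attribute set to None: under the old order the hasattr branch matched first and the function reported no chat template. A sketch of that presumed failure mode, using a hypothetical stand-in class:

class FakeMistralTokeniser:
    """Hypothetical stand-in for MistralCommonTokenizer."""
    chat_template = None

tok = FakeMistralTokeniser()
print(hasattr(tok, "chat_template"))   # True: the old code took this branch...
print(tok.chat_template is not None)   # ...and returned False ("no template")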
euroeval/utils.py
CHANGED
@@ -462,7 +462,7 @@ def extract_json_dict_from_string(s: str) -> dict | None:
     Returns:
         The extracted JSON dictionary, or None if no JSON dictionary could be found.
     """
-    json_regex = r"\{[^{}]
+    json_regex = r"\{[^{}]*?\}"
     if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
         logger.debug(
             "The model output does not contain any JSON dictionary, so cannot parse "