EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of EuroEval might be problematic.

Files changed (64)
  1. euroeval/__init__.py +8 -7
  2. euroeval/benchmark_config_factory.py +3 -7
  3. euroeval/benchmark_modules/base.py +35 -19
  4. euroeval/benchmark_modules/fresh.py +24 -19
  5. euroeval/benchmark_modules/hf.py +136 -154
  6. euroeval/benchmark_modules/litellm.py +190 -110
  7. euroeval/benchmark_modules/vllm.py +199 -139
  8. euroeval/benchmarker.py +49 -22
  9. euroeval/cli.py +3 -3
  10. euroeval/constants.py +19 -15
  11. euroeval/data_loading.py +33 -28
  12. euroeval/data_models.py +73 -23
  13. euroeval/dataset_configs/__init__.py +2 -0
  14. euroeval/dataset_configs/danish.py +35 -1
  15. euroeval/dataset_configs/dutch.py +38 -1
  16. euroeval/dataset_configs/english.py +38 -1
  17. euroeval/dataset_configs/estonian.py +95 -0
  18. euroeval/dataset_configs/faroese.py +38 -0
  19. euroeval/dataset_configs/finnish.py +39 -1
  20. euroeval/dataset_configs/french.py +38 -1
  21. euroeval/dataset_configs/german.py +38 -1
  22. euroeval/dataset_configs/icelandic.py +39 -1
  23. euroeval/dataset_configs/italian.py +38 -1
  24. euroeval/dataset_configs/latvian.py +81 -0
  25. euroeval/dataset_configs/norwegian.py +38 -1
  26. euroeval/dataset_configs/portuguese.py +38 -1
  27. euroeval/dataset_configs/spanish.py +38 -1
  28. euroeval/dataset_configs/swedish.py +38 -1
  29. euroeval/enums.py +0 -6
  30. euroeval/finetuning.py +6 -6
  31. euroeval/generation.py +25 -14
  32. euroeval/generation_utils.py +90 -20
  33. euroeval/languages.py +947 -187
  34. euroeval/metrics/__init__.py +6 -0
  35. euroeval/metrics/base.py +76 -0
  36. euroeval/metrics/huggingface.py +192 -0
  37. euroeval/metrics/llm_as_a_judge.py +257 -0
  38. euroeval/metrics/pipeline.py +276 -0
  39. euroeval/metrics/speed.py +51 -0
  40. euroeval/model_cache.py +13 -1
  41. euroeval/prompt_templates/linguistic_acceptability.py +40 -2
  42. euroeval/prompt_templates/multiple_choice.py +23 -2
  43. euroeval/prompt_templates/named_entity_recognition.py +65 -2
  44. euroeval/prompt_templates/reading_comprehension.py +42 -2
  45. euroeval/prompt_templates/sentiment_classification.py +46 -2
  46. euroeval/prompt_templates/summarization.py +24 -4
  47. euroeval/scores.py +7 -2
  48. euroeval/speed_benchmark.py +6 -6
  49. euroeval/task_group_utils/multiple_choice_classification.py +19 -8
  50. euroeval/task_group_utils/question_answering.py +35 -28
  51. euroeval/task_group_utils/sequence_classification.py +128 -42
  52. euroeval/task_group_utils/text_to_text.py +7 -3
  53. euroeval/task_group_utils/token_classification.py +59 -73
  54. euroeval/tasks.py +33 -6
  55. euroeval/tokenization_utils.py +294 -207
  56. euroeval/utils.py +150 -35
  57. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA +13 -14
  58. euroeval-16.0.1.dist-info/RECORD +69 -0
  59. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt +0 -1
  60. euroeval/human_evaluation.py +0 -738
  61. euroeval/metrics.py +0 -470
  62. euroeval-15.16.0.dist-info/RECORD +0 -63
  63. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/WHEEL +0 -0
  64. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/licenses/LICENSE +0 -0

euroeval/metrics/pipeline.py ADDED
@@ -0,0 +1,276 @@
+ """Metrics based on a scikit-learn Pipeline."""
+
+ import collections.abc as c
+ import logging
+ import typing as t
+ from pathlib import Path
+
+ import cloudpickle
+ import huggingface_hub as hf_hub
+ import numpy as np
+ from scipy.special import expit as sigmoid
+
+ from ..exceptions import InvalidBenchmark
+ from ..utils import unscramble
+ from .base import Metric
+
+ if t.TYPE_CHECKING:
+     from datasets.arrow_dataset import Dataset
+     from sklearn.pipeline import Pipeline
+
+     from ..data_models import BenchmarkConfig, DatasetConfig
+
+ logger: logging.Logger = logging.getLogger("euroeval")
+
+
+ T = t.TypeVar("T", bound=int | float | str | bool)
+
+
+ class PreprocessingFunction(t.Protocol):
+     """A protocol for a preprocessing function."""
+
+     def __call__(
+         self, predictions: c.Sequence[int], dataset: "Dataset"
+     ) -> c.Sequence[int]:
+         """Preprocess the model predictions before they are passed to the pipeline.
+
+         Args:
+             predictions:
+                 The model predictions.
+             dataset:
+                 The dataset used for evaluation. This is only used in case any
+                 additional metadata is used to compute the metrics.
+
+         Returns:
+             The preprocessed model predictions.
+         """
+         ...
+
+
+ class PipelineMetric(Metric):
+     """Load a scikit-learn pipeline and use it to get scores from the predictions."""
+
+     def __init__(
+         self,
+         name: str,
+         pretty_name: str,
+         pipeline_repo: str,
+         pipeline_scoring_function: c.Callable[["Pipeline", c.Sequence], float],
+         pipeline_file_name: str = "pipeline.pkl",
+         preprocessing_fn: PreprocessingFunction | None = None,
+         postprocessing_fn: c.Callable[[float], tuple[float, str]] | None = None,
+     ) -> None:
+         """Initialise the pipeline transform metric.
+
+         Args:
+             name:
+                 The name of the metric in snake_case.
+             pretty_name:
+                 The pretty name of the metric, used for display purposes.
+             pipeline_repo:
+                 The Hugging Face repository ID of the scikit-learn pipeline to load.
+             pipeline_scoring_function:
+                 The function used to score the predictions with the pipeline. Takes
+                 the pipeline and a 1D sequence of predictions and returns a float
+                 score.
+             pipeline_file_name (optional):
+                 The name of the file to download from the Hugging Face repository.
+                 Defaults to "pipeline.pkl".
+             preprocessing_fn (optional):
+                 A function to apply to the predictions before they are passed to the
+                 pipeline. This is useful for preprocessing the predictions to match
+                 the expected input format of the pipeline. Defaults to None, in which
+                 case the predictions are passed to the pipeline unchanged.
+             postprocessing_fn (optional):
+                 A function to apply to the metric scores after they are computed,
+                 taking the score to the postprocessed score along with its string
+                 representation. Defaults to x -> (100 * x, f"{x:.2%}").
+         """
+         super().__init__(
+             name=name, pretty_name=pretty_name, postprocessing_fn=postprocessing_fn
+         )
+         self.pipeline_repo = pipeline_repo
+         self.pipeline_file_name = pipeline_file_name
+         self.pipeline_scoring_function = pipeline_scoring_function
+         self.pipeline: "Pipeline | None" = None
+         self.preprocessing_fn = preprocessing_fn
+
+     def __call__(
+         self,
+         predictions: c.Sequence,
+         references: c.Sequence,
+         dataset: "Dataset",
+         dataset_config: "DatasetConfig",
+         benchmark_config: "BenchmarkConfig",
+     ) -> float | None:
+         """Calculate the metric score using the scikit-learn pipeline.
+
+         Args:
+             predictions:
+                 The model predictions.
+             references:
+                 Not used, but required for consistency with the Metric interface.
+             dataset:
+                 The dataset used for evaluation. This is only used in case any
+                 additional metadata is used to compute the metrics.
+             dataset_config:
+                 The dataset configuration.
+             benchmark_config:
+                 The benchmark configuration.
+
+         Returns:
+             The calculated metric score, or None if the score should be ignored.
+         """
+         if self.pipeline is None:
+             self.pipeline = self._download_pipeline()
+         if self.preprocessing_fn is not None:
+             predictions = self.preprocessing_fn(
+                 predictions=predictions, dataset=dataset
+             )
+         return self.pipeline_scoring_function(self.pipeline, predictions)
+
+     def _download_pipeline(self) -> "Pipeline":
+         """Download the scikit-learn pipeline from the Hugging Face Hub.
+
+         Returns:
+             The downloaded scikit-learn pipeline.
+
+         Raises:
+             InvalidBenchmark:
+                 If the loading of the pipeline fails for any reason.
+         """
+         logger.debug(f"Loading pipeline from {self.pipeline_repo}...")
+         folder_path = hf_hub.HfApi(
+             token=unscramble("HjccJFhIozVymqXDVqTUTXKvYhZMTbfIjMxG_")
+         ).snapshot_download(repo_id=self.pipeline_repo, repo_type="model")
+         model_path = Path(folder_path, self.pipeline_file_name)
+         try:
+             with model_path.open(mode="rb") as f:
+                 pipeline = cloudpickle.load(f)
+         except Exception as e:
+             raise InvalidBenchmark(
+                 f"Failed to load pipeline from {self.pipeline_repo!r}: {e}"
+             ) from e
+         logger.debug(f"Successfully loaded pipeline: {pipeline}")
+         return pipeline
+
+
+ ### European Values Metric ###
+
+
+ def european_values_preprocessing_fn(
+     predictions: c.Sequence[int], dataset: "Dataset"
+ ) -> c.Sequence[int]:
+     """Preprocess the model predictions for the European Values metric.
+
+     Args:
+         predictions:
+             The model predictions, a sequence of integers representing the predicted
+             choices for each question.
+         dataset:
+             The dataset used for evaluation. This is only used in case any additional
+             metadata is used to compute the metrics.
+
+     Returns:
+         The preprocessed model predictions, a sequence of integers representing the
+         final predicted choices for each question after any necessary aggregation and
+         mapping.
+
+     Raises:
+         AssertionError:
+             If the number of predictions is not a multiple of 53, which is required
+             for the European Values metric.
+     """
+     num_questions = 53
+     num_phrasings_per_question = 5
+
+     # Convert the predictions to integers
+     integer_predictions = []
+     for prediction, idx_to_choice in zip(predictions, dataset["idx_to_choice"]):
+         idx_to_choice = {
+             int(idx): int(choice)
+             for idx, choice in idx_to_choice.items()
+             if choice is not None
+         }
+         integer_prediction = idx_to_choice[prediction]
+         integer_predictions.append(integer_prediction)
+
+     assert len(predictions) % num_questions == 0, (
+         f"The number of predictions ({len(predictions)}) is not a multiple of "
+         f"{num_questions}, which is required for the European Values metric."
+     )
+
+     # When we are using the situational version of the dataset, there are 5 phrasings
+     # for each question, so we need to aggregate the predictions by question, which we
+     # do using majority voting.
+     using_situational = len(predictions) == num_questions * num_phrasings_per_question
+     if using_situational:
+         # Reshape the predictions to a 2D array with `num_questions` rows (one for
+         # each question) and `num_phrasings_per_question` columns (one for each
+         # phrasing). The five phrasings for each question appear right after each
+         # other, e.g., (0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, ...)
+         # Shape: (num_questions, num_phrasings_per_question)
+         arr = np.array(
+             [
+                 integer_predictions[i : i + num_phrasings_per_question]
+                 for i in range(0, len(predictions), num_phrasings_per_question)
+             ]
+         )
+
+         # Double check that we reshaped the predictions correctly
+         for idx, pred in enumerate(predictions):
+             assert arr[idx // 5, idx % 5] == pred, (
+                 f"Reshaped predictions do not match the original predictions at index "
+                 f"{idx}: {arr[idx // 5, idx % 5]} != {pred}."
+             )
+
+         # Use majority voting to get the final prediction for each question
+         # Shape: (53,)
+         arr = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=arr)
+
+         # Convert the array to a list
+         integer_predictions = arr.tolist()
+
+     # Some of the questions are categorical and we're only interested in whether the
+     # model chooses a specific choice or not. This mapping takes the question index
+     # to the choice value that we're interested in.
+     question_choices = {
+         0: 1,
+         1: 5,
+         3: 3,
+         6: 1,
+         15: 4,
+         20: 2,
+         47: 8,
+         48: 7,
+         49: 4,
+         51: 4,
+         52: 4,
+     }
+
+     # Map the predictions to the choices we're interested in
+     integer_predictions = list(integer_predictions)
+     for question_idx, choice in question_choices.items():
+         integer_predictions[question_idx] = (
+             1 if integer_predictions[question_idx] == choice else 0
+         )
+
+     return integer_predictions
+
+
+ def european_values_scoring_function(
+     pipeline: "Pipeline", predictions: c.Sequence[int]
+ ) -> float:
+     """Scoring function for the European Values metric."""
+     normalised_predictions = pipeline[0].transform([predictions])
+     log_likelihoods = pipeline[1].transform(normalised_predictions)[0]
+     score = sigmoid(pipeline[2].alpha_ * (log_likelihoods - pipeline[2].center_))
+     return score.item()
+
+
+ european_values_metric = PipelineMetric(
+     name="european_values",
+     pretty_name="European Values",
+     pipeline_repo="EuroEval/european-values-pipeline",
+     pipeline_scoring_function=european_values_scoring_function,
+     preprocessing_fn=european_values_preprocessing_fn,
+ )
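
For orientation, the following is a minimal, self-contained sketch (not part of the diff or the release) of the majority-voting step that european_values_preprocessing_fn applies to the situational variant of the dataset: 53 questions with 5 adjacent phrasings each are collapsed to one answer per question. The toy predictions and the random seed are made up for illustration.

import numpy as np

num_questions, num_phrasings = 53, 5

# Hypothetical integer predictions; the 5 phrasings of each question are adjacent.
rng = np.random.default_rng(seed=0)
integer_predictions = rng.integers(low=1, high=5, size=num_questions * num_phrasings)

# Reshape to (num_questions, num_phrasings) and take the most frequent answer per row,
# mirroring the np.apply_along_axis / np.bincount call in the code above.
arr = integer_predictions.reshape(num_questions, num_phrasings)
majority_votes = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=arr)

print(majority_votes.shape)  # (53,)
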
euroeval/metrics/speed.py ADDED
@@ -0,0 +1,51 @@
+ """Inference speed metric."""
+
+ import collections.abc as c
+ import logging
+ import typing as t
+
+ from .base import Metric
+
+ if t.TYPE_CHECKING:
+     from datasets.arrow_dataset import Dataset
+
+     from ..data_models import BenchmarkConfig, DatasetConfig
+
+ logger: logging.Logger = logging.getLogger("euroeval")
+
+
+ class SpeedMetric(Metric):
+     """Speed metric."""
+
+     def __init__(self, name: str, pretty_name: str) -> None:
+         """Initialise the speed metric.
+
+         Args:
+             name:
+                 The name of the metric in snake_case.
+             pretty_name:
+                 The pretty name of the metric, used for display purposes.
+         """
+         super().__init__(
+             name=name,
+             pretty_name=pretty_name,
+             postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
+         )
+
+     def __call__(
+         self,
+         predictions: c.Sequence,
+         references: c.Sequence,
+         dataset: "Dataset",
+         dataset_config: "DatasetConfig",
+         benchmark_config: "BenchmarkConfig",
+     ) -> float | None:
+         """Not used with the speed metric, but required for consistency."""
+         raise NotImplementedError
+
+
+ speed_metric = SpeedMetric(name="speed", pretty_name="Tokens per second")
+
+ speed_short_metric = SpeedMetric(
+     name="speed_short", pretty_name="Tokens per second on short documents"
+ )
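
As a quick illustration (not part of the release), the two postprocessing conventions seen in these new metric modules differ only in how the raw score is formatted: the documented default used by PipelineMetric scales a fraction to a percentage, while SpeedMetric keeps the raw tokens-per-second value and formats it with a thousands separator.

def default_postprocess(score: float) -> tuple[float, str]:
    # Documented default for PipelineMetric: scale to a percentage.
    return 100 * score, f"{score:.2%}"

def speed_postprocess(raw_score: float) -> tuple[float, str]:
    # SpeedMetric: keep the raw value, format with a thousands separator.
    return raw_score, f"{raw_score:,.0f}"

print(default_postprocess(0.875))     # (87.5, '87.50%')
print(speed_postprocess(12345.678))   # (12345.678, '12,346')
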
euroeval/model_cache.py CHANGED
@@ -10,7 +10,9 @@ from dataclasses import asdict
  
  from tqdm.auto import tqdm
  
+ from .constants import NUM_GENERATION_TOKENS_FOR_CLASSIFICATION
  from .data_models import GenerativeModelOutput, SingleGenerativeModelOutput
+ from .utils import log_once
  
  if t.TYPE_CHECKING:
      from pathlib import Path
@@ -189,10 +191,20 @@ class ModelCache:
              # the indices of the top scores, to save space. Further, we only store
              # the scores if the generated sequence is shorter than the maximum
              # length
-             if model_output.scores is not None and self.max_generated_tokens < 8:
+             if (
+                 model_output.scores is not None
+                 and self.max_generated_tokens
+                 <= NUM_GENERATION_TOKENS_FOR_CLASSIFICATION
+             ):
                  assert model_output.scores is not None
                  scores = model_output.scores[sample_idx]
              else:
+                 if model_output.scores is not None:
+                     log_once(
+                         "The generated sequence is longer than the maximum "
+                         "length for classification. Not caching the scores.",
+                         level=logging.DEBUG,
+                     )
                  scores = None
              self[model_input] = SingleGenerativeModelOutput(
                  sequence=model_output.sequences[sample_idx], scores=scores
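
In plain terms, the model_cache.py change above replaces the hard-coded `< 8` threshold with a named constant, so logit scores are only cached when the task generates few enough tokens to be treated as classification. A minimal sketch of that rule follows; the constant's value below is assumed for illustration only, and the real value lives in euroeval/constants.py.

NUM_GENERATION_TOKENS_FOR_CLASSIFICATION = 8  # assumed value for illustration only

def should_cache_scores(scores: object | None, max_generated_tokens: int) -> bool:
    # Mirrors the condition in ModelCache: only cache scores for short,
    # classification-style generations.
    return (
        scores is not None
        and max_generated_tokens <= NUM_GENERATION_TOKENS_FOR_CLASSIFICATION
    )

print(should_cache_scores(scores=[[0.1, 0.9]], max_generated_tokens=1))    # True
print(should_cache_scores(scores=[[0.1, 0.9]], max_generated_tokens=256))  # False
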
euroeval/prompt_templates/linguistic_acceptability.py CHANGED
@@ -1,9 +1,32 @@
  """Templates for the Linguistic Acceptability task."""
  
+ import typing as t
+
  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
+ from ..languages import (
+     DA,
+     DE,
+     EN,
+     ES,
+     ET,
+     FI,
+     FO,
+     FR,
+     IS,
+     IT,
+     LV,
+     NB,
+     NL,
+     NN,
+     NO,
+     PT,
+     SV,
+ )
+
+ if t.TYPE_CHECKING:
+     from ..data_models import Language
  
- LA_TEMPLATES = {
+ LA_TEMPLATES: dict["Language", PromptConfig] = {
      DA: PromptConfig(
          default_prompt_label_mapping=dict(correct="ja", incorrect="nej"),
          default_prompt_prefix="Følgende er sætninger og om de er grammatisk korrekte.",
@@ -36,6 +59,14 @@ LA_TEMPLATES = {
          default_instruction_prompt="Texto: {text}\n\nDetermina si el texto es "
          "gramaticalmente correcto o no. Responde con {labels_str}, y nada más.",
      ),
+     ET: PromptConfig(
+         default_prompt_label_mapping=dict(correct="jah", incorrect="ei"),
+         default_prompt_prefix="Järgnevad on laused ja kas need on grammatiliselt "
+         "õiged.",
+         default_prompt_template="Lause: {text}\nGrammatikaliselt õige: {label}",
+         default_instruction_prompt="Lause: {text}\n\nOtsusta, kas lause on "
+         "grammatiliselt õige või mitte. Vasta {labels_str}, ja mitte midagi muud.",
+     ),
      PT: PromptConfig(
          default_prompt_label_mapping=dict(correct="sim", incorrect="não"),
          default_prompt_prefix="Seguem-se abaixo textos e se são "
@@ -85,6 +116,13 @@ LA_TEMPLATES = {
          default_instruction_prompt="Frase: {text}\n\nStabilite se la frase è "
          "grammaticalmente corretta o meno. Rispondere con {labels_str}, e nient'altro.",
      ),
+     LV: PromptConfig(
+         default_prompt_label_mapping=dict(correct="jā", incorrect="nē"),
+         default_prompt_prefix="Šie ir teikumi un to gramatiskie pareizumi.",
+         default_prompt_template="Teikums: {text}\nGramatiski pareizs: {label}",
+         default_instruction_prompt="Teikums: {text}\n\nNoteiciet, vai teikums ir "
+         "gramatiski pareizs vai nē. Atbildiet ar {labels_str}, un neko citu.",
+     ),
      NB: PromptConfig(
          default_prompt_label_mapping=dict(correct="ja", incorrect="nei"),
          default_prompt_prefix="Følgende er setninger og hvorvidt de er grammatisk "
euroeval/prompt_templates/multiple_choice.py CHANGED
@@ -1,10 +1,15 @@
  """Templates for all multiple choice tasks."""
  
+ import typing as t
+
  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FR, IS, IT, NB, NL, NN, NO, PT, SV
+ from ..languages import DA, DE, EN, ES, ET, FI, FR, IS, IT, LV, NB, NL, NN, NO, PT, SV
+
+ if t.TYPE_CHECKING:
+     from ..data_models import Language
  
  # TODO: Missing Faroese
- MULTIPLE_CHOICE_TEMPLATES = {
+ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
      DA: PromptConfig(
          default_prompt_prefix="Følgende er multiple choice spørgsmål (med svar).",
          default_prompt_template="Spørgsmål: {text}\nSvar: {label}",
@@ -36,6 +41,14 @@ MULTIPLE_CHOICE_TEMPLATES = {
          "usando solo {labels_str}, y nada más.",
          default_prompt_label_mapping="auto",
      ),
+     ET: PromptConfig(
+         default_prompt_prefix="Järgnevad on vastusevariantidega küsimused (koos "
+         "vastustega).",
+         default_prompt_template="Küsimus: {text}\nVastus: {label}",
+         default_instruction_prompt="Küsimus: {text}\n\nVasta ülaltoodud küsimusele "
+         "ainult {labels_str}, ja mitte millegi muuga.",
+         default_prompt_label_mapping="auto",
+     ),
      PT: PromptConfig(
          default_prompt_prefix="As seguintes são perguntas de escolha múltipla "
          "(com respostas).",
@@ -74,6 +87,14 @@ MULTIPLE_CHOICE_TEMPLATES = {
          "precedente con {labels_str}, e nient'altro.",
          default_prompt_label_mapping="auto",
      ),
+     LV: PromptConfig(
+         default_prompt_prefix="Tālāk seko jautājumi ar vairākām atbilžu izvēlēm "
+         "(ar atbildēm).",
+         default_prompt_template="Jautājums: {text}\nAtbilde: {label}",
+         default_instruction_prompt="Jautājums: {text}\n\nAtbildiet uz iepriekšējo "
+         "jautājumu, atbildot ar {labels_str}, un nekas cits.",
+         default_prompt_label_mapping="auto",
+     ),
      NB: PromptConfig(
          default_prompt_prefix="Følgende er flervalgsspørsmål (med svar).",
          default_prompt_template="Spørsmål: {text}\nSvar: {label}",
euroeval/prompt_templates/named_entity_recognition.py CHANGED
@@ -1,9 +1,33 @@
  """Templates for the Named Entity Recognition task."""
  
+ import typing as t
+
  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
+ from ..languages import (
+     DA,
+     DE,
+     EN,
+     ES,
+     ET,
+     FI,
+     FO,
+     FR,
+     IS,
+     IT,
+     LV,
+     NB,
+     NL,
+     NN,
+     NO,
+     PT,
+     SV,
+ )
+
+ if t.TYPE_CHECKING:
+     from ..data_models import Language
+
  
- NER_TEMPLATES = {
+ NER_TEMPLATES: dict["Language", PromptConfig] = {
      DA: PromptConfig(
          default_prompt_label_mapping={
              "b-per": "person",
@@ -80,6 +104,25 @@ NER_TEMPLATES = {
          "claves {labels_str}. Los valores deben ser listas de las "
          "entidades nombradas de ese tipo, exactamente como aparecen en la oración.",
      ),
+     ET: PromptConfig(
+         default_prompt_label_mapping={
+             "b-per": "inimene",
+             "i-per": "inimene",
+             "b-loc": "asukoht",
+             "i-loc": "asukoht",
+             "b-org": "organisatsioon",
+             "i-org": "organisatsioon",
+             "b-misc": "muu",
+             "i-misc": "muu",
+         },
+         default_prompt_prefix="Allpool on laused ja JSON-sõnastikud, mis sisaldavad "
+         "antud lauses esinevaid nimetatud üksuseid.",
+         default_prompt_template="Lause: {text}\nNimetatud üksused: {label}",
+         default_instruction_prompt="Lause: {text}\n\nTuvasta lauses "
+         "nimetatud üksused. Väljund peaks olema JSON-sõnastik, "
+         "mille võtmed on {labels_str}. Väärtused peaksid olema kindlat tüüpi nimetatud "
+         "üksuste loendid, täpselt nii nagu need lauses esinevad.",
+     ),
      PT: PromptConfig(
          default_prompt_label_mapping={
              "b-per": "pessoa",
@@ -197,6 +240,26 @@ NER_TEMPLATES = {
          "{labels_str}. I valori devono essere elenchi di entità "
          "nominate di quel tipo, esattamente come appaiono nella frase.",
      ),
+     LV: PromptConfig(
+         default_prompt_label_mapping={
+             "b-per": "persona",
+             "i-per": "persona",
+             "b-loc": "vieta",
+             "i-loc": "vieta",
+             "b-org": "organizācija",
+             "i-org": "organizācija",
+             "b-misc": "dažādi",
+             "i-misc": "dažādi",
+         },
+         default_prompt_prefix="Tālāk ir teikumi un JSON vārdnīcas ar nosauktajiem "
+         "objektiem, kas parādās dotajā teikumā.",
+         default_prompt_template="Teikums: {text}\nNosauktie objekti: {label}",
+         default_instruction_prompt="Teikums: {text}\n\n"
+         "Identificējiet nosauktos objektus "
+         "teikumā. Jums jāizvada šī informācija kā JSON vārdnīcu ar atslēgām "
+         "{labels_str}. Vērtībām jābūt šī tipa nosaukto objektu sarakstiem, "
+         "tieši tā, kā tie parādās teikumā.",
+     ),
      NB: PromptConfig(
          default_prompt_label_mapping={
              "b-per": "person",
euroeval/prompt_templates/reading_comprehension.py CHANGED
@@ -1,9 +1,32 @@
  """Templates for the Reading Comprehension task."""
  
+ import typing as t
+
  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
+ from ..languages import (
+     DA,
+     DE,
+     EN,
+     ES,
+     ET,
+     FI,
+     FO,
+     FR,
+     IS,
+     IT,
+     LV,
+     NB,
+     NL,
+     NN,
+     NO,
+     PT,
+     SV,
+ )
+
+ if t.TYPE_CHECKING:
+     from ..data_models import Language
  
- RC_TEMPLATES = {
+ RC_TEMPLATES: dict["Language", PromptConfig] = {
      DA: PromptConfig(
          default_prompt_prefix="Følgende er tekster med tilhørende spørgsmål og svar.",
          default_prompt_template="Tekst: {text}\nSpørgsmål: {question}\nSvar med maks. "
@@ -39,6 +62,14 @@ RC_TEMPLATES = {
          "sobre el texto anterior en máximo 3 palabras.\n\nPregunta: {question}",
          default_prompt_label_mapping=dict(),
      ),
+     ET: PromptConfig(
+         default_prompt_prefix="Järgnevad on tekstid koos küsimuste ja vastustega.",
+         default_prompt_template="Tekst: {text}\nKüsimus: {question}\nVasta "
+         "maksimaalselt 3 sõnaga: {label}",
+         default_instruction_prompt="Tekst: {text}\n\nVasta järgmisele küsimusele "
+         "ülevaltoodud teksti kohta maksimaalselt 3 sõnaga.\n\nKüsimus: {question}",
+         default_prompt_label_mapping=dict(),
+     ),
      FI: PromptConfig(
          default_prompt_prefix="Seuraavassa on tekstejä ja niihin liittyviä kysymyksiä "
          "ja vastauksia.",
@@ -84,6 +115,15 @@ RC_TEMPLATES = {
          "sul in un massimo di 3 parole.\n\nDomanda: {question}",
          default_prompt_label_mapping=dict(),
      ),
+     LV: PromptConfig(
+         default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
+         "atbildēm.",
+         default_prompt_template="Teksts: {text}\nJautājums: {question}\nAtbildēt ar "
+         "maksimāli 3 vārdiem: {label}",
+         default_instruction_prompt="Teksts: {text}\n\nAtbildiet uz šo jautājumu par "
+         "iepriekš minēto tekstu ar maksimāli 3 vārdiem.\n\nJautājums: {question}",
+         default_prompt_label_mapping=dict(),
+     ),
      NB: PromptConfig(
          default_prompt_prefix="Her følger tekster med tilhørende spørsmål og svar.",
          default_prompt_template="Tekst: {text}\nSpørsmål: {question}\nSvar på maks 3 "
euroeval/prompt_templates/sentiment_classification.py CHANGED
@@ -1,9 +1,32 @@
  """Templates for the Sentiment Analysis task."""
  
+ import typing as t
+
  from ..data_models import PromptConfig
- from ..languages import DA, DE, EN, ES, FI, FO, FR, IS, IT, NB, NL, NN, NO, PT, SV
+ from ..languages import (
+     DA,
+     DE,
+     EN,
+     ES,
+     ET,
+     FI,
+     FO,
+     FR,
+     IS,
+     IT,
+     LV,
+     NB,
+     NL,
+     NN,
+     NO,
+     PT,
+     SV,
+ )
+
+ if t.TYPE_CHECKING:
+     from ..data_models import Language
  
- SENT_TEMPLATES = {
+ SENT_TEMPLATES: dict["Language", PromptConfig] = {
      DA: PromptConfig(
          default_prompt_label_mapping=dict(
              positive="positiv", neutral="neutral", negative="negativ"
@@ -44,6 +67,17 @@ SENT_TEMPLATES = {
          default_instruction_prompt="Documento: {text}\n\nClasifica el sentimiento del "
          "documento. Responde con {labels_str}, y nada más.",
      ),
+     ET: PromptConfig(
+         default_prompt_label_mapping=dict(
+             positive="positiivne", neutral="neutraalne", negative="negatiivne"
+         ),
+         default_prompt_prefix="Järgmised on dokumendid ja nende meelestatus, "
+         "mis võib olla {labels_str}.",
+         default_prompt_template="Dokument: {text}\nMeelestatus: {label}",
+         default_instruction_prompt="Dokument: {text}\n\nKlassifitseeri dokument "
+         "meelestatuse järgi. Võimalikud vastused: {labels_str}. Muud vastused "
+         "ei ole lubatud.",
+     ),
      PT: PromptConfig(
          default_prompt_label_mapping=dict(
              positive="positivo", neutral="neutro", negative="negativo"
@@ -104,6 +138,16 @@ SENT_TEMPLATES = {
          default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
          "documento. Rispondere con {labels_str}, e nient'altro.",
      ),
+     LV: PromptConfig(
+         default_prompt_label_mapping=dict(
+             positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
+         ),
+         default_prompt_prefix="Tālāk ir dokumenti un to noskaņojums, kas var būt "
+         "{labels_str}.",
+         default_prompt_template="Dokuments: {text}\nNoskaņojums: {label}",
+         default_instruction_prompt="Dokuments: {text}\n\nKlasificējiet noskaņojumu "
+         "dokumentā. Atbildiet ar {labels_str}, un neko citu.",
+     ),
      NB: PromptConfig(
          default_prompt_label_mapping=dict(
              positive="positiv", neutral="nøytral", negative="negativ"