EuroEval 16.2.1-py3-none-any.whl → 16.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of EuroEval might be problematic.

Files changed (39)
  1. euroeval/__init__.py +4 -2
  2. euroeval/benchmark_modules/fresh.py +3 -1
  3. euroeval/benchmark_modules/hf.py +8 -4
  4. euroeval/benchmark_modules/litellm.py +5 -17
  5. euroeval/benchmark_modules/vllm.py +98 -30
  6. euroeval/benchmarker.py +291 -405
  7. euroeval/cli.py +1 -1
  8. euroeval/constants.py +3 -0
  9. euroeval/data_models.py +35 -35
  10. euroeval/dataset_configs/__init__.py +1 -0
  11. euroeval/dataset_configs/danish.py +0 -2
  12. euroeval/dataset_configs/dutch.py +0 -2
  13. euroeval/dataset_configs/english.py +0 -2
  14. euroeval/dataset_configs/finnish.py +0 -2
  15. euroeval/dataset_configs/french.py +0 -2
  16. euroeval/dataset_configs/german.py +0 -2
  17. euroeval/dataset_configs/italian.py +0 -2
  18. euroeval/dataset_configs/latvian.py +2 -3
  19. euroeval/dataset_configs/lithuanian.py +62 -0
  20. euroeval/dataset_configs/norwegian.py +0 -2
  21. euroeval/dataset_configs/polish.py +0 -2
  22. euroeval/dataset_configs/portuguese.py +0 -2
  23. euroeval/dataset_configs/spanish.py +0 -2
  24. euroeval/dataset_configs/swedish.py +0 -3
  25. euroeval/metrics/huggingface.py +1 -1
  26. euroeval/metrics/pipeline.py +5 -0
  27. euroeval/prompt_templates/linguistic_acceptability.py +9 -0
  28. euroeval/prompt_templates/multiple_choice.py +9 -0
  29. euroeval/prompt_templates/named_entity_recognition.py +20 -0
  30. euroeval/prompt_templates/reading_comprehension.py +10 -0
  31. euroeval/prompt_templates/sentiment_classification.py +11 -0
  32. euroeval/tokenisation_utils.py +8 -8
  33. euroeval/utils.py +10 -5
  34. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/METADATA +181 -60
  35. euroeval-16.3.0.dist-info/RECORD +71 -0
  36. euroeval-16.2.1.dist-info/RECORD +0 -70
  37. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/WHEEL +0 -0
  38. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/entry_points.txt +0 -0
  39. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/licenses/LICENSE +0 -0
euroeval/cli.py CHANGED
@@ -188,7 +188,7 @@ from .tasks import get_all_tasks
  )
  @click.option(
  "--gpu-memory-utilization",
- default=0.9,
+ default=0.8,
  show_default=True,
  help="The GPU memory utilization to use for vLLM. A larger value will result in "
  "faster evaluation, but at the risk of running out of GPU memory. Only reduce this "
euroeval/constants.py CHANGED
@@ -50,9 +50,11 @@ METRIC_ATTRIBUTES_TAKING_UP_MEMORY = ["cached_bertscorer"]
  # Hugging Face Hub tags used to classify models as merge models
  MERGE_TAGS = ["merge", "mergekit"]

+
  # The minimum required CUDA compute capability for using bfloat16 in vLLM
  VLLM_BF16_MIN_CUDA_COMPUTE_CAPABILITY = 8.0

+
  # Used to detect whether a model is a reasoning model
  REASONING_TOKENS = [
  ("<think>", "</think>"),
@@ -60,6 +62,7 @@ REASONING_TOKENS = [
  ("<reasoning>", "</reasoning>"),
  ]

+
  # These tokens are sometimes used by models to indicate the end of a generated
  # response, but they do not use them as a proper EOS token, so we have to deal with them
  # manually. We only use them as stop tokens if they actually appear in the model's
euroeval/data_models.py CHANGED
@@ -170,14 +170,16 @@ class BenchmarkConfig:
  """General benchmarking configuration, across datasets and models.

  Attributes:
- model_languages:
- The languages of the models to benchmark.
- dataset_languages:
- The languages of the datasets in the benchmark.
  tasks:
  The tasks benchmark the model(s) on.
  datasets:
  The datasets to benchmark on.
+ model_languages:
+ The languages of the models to benchmark.
+ dataset_languages:
+ The languages of the datasets in the benchmark.
+ device:
+ The device to use for benchmarking.
  batch_size:
  The batch size to use.
  raise_errors:
@@ -186,17 +188,16 @@ class BenchmarkConfig:
  Directory to store cached models and datasets.
  api_key:
  The API key to use for a given inference API.
- force:
- Whether to force the benchmark to run even if the results are already
- cached.
+ api_base:
+ The base URL for a given inference API. Only relevant if `model` refers to a
+ model on an inference API.
+ api_version:
+ The version of the API to use. Only relevant if `model` refers to a model on
+ an inference API.
  progress_bar:
  Whether to show a progress bar.
  save_results:
  Whether to save the benchmark results to 'euroeval_benchmark_results.json'.
- device:
- The device to use for benchmarking.
- verbose:
- Whether to print verbose output.
  trust_remote_code:
  Whether to trust remote code when loading models from the Hugging Face Hub.
  clear_model_cache:
@@ -208,21 +209,11 @@ class BenchmarkConfig:
  if the model is generative.
  num_iterations:
  The number of iterations each model should be evaluated for.
- api_base:
- The base URL for a given inference API. Only relevant if `model` refers to a
- model on an inference API.
- api_version:
- The version of the API to use. Only relevant if `model` refers to a model on
- an inference API.
  gpu_memory_utilization:
  The GPU memory utilization to use for vLLM. A larger value will result in
  faster evaluation, but at the risk of running out of GPU memory. Only reduce
  this if you are running out of GPU memory. Only relevant if the model is
  generative.
- debug:
- Whether to run the benchmark in debug mode.
- run_with_cli:
- Whether the benchmark is being run with the CLI.
  requires_safetensors:
  Whether to only allow models that use the safetensors format.
  generative_type:
@@ -231,6 +222,15 @@ class BenchmarkConfig:
  download_only:
  Whether to only download the models, metrics and datasets without
  evaluating.
+ force:
+ Whether to force the benchmark to run even if the results are already
+ cached.
+ verbose:
+ Whether to print verbose output.
+ debug:
+ Whether to run the benchmark in debug mode.
+ run_with_cli:
+ Whether the benchmark is being run with the CLI.
  """

  model_languages: list[Language]
@@ -241,24 +241,24 @@ class BenchmarkConfig:
  raise_errors: bool
  cache_dir: str
  api_key: str | None
- force: bool
+ api_base: str | None
+ api_version: str | None
  progress_bar: bool
  save_results: bool
  device: torch.device
- verbose: bool
  trust_remote_code: bool
  clear_model_cache: bool
  evaluate_test_split: bool
  few_shot: bool
  num_iterations: int
- api_base: str | None
- api_version: str | None
  gpu_memory_utilization: float
- debug: bool
- run_with_cli: bool
  requires_safetensors: bool
  generative_type: GenerativeType | None
  download_only: bool
+ force: bool
+ verbose: bool
+ debug: bool
+ run_with_cli: bool


  class BenchmarkConfigParams(pydantic.BaseModel):
@@ -266,10 +266,10 @@ class BenchmarkConfigParams(pydantic.BaseModel):

  model_config = pydantic.ConfigDict(protected_namespaces=())

- progress_bar: bool
- save_results: bool
  task: str | list[str] | None
  dataset: str | list[str] | None
+ progress_bar: bool
+ save_results: bool
  language: str | list[str]
  model_language: str | list[str] | None
  dataset_language: str | list[str] | None
@@ -278,21 +278,21 @@ class BenchmarkConfigParams(pydantic.BaseModel):
  raise_errors: bool
  cache_dir: str
  api_key: str | None
- force: bool
- verbose: bool
+ api_base: str | None
+ api_version: str | None
  trust_remote_code: bool
  clear_model_cache: bool
  evaluate_test_split: bool
  few_shot: bool
  num_iterations: int
- api_base: str | None
- api_version: str | None
+ requires_safetensors: bool
+ download_only: bool
  gpu_memory_utilization: float
  generative_type: GenerativeType | None
- download_only: bool
+ force: bool
+ verbose: bool
  debug: bool
  run_with_cli: bool
- requires_safetensors: bool


  class BenchmarkResult(pydantic.BaseModel):
euroeval/dataset_configs/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .german import * # noqa: F403
  from .icelandic import * # noqa: F403
  from .italian import * # noqa: F403
  from .latvian import * # noqa: F403
+ from .lithuanian import * # noqa: F403
  from .norwegian import * # noqa: F403
  from .polish import * # noqa: F403
  from .portuguese import * # noqa: F403
euroeval/dataset_configs/danish.py CHANGED
@@ -1,7 +1,6 @@
  """All Danish dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import DA
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -159,7 +158,6 @@ WINOGRANDE_DA_CONFIG = DatasetConfig(
  languages=[DA],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/dutch.py CHANGED
@@ -1,7 +1,6 @@
  """All Dutch dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import NL
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -152,7 +151,6 @@ WINOGRANDE_NL_CONFIG = DatasetConfig(
  languages=[NL],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/english.py CHANGED
@@ -1,7 +1,6 @@
  """All English dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import EN
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -135,7 +134,6 @@ WINOGRANDE_CONFIG = DatasetConfig(
  languages=[EN],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/finnish.py CHANGED
@@ -1,7 +1,6 @@
  """All Finnish dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import FI
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, LA, MCRC, NER, RC, SENT, SUMM

@@ -111,7 +110,6 @@ WINOGRANDE_FI_CONFIG = DatasetConfig(
  languages=[FI],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/french.py CHANGED
@@ -1,7 +1,6 @@
  """All French dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import FR
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -123,7 +122,6 @@ WINOGRANDE_FR_CONFIG = DatasetConfig(
  languages=[FR],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/german.py CHANGED
@@ -1,7 +1,6 @@
  """All German dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import DE
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -140,7 +139,6 @@ WINOGRANDE_DE_CONFIG = DatasetConfig(
  languages=[DE],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/italian.py CHANGED
@@ -1,7 +1,6 @@
  """All Italian dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import IT
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -131,7 +130,6 @@ WINOGRANDE_IT_CONFIG = DatasetConfig(
  languages=[IT],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/latvian.py CHANGED
@@ -1,7 +1,6 @@
  """All Latvian dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import LV
  from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT, SUMM

@@ -25,7 +24,8 @@ SCALA_LV_CONFIG = DatasetConfig(

  FULLSTACK_NER_LV_CONFIG = DatasetConfig(
  name="fullstack-ner-lv",
- pretty_name="the truncated version of the FullStack NER dataset",
+ pretty_name="the truncated version of the Latvian named entity recognition "
+ "dataset FullStack-NER-lv",
  huggingface_id="EuroEval/fullstack-ner-lv-mini",
  task=NER,
  languages=[LV],
@@ -90,6 +90,5 @@ WINOGRANDE_LV_CONFIG = DatasetConfig(
  languages=[LV],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )
euroeval/dataset_configs/lithuanian.py ADDED
@@ -0,0 +1,62 @@
+ """All Lithuanian dataset configurations used in EuroEval."""
+
+ from ..data_models import DatasetConfig
+ from ..languages import LT
+ from ..tasks import COMMON_SENSE, KNOW, LA, NER, RC, SENT
+
+ ### Official datasets ###
+
+ LITHUANIAN_EMOTIONS_CONFIG = DatasetConfig(
+ name="lithuanian-emotions",
+ pretty_name="the truncated version of the Lithuanian sentiment "
+ "classification dataset Lithuanian Emotions",
+ huggingface_id="EuroEval/lithuanian-emotions-mini",
+ task=SENT,
+ languages=[LT],
+ )
+
+ SCALA_LT_CONFIG = DatasetConfig(
+ name="scala-lt",
+ pretty_name="the Lithuanian part of the linguistic acceptability dataset ScaLA",
+ huggingface_id="EuroEval/scala-lt",
+ task=LA,
+ languages=[LT],
+ )
+
+ WIKIANN_LT_CONFIG = DatasetConfig(
+ name="wikiann-lt",
+ pretty_name="the truncated version of the Lithuanian part of the named entity "
+ "recognition dataset WikiANN",
+ huggingface_id="EuroEval/wikiann-lt-mini",
+ task=NER,
+ languages=[LT],
+ )
+
+ MULTI_WIKI_QA_LT_CONFIG = DatasetConfig(
+ name="multi-wiki-qa-lt",
+ pretty_name="the truncated version of the Lithuanian part of the reading "
+ "comprehension dataset MultiWikiQA",
+ huggingface_id="EuroEval/multi-wiki-qa-lt-mini",
+ task=RC,
+ languages=[LT],
+ )
+
+ LT_HISTORY_CONFIG = DatasetConfig(
+ name="lt-history",
+ pretty_name="the Lithuanian knowledge dataset LT-History",
+ huggingface_id="EuroEval/lt-history",
+ task=KNOW,
+ languages=[LT],
+ splits=["train", "test"],
+ )
+
+ WINOGRANDE_LT_CONFIG = DatasetConfig(
+ name="winogrande-lt",
+ pretty_name="the Lithuanian common-sense reasoning dataset Winogrande-lt, "
+ "translated from the English Winogrande dataset",
+ huggingface_id="EuroEval/winogrande-lt",
+ task=COMMON_SENSE,
+ languages=[LT],
+ splits=["train", "test"],
+ _labels=["a", "b"],
+ )
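The new Lithuanian suite covers sentiment classification, linguistic acceptability, named entity recognition, reading comprehension, knowledge and common-sense reasoning. The sketch below shows how these datasets might be selected by name; it assumes the top-level Benchmarker exposes the same keyword arguments as BenchmarkConfigParams (including gpu_memory_utilization, whose default this release lowers from 0.9 to 0.8) and a benchmark method taking a model ID, so check the EuroEval documentation for the exact signature.

# Sketch only: the model ID is a placeholder, the dataset names come from the
# configs above, and gpu_memory_utilization mirrors the new 0.8 default.
from euroeval import Benchmarker

benchmarker = Benchmarker(gpu_memory_utilization=0.8)
benchmarker.benchmark(
    model="<huggingface-model-id>",
    dataset=["lithuanian-emotions", "scala-lt", "multi-wiki-qa-lt"],
)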
euroeval/dataset_configs/norwegian.py CHANGED
@@ -1,7 +1,6 @@
  """All Norwegian dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import NB, NN, NO
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -226,7 +225,6 @@ WINOGRANDE_NO_CONFIG = DatasetConfig(
  languages=[NB, NN, NO],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/polish.py CHANGED
@@ -1,7 +1,6 @@
  """All Polish dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import PL
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, NER, RC, SENT, SUMM

@@ -64,7 +63,6 @@ WINOGRANDE_PL_CONFIG = DatasetConfig(
  languages=[PL],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  )

  EUROPEAN_VALUES_PL_CONFIG = DatasetConfig(
euroeval/dataset_configs/portuguese.py CHANGED
@@ -1,7 +1,6 @@
  """All Portuguese dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import PT
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -101,7 +100,6 @@ WINOGRANDE_PT_CONFIG = DatasetConfig(
  languages=[PT],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/spanish.py CHANGED
@@ -1,7 +1,6 @@
  """All Spanish dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import ES
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -129,7 +128,6 @@ WINOGRANDE_ES_CONFIG = DatasetConfig(
  languages=[ES],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

euroeval/dataset_configs/swedish.py CHANGED
@@ -1,7 +1,6 @@
  """All Swedish dataset configurations used in EuroEval."""

  from ..data_models import DatasetConfig
- from ..enums import ModelType
  from ..languages import SV
  from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

@@ -140,7 +139,6 @@ WINOGRANDE_SV_CONFIG = DatasetConfig(
  languages=[SV],
  splits=["train", "test"],
  _labels=["a", "b"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )

@@ -177,6 +175,5 @@ SKOLPROV_CONFIG = DatasetConfig(
  task=KNOW,
  languages=[SV],
  splits=["train", "test"],
- _allowed_model_types=[ModelType.GENERATIVE],
  unofficial=True,
  )
euroeval/metrics/huggingface.py CHANGED
@@ -197,7 +197,7 @@ bert_score_metric = HuggingFaceMetric(
  huggingface_id="bertscore",
  results_key="f1",
  compute_kwargs=dict(
- model_type="microsoft/mdeberta-v3-base", device="cpu", batch_size=16
+ model_type="microsoft/mdeberta-v3-base", device="auto", batch_size=1
  ),
  )

euroeval/metrics/pipeline.py CHANGED
@@ -191,6 +191,11 @@ def european_values_preprocessing_fn(
  for idx, choice in idx_to_choice.items()
  if choice is not None
  }
+ if prediction not in idx_to_choice:
+ raise InvalidBenchmark(
+ f"The prediction {prediction} is not a valid index for the "
+ f"question with choices {idx_to_choice}."
+ )
  integer_prediction = idx_to_choice[prediction]
  integer_predictions.append(integer_prediction)

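The added guard turns a bare KeyError into an explicit error whenever a model predicts an answer index that does not exist for the question. A self-contained sketch of the same check, with a plain ValueError standing in for EuroEval's InvalidBenchmark exception:

# Self-contained sketch; ValueError stands in for EuroEval's InvalidBenchmark,
# and the mapping is an invented example of answer indices to integer choices.
idx_to_choice = {1: 0, 2: 1, 3: 2}
prediction = 5  # an out-of-range index, as a model might produce

if prediction not in idx_to_choice:
    raise ValueError(
        f"The prediction {prediction} is not a valid index for the "
        f"question with choices {idx_to_choice}."
    )
integer_prediction = idx_to_choice[prediction]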
euroeval/prompt_templates/linguistic_acceptability.py CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
  FR,
  IS,
  IT,
+ LT,
  LV,
  NB,
  NL,
@@ -126,6 +127,14 @@ LA_TEMPLATES: dict["Language", PromptConfig] = {
  default_instruction_prompt="Frase: {text}\n\nStabilite se la frase è "
  "grammaticalmente corretta o meno. Rispondere con {labels_str}, e nient'altro.",
  ),
+ LT: PromptConfig(
+ default_prompt_label_mapping=dict(correct="taip", incorrect="ne"),
+ default_prompt_prefix="Toliau pateikti sakiniai ir ar jie yra gramatiškai "
+ "teisingi.",
+ default_prompt_template="Sakinys: {text}\nGramatiškai teisingas: {label}",
+ default_instruction_prompt="Sakinys: {text}\n\nNustatykite, ar sakinys yra "
+ "gramatiškai teisingas, ar ne. Atsakykite su {labels_str}, ir nieko kito.",
+ ),
  LV: PromptConfig(
  default_prompt_label_mapping=dict(correct="jā", incorrect="nē"),
  default_prompt_prefix="Šie ir teikumi un to gramatiskie pareizumi.",
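Rendered prompts from these templates are plain placeholder substitution. A small sketch using the new Lithuanian linguistic acceptability template, assuming str.format-style filling of the {text} and {label} placeholders; the example sentence and label are invented for illustration:

# Template and label mapping copied from the LT PromptConfig above.
template = "Sakinys: {text}\nGramatiškai teisingas: {label}"
label_mapping = dict(correct="taip", incorrect="ne")

print(template.format(text="Šuo bėga per lauką.", label=label_mapping["correct"]))
# Prints:
# Sakinys: Šuo bėga per lauką.
# Gramatiškai teisingas: taip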
euroeval/prompt_templates/multiple_choice.py CHANGED
@@ -13,6 +13,7 @@ from ..languages import (
  FR,
  IS,
  IT,
+ LT,
  LV,
  NB,
  NL,
@@ -105,6 +106,14 @@ MULTIPLE_CHOICE_TEMPLATES: dict["Language", PromptConfig] = {
  "precedente con {labels_str}, e nient'altro.",
  default_prompt_label_mapping="auto",
  ),
+ LT: PromptConfig(
+ default_prompt_prefix="Toliau pateikti daugiavariančiai klausimai "
+ "(su atsakymais).",
+ default_prompt_template="Klausimas: {text}\nAtsakymas: {label}",
+ default_instruction_prompt="Klausimas: {text}\n\nAtsakykite į aukščiau "
+ "pateiktą klausimą atsakydami {labels_str}, ir nieko daugiau.",
+ default_prompt_label_mapping="auto",
+ ),
  LV: PromptConfig(
  default_prompt_prefix="Tālāk seko jautājumi ar vairākām atbilžu izvēlēm "
  "(ar atbildēm).",
euroeval/prompt_templates/named_entity_recognition.py CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
  FR,
  IS,
  IT,
+ LT,
  LV,
  NB,
  NL,
@@ -241,6 +242,25 @@ NER_TEMPLATES: dict["Language", PromptConfig] = {
  "{labels_str}. I valori devono essere elenchi di entità "
  "nominate di quel tipo, esattamente come appaiono nella frase.",
  ),
+ LT: PromptConfig(
+ default_prompt_label_mapping={
+ "b-per": "asmuo",
+ "i-per": "asmuo",
+ "b-loc": "vieta",
+ "i-loc": "vieta",
+ "b-org": "organizacija",
+ "i-org": "organizacija",
+ "b-misc": "kita",
+ "i-misc": "kita",
+ },
+ default_prompt_prefix="Toliau pateikti sakiniai ir JSON žodynai su vardiniais "
+ "vienetais, kurie pateikiame sakinyje.",
+ default_prompt_template="Sakinys: {text}\nVardiniai vienetai: {label}",
+ default_instruction_prompt="Sakinys: {text}\n\nIdentifikuokite vardinius "
+ "vienetus sakinyje. Turėtumėte pateikti tai kaip JSON žodyną su raktais "
+ "{labels_str}. Reikšmės turi būti to tipo vardinių vienetų sąrašai, "
+ "tiksliai taip, kaip jie rodomi sakinyje.",
+ ),
  LV: PromptConfig(
  default_prompt_label_mapping={
  "b-per": "persona",
euroeval/prompt_templates/reading_comprehension.py CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
  FR,
  IS,
  IT,
+ LT,
  LV,
  NB,
  NL,
@@ -116,6 +117,15 @@ RC_TEMPLATES: dict["Language", PromptConfig] = {
  "sul in un massimo di 3 parole.\n\nDomanda: {question}",
  default_prompt_label_mapping=dict(),
  ),
+ LT: PromptConfig(
+ default_prompt_prefix="Toliau pateikti tekstai su atitinkamais klausimais ir "
+ "atsakymais.",
+ default_prompt_template="Tekstas: {text}\nKlausimas: {question}\nAtsakykite ne "
+ "daugiau kaip 3 žodžiais: {label}",
+ default_instruction_prompt="Tekstas: {text}\n\nAtsakykite į šį klausimą apie "
+ "aukščiau pateiktą tekstą ne daugiau kaip 3 žodžiais.\n\nKlausimas: {question}",
+ default_prompt_label_mapping=dict(),
+ ),
  LV: PromptConfig(
  default_prompt_prefix="Turpmāk seko teksti ar atbilstošiem jautājumiem un "
  "atbildēm.",
euroeval/prompt_templates/sentiment_classification.py CHANGED
@@ -14,6 +14,7 @@ from ..languages import (
  FR,
  IS,
  IT,
+ LT,
  LV,
  NB,
  NL,
@@ -153,6 +154,16 @@ SENT_TEMPLATES: dict["Language", PromptConfig] = {
  default_instruction_prompt="Documento: {text}\n\nClassificare il sentiment del "
  "documento. Rispondere con {labels_str}, e nient'altro.",
  ),
+ LT: PromptConfig(
+ default_prompt_label_mapping=dict(
+ positive="teigiamas", neutral="neutralus", negative="neigiamas"
+ ),
+ default_prompt_prefix="Toliau pateikti dokumentai ir jų nuotaika, kuri "
+ "gali būti {labels_str}.",
+ default_prompt_template="Dokumentas: {text}\nNuotaika: {label}",
+ default_instruction_prompt="Dokumentas: {text}\n\nKlasifikuokite nuotaiką "
+ "dokumente. Atsakykite su {labels_str}, ir nieko kito.",
+ ),
  LV: PromptConfig(
  default_prompt_label_mapping=dict(
  positive="pozitīvs", neutral="neitrāls", negative="negatīvs"
euroeval/tokenisation_utils.py CHANGED
@@ -521,7 +521,14 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
  Returns:
  Whether the tokeniser has a chat template.
  """
- if hasattr(tokeniser, "chat_template"):
+ if isinstance(tokeniser, MistralCommonTokenizer):
+ log_once(
+ "The tokeniser is a Mistral tokeniser, so assuming that the model is "
+ "instruction tuned.",
+ level=logging.DEBUG,
+ )
+ return True
+ elif hasattr(tokeniser, "chat_template"):
  has_template = tokeniser.chat_template is not None
  if has_template:
  log_once(
@@ -530,13 +537,6 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
  level=logging.DEBUG,
  )
  return has_template
- elif isinstance(tokeniser, MistralCommonTokenizer):
- log_once(
- "The tokeniser is a Mistral tokeniser, so assuming that the model is "
- "instruction tuned.",
- level=logging.DEBUG,
- )
- return True
  else:
  log_once(
  "We cannot find a chat template for the tokeniser, so assuming that the "