EuroEval 16.2.2-py3-none-any.whl → 16.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (65)
  1. euroeval/__init__.py +7 -4
  2. euroeval/benchmark_config_factory.py +0 -4
  3. euroeval/benchmark_modules/base.py +3 -16
  4. euroeval/benchmark_modules/fresh.py +5 -2
  5. euroeval/benchmark_modules/hf.py +107 -66
  6. euroeval/benchmark_modules/litellm.py +103 -55
  7. euroeval/benchmark_modules/vllm.py +155 -82
  8. euroeval/benchmarker.py +184 -129
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/cli.py +1 -1
  12. euroeval/constants.py +9 -0
  13. euroeval/data_loading.py +14 -11
  14. euroeval/data_models.py +12 -4
  15. euroeval/dataset_configs/__init__.py +3 -0
  16. euroeval/dataset_configs/czech.py +79 -0
  17. euroeval/dataset_configs/danish.py +10 -13
  18. euroeval/dataset_configs/dutch.py +0 -3
  19. euroeval/dataset_configs/english.py +0 -3
  20. euroeval/dataset_configs/estonian.py +11 -1
  21. euroeval/dataset_configs/finnish.py +0 -3
  22. euroeval/dataset_configs/french.py +0 -3
  23. euroeval/dataset_configs/german.py +0 -3
  24. euroeval/dataset_configs/italian.py +0 -3
  25. euroeval/dataset_configs/latvian.py +2 -4
  26. euroeval/dataset_configs/lithuanian.py +68 -0
  27. euroeval/dataset_configs/norwegian.py +0 -3
  28. euroeval/dataset_configs/polish.py +0 -3
  29. euroeval/dataset_configs/portuguese.py +0 -3
  30. euroeval/dataset_configs/slovak.py +60 -0
  31. euroeval/dataset_configs/spanish.py +0 -3
  32. euroeval/dataset_configs/swedish.py +10 -15
  33. euroeval/finetuning.py +21 -15
  34. euroeval/generation.py +10 -10
  35. euroeval/generation_utils.py +2 -3
  36. euroeval/logging_utils.py +250 -0
  37. euroeval/metrics/base.py +0 -3
  38. euroeval/metrics/huggingface.py +10 -6
  39. euroeval/metrics/llm_as_a_judge.py +5 -3
  40. euroeval/metrics/pipeline.py +22 -9
  41. euroeval/metrics/speed.py +0 -3
  42. euroeval/model_cache.py +11 -14
  43. euroeval/model_config.py +4 -5
  44. euroeval/model_loading.py +3 -0
  45. euroeval/prompt_templates/linguistic_acceptability.py +30 -3
  46. euroeval/prompt_templates/multiple_choice.py +34 -1
  47. euroeval/prompt_templates/named_entity_recognition.py +71 -11
  48. euroeval/prompt_templates/reading_comprehension.py +41 -3
  49. euroeval/prompt_templates/sentiment_classification.py +34 -1
  50. euroeval/prompt_templates/summarization.py +26 -6
  51. euroeval/scores.py +7 -7
  52. euroeval/speed_benchmark.py +3 -5
  53. euroeval/task_group_utils/multiple_choice_classification.py +0 -3
  54. euroeval/task_group_utils/question_answering.py +0 -3
  55. euroeval/task_group_utils/sequence_classification.py +43 -31
  56. euroeval/task_group_utils/text_to_text.py +17 -8
  57. euroeval/task_group_utils/token_classification.py +10 -9
  58. euroeval/tokenisation_utils.py +22 -20
  59. euroeval/utils.py +30 -147
  60. {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/METADATA +182 -61
  61. euroeval-16.4.0.dist-info/RECORD +75 -0
  62. euroeval-16.2.2.dist-info/RECORD +0 -70
  63. {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/WHEEL +0 -0
  64. {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/entry_points.txt +0 -0
  65. {euroeval-16.2.2.dist-info → euroeval-16.4.0.dist-info}/licenses/LICENSE +0 -0
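Many of the per-file diffs below replace module-level `logging.getLogger("euroeval")` loggers and direct `tqdm` usage with helpers from the new `euroeval/logging_utils.py` module (`log`, `log_once`, `get_pbar`). That module's contents are not included in this diff; the following is only a rough sketch of how such helpers could look, inferred from the call sites shown below rather than from the actual 16.4.0 source.

```python
# Hypothetical sketch of euroeval/logging_utils.py, inferred from how the
# helpers are called in the diffs below; not the actual 16.4.0 source.
import logging

from tqdm.auto import tqdm

logger = logging.getLogger("euroeval")
_logged_messages: set[str] = set()


def log(message: str, level: int = logging.INFO) -> None:
    """Log a message with the shared 'euroeval' logger at the given level."""
    logger.log(level, message)


def log_once(message: str, level: int = logging.INFO) -> None:
    """Log a message only the first time it is seen, to avoid repeated output."""
    if message not in _logged_messages:
        _logged_messages.add(message)
        log(message, level=level)


def get_pbar(iterable, desc: str, disable: bool = False) -> tqdm:
    """Return a progress bar over `iterable`, mirroring the old direct tqdm usage."""
    return tqdm(iterable=iterable, desc=desc, disable=disable)
```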
euroeval/scores.py CHANGED
@@ -6,12 +6,12 @@ import warnings
 
 import numpy as np
 
+from .logging_utils import log
+
 if t.TYPE_CHECKING:
     from .metrics import Metric
     from .types import ScoreDict
 
-logger = logging.getLogger("euroeval")
-
 
 def log_scores(
     dataset_name: str,
@@ -48,9 +48,8 @@ def log_scores(
     if model_param is not None:
         model_id += f"#{model_param}"
 
-    logger.info(f"Finished evaluation of {model_id} on {dataset_name}.")
-
     total_dict: dict[str, float] = dict()
+    all_log_strs: list[str] = [f"Finished benchmarking {model_id} on {dataset_name}."]
     for metric in metrics:
         test_score, test_se = aggregate_scores(scores=scores, metric=metric)
         test_score, test_score_str = metric.postprocessing_fn(test_score)
@@ -58,11 +57,12 @@ def log_scores(
         total_dict[f"test_{metric.name}"] = test_score
         total_dict[f"test_{metric.name}_se"] = test_se
         log_str = (
-            f"{metric.pretty_name}: {test_score_str} ± {test_se_str}"
+            f"- {metric.pretty_name}: {test_score_str} ± {test_se_str}"
             if not np.isnan(test_se)
-            else f"{metric.pretty_name}: {test_score_str}"
+            else f"- {metric.pretty_name}: {test_score_str}"
         )
-        logger.info(log_str)
+        all_log_strs.append(log_str)
+    log("\n".join(all_log_strs), level=logging.INFO)
 
     return dict(raw=scores, total=total_dict)
 
euroeval/speed_benchmark.py CHANGED
@@ -4,19 +4,17 @@ import logging
 import typing as t
 
 import pyinfer
-from tqdm.auto import tqdm
 from transformers.models.auto.tokenization_auto import AutoTokenizer
 
 from .benchmark_modules import HuggingFaceEncoderModel, LiteLLMModel, VLLMModel
 from .exceptions import InvalidBenchmark
+from .logging_utils import get_pbar, log
 from .utils import clear_memory
 
 if t.TYPE_CHECKING:
     from .benchmark_modules import BenchmarkModule
     from .data_models import BenchmarkConfig
 
-logger = logging.getLogger("euroeval")
-
 
 def benchmark_speed(
     model: "BenchmarkModule", benchmark_config: "BenchmarkConfig"
@@ -33,7 +31,7 @@ def benchmark_speed(
         Dictionary of scores.
     """
     scores: list[dict[str, float]] = list()
-    for idx in tqdm(
+    for idx in get_pbar(
         iterable=range(benchmark_config.num_iterations),
         desc="Benchmarking",
         disable=not benchmark_config.progress_bar,
@@ -41,7 +39,7 @@ def benchmark_speed(
         itr_scores = benchmark_speed_single_iteration(model=model, itr_idx=idx)
         clear_memory()
         scores.append(itr_scores)
-        logger.debug(f"Scores for iteration {idx}: {itr_scores}")
+        log(f"Scores for iteration {idx}: {itr_scores}", level=logging.DEBUG)
     return scores
 
 
euroeval/task_group_utils/multiple_choice_classification.py CHANGED
@@ -1,7 +1,6 @@
 """Utility functions related to the multiple-choice classification task group."""
 
 import hashlib
-import logging
 import re
 import typing as t
 from collections import defaultdict
@@ -18,8 +17,6 @@ if t.TYPE_CHECKING:
 
     from ..types import Labels, Predictions
 
-logger = logging.getLogger("euroeval")
-
 
 class MultipleChoiceClassificationTrainer(Trainer):
     """Trainer subclass for multiple-choice classification tasks."""
euroeval/task_group_utils/question_answering.py CHANGED
@@ -1,7 +1,6 @@
 """Utility functions related to the question-answering task group."""
 
 import collections.abc as c
-import logging
 import typing as t
 from collections import defaultdict
 
@@ -26,8 +25,6 @@ if t.TYPE_CHECKING:
     from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
     from ..types import Labels, Predictions
 
-logger = logging.getLogger("euroeval")
-
 
 class QuestionAnsweringTrainer(Trainer):
     """Trainer subclass for question answering tasks."""
euroeval/task_group_utils/sequence_classification.py CHANGED
@@ -19,13 +19,15 @@ if t.TYPE_CHECKING:
     from datasets.arrow_dataset import Dataset
     from transformers.trainer_utils import EvalPrediction
 
-    from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
+    from ..data_models import (
+        BenchmarkConfig,
+        DatasetConfig,
+        GenerativeModelOutput,
+        ModelConfig,
+    )
     from ..types import Labels, Predictions
 
 
-logger = logging.getLogger("euroeval")
-
-
 def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     dataset_config: "DatasetConfig",
@@ -106,6 +108,7 @@ def extract_labels_from_generation(
     input_batch: dict[str, list],
     model_output: "GenerativeModelOutput",
     dataset_config: "DatasetConfig",
+    model_config: "ModelConfig",
     first_label_token_mapping: dict[str, str] | bool,
 ) -> list[str]:
     """Extract the predicted labels from the generated output.
@@ -118,6 +121,8 @@ def extract_labels_from_generation(
             The raw generated output of the model.
         dataset_config:
             The configuration of the dataset.
+        model_config:
+            The configuration of the model.
         first_label_token_mapping:
             A mapping from labels to the first token in each label, or alternatively a
             Boolean value indicating whether the model should output scores (if the
@@ -167,6 +172,7 @@ def extract_labels_from_generation(
     )
 
     new_predicted_labels: list[str] = list()
+    num_predictions_being_very_off = 0
     for idx, predicted_label in enumerate(model_output.sequences):
         # If the prediction includes a boxed answer, use that instead of the full
         # generation
@@ -199,34 +205,40 @@ def extract_labels_from_generation(
         # word edit distance to the predicted label (if invalid model outputs are
         # allowed), or we raise an error
         if min(edit_distances) >= 1000:
-            if dataset_config.allow_invalid_model_outputs:
-                logger.warning(
-                    "No candidate labels found for the predicted label "
-                    f"{predicted_label!r}, out of the candidate labels "
-                    f"{sample_candidate_labels[idx]}. This likely means that the model "
-                    "output is completely off, but since invalid model outputs are "
-                    "allowed for this task, we will use the closest candidate label "
-                    f"({best_candidate_label})) as the output label. If you see this "
-                    "warning very often, please report this issue to the EuroEval "
-                    "team at github.com/EuroEval/EuroEval/issues."
-                )
-                logger.debug(
-                    "The candidate labels were extracted from the prompt: "
-                    f"{input_batch['text'][idx]!r}."
-                )
-            else:
-                raise InvalidBenchmark(
-                    "No candidate labels found for the predicted label "
-                    f"{predicted_label!r}, out of the candidate labels "
-                    f"{sample_candidate_labels[idx]}. This likely means that the model "
-                    "output is completely off, and we cannot extract any labels from "
-                    "it. Please check the model output and the candidate labels. The "
-                    "candidate labels were extracted from the prompt: "
-                    f"{input_batch['text'][idx]!r}."
-                )
+            num_predictions_being_very_off += 1
 
         new_predicted_labels.append(best_candidate_label)
 
+    if num_predictions_being_very_off > 0:
+        if dataset_config.allow_invalid_model_outputs:
+            log_msg = (
+                "No candidate labels found for the predicted label in "
+                f"{num_predictions_being_very_off:,}/{len(model_output.sequences):,} "
+                f"of the samples with the model {model_config.model_id!r}. This "
+                "likely means that the model were completely off in these cases, "
+                "but since invalid model outputs are allowed for this task, we used "
+                "the closest candidate labels as the output labels."
+            )
+            level = logging.DEBUG
+            if num_predictions_being_very_off / len(model_output.sequences) > 0.5:
+                log_msg += (
+                    " Since this happened for most of the model's predictions, please "
+                    "report this issue to the EuroEval team at "
+                    "github.com/EuroEval/EuroEval/issues."
+                )
+                level = logging.WARNING
+            log_once(log_msg, level=level)
+        else:
+            raise InvalidBenchmark(
+                "No candidate labels found for the predicted label in "
+                f"{num_predictions_being_very_off:,}/{len(model_output.sequences):,} "
+                "of the samples. This likely means that the model were completely "
+                "off in these cases. Since this task does not allow invalid model "
+                "outputs, we have to abort the evaluation. Please re-run the "
+                "evaluation with the `--debug` flag (or `debug=True` if you're using "
+                "the `Benchmarker` API) to see the precise model outputs."
+            )
+
     return new_predicted_labels
 
 
@@ -355,7 +367,7 @@ def get_closest_logprobs_labels(
                     "be determined. This means that using logprobs to extract the "
                     "labels is not reliable, and we will instead fall back to "
                     "extracting the labels using word edit distance.",
-                    level=logging.INFO,
+                    level=logging.DEBUG,
                 )
             else:
                 log_once(
@@ -363,7 +375,7 @@ def get_closest_logprobs_labels(
                     "means that using logprobs to extract the labels is not reliable, "
                    "and we will instead fall back to extracting the labels using "
                     "word edit distance.",
-                    level=logging.INFO,
+                    level=logging.DEBUG,
                 )
             return None
 
euroeval/task_group_utils/text_to_text.py CHANGED
@@ -7,6 +7,7 @@ import numpy as np
 
 from ..constants import METRIC_ATTRIBUTES_TAKING_UP_MEMORY
 from ..exceptions import InvalidBenchmark
+from ..logging_utils import log
 from ..metrics import HuggingFaceMetric
 from ..utils import raise_if_model_output_contains_nan_values
 
@@ -18,9 +19,6 @@ if t.TYPE_CHECKING:
     from ..types import Labels, Predictions
 
 
-logger = logging.getLogger("euroeval")
-
-
 def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     dataset_config: "DatasetConfig",
@@ -44,6 +42,10 @@ def compute_metrics(
     Returns:
         A dictionary with the names of the metrics as keys and the metric values as
         values.
+
+    Raises:
+        InvalidBenchmark:
+            If the metric computation fails.
     """
     model_outputs, labels = model_outputs_and_labels
 
@@ -72,7 +74,7 @@ def compute_metrics(
         ):
             metric.compute_kwargs["device"] = benchmark_config.device.type
 
-        while True:
+        for _ in range(num_attempts := 5):
             try:
                 score: float | None = metric(
                     predictions=predictions,
@@ -96,21 +98,28 @@ def compute_metrics(
                     and metric.compute_kwargs.get("device", "cpu") != "cpu"
                 ):
                     metric.compute_kwargs["device"] = "cpu"
-                    logger.debug(
+                    log(
                         "Out of memory error occurred during the computation of "
                         f"the metric {metric.pretty_name}. Moving the computation to "
-                        "the CPU."
+                        "the CPU.",
+                        level=logging.DEBUG,
                     )
                 else:
                     raise InvalidBenchmark(str(e)) from e
             finally:
                 for attribute in METRIC_ATTRIBUTES_TAKING_UP_MEMORY:
                     if hasattr(metric, attribute):
-                        logger.debug(
+                        log(
                             f"Deleting the {attribute!r} attribute of the metric "
-                            f"{metric.pretty_name} to free up memory."
+                            f"{metric.pretty_name} to free up memory.",
+                            level=logging.DEBUG,
                         )
                         delattr(metric, attribute)
+        else:
+            raise InvalidBenchmark(
+                f"Could not compute the metric {metric.pretty_name} after "
+                f"{num_attempts} attempts due to out of memory errors."
+            )
 
         # The metric returns None if we are running on multi-GPU and the current
         # process is not the main process
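The retry logic above swaps the unbounded `while True` loop for a bounded `for` loop with an `else` clause: the `else` branch runs only if the loop finishes all five attempts without hitting a `break` (the `break` on a successful metric computation lies outside the hunk shown), at which point the benchmark aborts. A minimal, generic sketch of this for/else retry pattern, not taken from the EuroEval source:

```python
# Minimal, generic illustration of the for/else retry pattern adopted here
# (the metric call and the break on success sit outside the hunk shown above).
import logging


def flaky_operation(attempt: int) -> float:
    if attempt < 2:  # pretend the first two attempts run out of memory
        raise MemoryError("out of memory")
    return 42.0


for attempt in range(num_attempts := 5):
    try:
        score = flaky_operation(attempt)
        break  # success: skip the else clause
    except MemoryError:
        logging.debug("Attempt %d failed, retrying", attempt)
else:
    # Only reached if all five attempts failed without a break.
    raise RuntimeError(f"Gave up after {num_attempts} attempts.")

print(score)  # 42.0
```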
euroeval/task_group_utils/token_classification.py CHANGED
@@ -7,6 +7,7 @@ from copy import deepcopy
 import numpy as np
 
 from ..exceptions import InvalidBenchmark
+from ..logging_utils import log
 from ..utils import (
     extract_json_dict_from_string,
     raise_if_model_output_contains_nan_values,
@@ -22,9 +23,6 @@ if t.TYPE_CHECKING:
     from ..types import Labels, Predictions
 
 
-logger = logging.getLogger("euroeval")
-
-
 def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     has_misc_tags: bool,
@@ -216,17 +214,19 @@ def extract_labels_from_generation(
     prompt_label_mapping = dataset_config.prompt_label_mapping
     for prompt_tag_name, named_entities in prediction_dict.items():
         if not isinstance(named_entities, list):
-            logger.debug(
+            log(
                 "The model produced an invalid format for the named entities. "
-                f"Expected a list but got {type(named_entities)}. Skipping."
+                f"Expected a list but got {type(named_entities)}. Skipping.",
+                level=logging.DEBUG,
             )
             continue
         try:
             named_entities = [str(ne) for ne in named_entities]
         except Exception:
-            logger.debug(
+            log(
                 "The model produced an invalid format for the named entities. "
-                f"Expected a list of strings but got {named_entities}. Skipping."
+                f"Expected a list of strings but got {named_entities}. Skipping.",
+                level=logging.DEBUG,
            )
             continue
         try:
@@ -236,9 +236,10 @@ def extract_labels_from_generation(
                 if prompt_tag == prompt_tag_name
             ][0]
         except IndexError:
-            logger.debug(
+            log(
                 "The model produced an invalid prompt tag name, "
-                f"{prompt_tag_name}. Skipping."
+                f"{prompt_tag_name}. Skipping.",
+                level=logging.DEBUG,
             )
             continue
 
euroeval/tokenisation_utils.py CHANGED
@@ -5,11 +5,11 @@ import re
 import typing as t
 
 import torch
-from transformers import MistralCommonTokenizer
+from transformers.tokenization_mistral_common import MistralCommonTokenizer
 
 from .enums import GenerativeType
 from .exceptions import InvalidModel
-from .utils import log_once
+from .logging_utils import log, log_once
 
 if t.TYPE_CHECKING:
     from transformers.tokenization_utils import PreTrainedTokenizer
@@ -18,9 +18,6 @@ if t.TYPE_CHECKING:
     from .data_models import DatasetConfig, ModelConfig
 
 
-logger = logging.getLogger("euroeval")
-
-
 def get_special_token_metadata(tokeniser: "PreTrainedTokenizerBase") -> dict:
     """Get the special token metadata for a tokeniser.
 
@@ -182,7 +179,7 @@ def get_bos_token(
             "The model does not have a beginning-of-sequence token. Please ensure that "
             "this has been set in the tokeniser's configuration. Using no BOS token."
             " This may lead to unexpected behavior in the model.",
-            level=logging.INFO,
+            level=logging.WARNING,
         )
         return None, None
 
@@ -223,14 +220,14 @@ def get_eos_token(
             "The model does not have an end-of-sequence token. Please ensure that this "
             "has been set in the tokeniser's configuration. Using no EOS token. This "
             "may lead to unexpected behavior in the model.",
-            level=logging.INFO,
+            level=logging.WARNING,
         )
         return None, None
 
     log_once(
         f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
         f"ID {eos_token_id}.",
-        level=logging.DEBUG,
+        level=logging.WARNING,
     )
     return eos_token, eos_token_id
 
@@ -306,7 +303,7 @@ def get_pad_token(
             "Could not identify a padding token for the model. Please ensure that "
             "this has been set in the tokeniser's configuration. Using no padding "
             "token. This may lead to unexpected behavior in the model.",
-            level=logging.INFO,
+            level=logging.WARNING,
         )
         return None, None
 
@@ -358,12 +355,16 @@ def get_end_of_chat_token_ids(
             x_token_index = idx
             break
     else:
-        logger.debug("Could not locate the end-of-chat token for the model.")
+        log(
+            "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
+        )
         return None
 
     end_of_chat_tokens = token_ids[x_token_index + 1 :]
     if len(end_of_chat_tokens) == 0:
-        logger.debug("Could not locate the end-of-chat token for the model.")
+        log(
+            "Could not locate the end-of-chat token for the model.", level=logging.DEBUG
+        )
         return None
 
     log_once(
@@ -506,7 +507,8 @@ def get_first_label_token_mapping(
         log_once(
             "We will not use logprobs with the model since the first tokens of the "
             "labels are not distinct. The first tokens for the labels "
-            f"{local_labels} are {first_tokens}"
+            f"{local_labels} are {first_tokens}",
+            level=logging.DEBUG,
         )
         return False
 
@@ -521,7 +523,14 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
     Returns:
         Whether the tokeniser has a chat template.
     """
-    if hasattr(tokeniser, "chat_template"):
+    if isinstance(tokeniser, MistralCommonTokenizer):
+        log_once(
+            "The tokeniser is a Mistral tokeniser, so assuming that the model is "
+            "instruction tuned.",
+            level=logging.DEBUG,
+        )
+        return True
+    elif hasattr(tokeniser, "chat_template"):
         has_template = tokeniser.chat_template is not None
         if has_template:
             log_once(
@@ -530,13 +539,6 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
                 level=logging.DEBUG,
             )
         return has_template
-    elif isinstance(tokeniser, MistralCommonTokenizer):
-        log_once(
-            "The tokeniser is a Mistral tokeniser, so assuming that the model is "
-            "instruction tuned.",
-            level=logging.DEBUG,
-        )
-        return True
     else:
         log_once(
             "We cannot find a chat template for the tokeniser, so assuming that the "