EuroEval 15.7.0__py3-none-any.whl → 15.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of EuroEval might be problematic.

euroeval/benchmark_modules/litellm.py CHANGED
@@ -1,11 +1,8 @@
 """Generative models from an inference API, using the LiteLLM framework."""

 import collections.abc as c
-import itertools as it
-import json
 import logging
 import os
-import random
 import re
 import typing as t
 from functools import cached_property, partial
@@ -60,6 +57,7 @@ from ..exceptions import (
     NeedsEnvironmentVariable,
     NeedsExtraInstalled,
 )
+from ..generation_utils import apply_prompt, extract_few_shot_examples
 from ..task_group_utils import (
     question_answering,
     sequence_classification,
@@ -943,14 +941,22 @@ class LiteLLMModel(BenchmarkModule):
             )

         if self.benchmark_config.few_shot:
-            few_shot_examples = self._extract_few_shot_examples(
-                dataset=dataset, task=task, itr_idx=itr_idx
+            few_shot_examples = extract_few_shot_examples(
+                dataset=dataset, dataset_config=self.dataset_config, itr_idx=itr_idx
             )
         else:
             few_shot_examples = list()

         dataset["test"] = dataset["test"].map(
-            partial(self._apply_prompt, few_shot_examples=few_shot_examples, task=task),
+            partial(
+                apply_prompt,
+                few_shot_examples=few_shot_examples,
+                model_config=self.model_config,
+                dataset_config=self.dataset_config,
+                instruction_model=True,
+                always_populate_text_field=False,
+                tokenizer=None,
+            ),
             batched=True,
             load_from_cache_file=False,
             keep_in_memory=True,
@@ -958,253 +964,6 @@ class LiteLLMModel(BenchmarkModule):

         return dataset

-    def _extract_few_shot_examples(
-        self, dataset: DatasetDict, task: Task, itr_idx: int
-    ) -> list[dict[str, t.Any]]:
-        """Extract few-shot examples from a dataset.
-
-        This will always extract the examples from the training split.
-
-        We ensure that the few-shot examples are unique by picking them one at a time.
-
-        Args:
-            dataset:
-                The dataset to extract the few-shot examples from.
-            task:
-                The task that is being benchmarked.
-            itr_idx:
-                The index of the dataset in the iterator.
-
-        Returns:
-            The few-shot examples.
-        """
-        random_seed = 4242 + itr_idx
-        num_few_shots = self.dataset_config.num_few_shot_examples
-        few_shot_examples: list[dict[str, t.Any]] = list()
-        shuffled_train = dataset["train"].shuffle(seed=random_seed)
-
-        match task.task_group:
-            case (
-                TaskGroup.SEQUENCE_CLASSIFICATION
-                | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
-            ):
-                labels = it.cycle(self.dataset_config.labels)
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    label = next(labels)
-                    possible_examples = shuffled_train.filter(
-                        lambda x: x["label"].lower() == label.lower()
-                    )
-                    if len(possible_examples) == 0:
-                        continue
-                    example = possible_examples.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["text"] != example["text"]
-                    )
-
-            case TaskGroup.TEXT_TO_TEXT:
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    example = shuffled_train.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["text"] != example["text"]
-                    )
-
-            case TaskGroup.TOKEN_CLASSIFICATION:
-                labels = it.cycle(
-                    [
-                        label.lower()
-                        for label in self.dataset_config.labels
-                        if label.lower().startswith("b-")
-                    ]
-                )
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    label = next(labels)
-                    possible_examples = shuffled_train.filter(
-                        lambda x: label in [tag.lower() for tag in x["labels"]]
-                    )
-                    if len(possible_examples) == 0:
-                        continue
-                    example = possible_examples.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["tokens"] != example["tokens"]
-                    )
-
-            case TaskGroup.QUESTION_ANSWERING:
-                # Locate the maximum number of tokens that constitutes a short example
-                for max_num_tokens in [512, 1024, 2048, 4096, 8192]:
-                    train_with_short_examples = dataset["train"].filter(
-                        lambda example: len(example["context"]) < max_num_tokens
-                    )
-                    num_short_examples = len(train_with_short_examples)
-                    if num_short_examples >= self.dataset_config.num_few_shot_examples:
-                        break
-                else:
-                    raise InvalidBenchmark(
-                        "Could not find enough short examples for few-shot learning."
-                    )
-
-                shuffled_train = train_with_short_examples.shuffle(seed=random_seed)
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    example = shuffled_train.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["context"] != example["context"]
-                    )
-
-            case _:
-                raise NotImplementedError(f"Unsupported task group: {task.task_group}.")
-
-        random.seed(random_seed)
-        random.shuffle(few_shot_examples)
-        return few_shot_examples
-
-    def _apply_prompt(
-        self,
-        examples: dict[str, t.Any],
-        few_shot_examples: list[dict[str, t.Any]],
-        task: Task,
-    ) -> dict[str, t.Any]:
-        """Apply prompt template to an example, potentially with few-shot examples.
-
-        Args:
-            examples:
-                The examples to apply the few-shot examples to.
-            few_shot_examples:
-                The few-shot examples to apply.
-            task:
-                The task that is being benchmarked.
-
-        Returns:
-            The example with the few-shot examples applied.
-        """
-
-        def create_prompt(**kwargs: str) -> tuple[str, str]:
-            """Create a prompt from the given keyword arguments.
-
-            Args:
-                kwargs:
-                    The keyword arguments to use in the prompt.
-
-            Returns:
-                A pair (prompt, label), where "label" is an empty string if the model is
-                not instruction tuned (as in this case it is included in the prompt).
-            """
-            label_key = "label" if "label" in kwargs else "target_text"
-            label = kwargs.pop(label_key)
-            label_mapping = self.dataset_config.prompt_label_mapping
-            label = label_mapping.get(label, label)
-            prompt = self.dataset_config.instruction_prompt.format(**kwargs)
-            return prompt, label
-
-        match task.task_group:
-            case (
-                TaskGroup.SEQUENCE_CLASSIFICATION
-                | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
-            ):
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["text"].replace("\n", " ").strip(),
-                        label=example["label"].replace("\n", " ").strip(),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(text=text.replace("\n", " ").strip(), label="")
-                    for text in examples["text"]
-                ]
-
-            case TaskGroup.TEXT_TO_TEXT:
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["text"].replace("\n", " ").strip(),
-                        target_text=example["target_text"].replace("\n", " ").strip(),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(text=text.replace("\n", " ").strip(), target_text="")
-                    for text in examples["text"]
-                ]
-
-            case TaskGroup.TOKEN_CLASSIFICATION:
-
-                def create_label(example: dict) -> str:
-                    prompt_labels = self.dataset_config.prompt_label_mapping.values()
-                    labels: dict[str, list[str]] = {
-                        prompt_label: list() for prompt_label in prompt_labels
-                    }
-                    for token, label in zip(example["tokens"], example["labels"]):
-                        label = label.lower()
-                        if label == "o":
-                            continue
-                        prompt_label = self.dataset_config.prompt_label_mapping[label]
-                        if label.startswith("b-"):
-                            labels[prompt_label].append(token)
-                        elif label.startswith("i-"):
-                            labels[prompt_label][-1] += " " + token
-                    return json.dumps(labels, ensure_ascii=False)
-
-                few_shot_sections = [
-                    create_prompt(
-                        text=" ".join(example["tokens"]).replace("\n", " ").strip(),
-                        label=create_label(example=example),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(
-                        text=" ".join(tokens).replace("\n", " ").strip(), label=""
-                    )
-                    for tokens in examples["tokens"]
-                ]
-
-            case TaskGroup.QUESTION_ANSWERING:
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["context"].replace("\n", " ").strip(),
-                        question=example["question"].replace("\n", " ").strip(),
-                        label=example["answers"]["text"][0].replace("\n", " "),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(
-                        text=context.replace("\n", " ").strip(),
-                        question=question.replace("\n", " ").strip(),
-                        label="",
-                    )
-                    for context, question in zip(
-                        examples["context"], examples["question"]
-                    )
-                ]
-
-            case _:
-                raise NotImplementedError(f"Unsupported task group: {task.task_group}.")
-
-        few_shot_messages = [
-            dict(role=role, content=content)
-            for prompt, label in few_shot_sections
-            for role, content in [("user", prompt), ("assistant", label)]
-        ]
-
-        messages_list = [
-            few_shot_messages + [dict(role="user", content=prompt)]
-            for prompt, _ in new_sections
-        ]
-
-        examples["messages"] = messages_list
-        return examples
-

 def raise_if_wrong_params(
     model_config: ModelConfig, allowed_params: dict[str, list[str]]
euroeval/benchmark_modules/vllm.py CHANGED
@@ -3,11 +3,9 @@
 import collections.abc as c
 import contextlib
 import importlib.util
-import itertools as it
 import json
 import logging
 import os
-import random
 import re
 import sys
 import typing as t
@@ -56,6 +54,7 @@ from ..exceptions import (
     NeedsEnvironmentVariable,
     NeedsExtraInstalled,
 )
+from ..generation_utils import apply_prompt, extract_few_shot_examples
 from ..languages import get_all_languages
 from ..task_group_utils import (
     question_answering,
@@ -258,14 +257,22 @@ class VLLMModel(HuggingFaceEncoderModel):
             )

         if self.benchmark_config.few_shot:
-            few_shot_examples = self._extract_few_shot_examples(
-                dataset=dataset, task=task, itr_idx=itr_idx
+            few_shot_examples = extract_few_shot_examples(
+                dataset=dataset, dataset_config=self.dataset_config, itr_idx=itr_idx
             )
         else:
             few_shot_examples = list()

         dataset["test"] = dataset["test"].map(
-            partial(self._apply_prompt, few_shot_examples=few_shot_examples, task=task),
+            partial(
+                apply_prompt,
+                few_shot_examples=few_shot_examples,
+                model_config=self.model_config,
+                dataset_config=self.dataset_config,
+                instruction_model=self.buffer["instruction_model"],
+                always_populate_text_field=True,
+                tokenizer=self._tokenizer,
+            ),
             batched=True,
             load_from_cache_file=False,
             keep_in_memory=True,
@@ -439,6 +446,7 @@ class VLLMModel(HuggingFaceEncoderModel):
             logger.info(
                 "Prompts are too long, so truncating them and trying again..."
             )
+            logger.debug(f"The error message was: {str(e)}")
             tokenized_prompts = self._tokenizer(
                 text=prompts,
                 truncation=True,
@@ -499,7 +507,6 @@ class VLLMModel(HuggingFaceEncoderModel):

         # Sanity check
         if len(completions) != len(prompts):
-            breakpoint()
             raise InvalidBenchmark(
                 f"Expected {len(prompts):,} completions, but got {len(completions):,}."
             )
@@ -615,303 +622,6 @@ class VLLMModel(HuggingFaceEncoderModel):

         return model_config

-    def _extract_few_shot_examples(
-        self, dataset: DatasetDict, task: Task, itr_idx: int
-    ) -> list[dict[str, t.Any]]:
-        """Extract few-shot examples from a dataset.
-
-        This will always extract the examples from the training split.
-
-        We ensure that the few-shot examples are unique by picking them one at a time.
-
-        Args:
-            dataset:
-                The dataset to extract the few-shot examples from.
-            task:
-                The task that is being benchmarked.
-            itr_idx:
-                The index of the dataset in the iterator.
-
-        Returns:
-            The few-shot examples.
-        """
-        random_seed = 4242 + itr_idx
-        num_few_shots = self.dataset_config.num_few_shot_examples
-        few_shot_examples: list[dict[str, t.Any]] = list()
-        shuffled_train = dataset["train"].shuffle(seed=random_seed)
-
-        match task.task_group:
-            case (
-                TaskGroup.SEQUENCE_CLASSIFICATION
-                | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
-            ):
-                labels = it.cycle(self.dataset_config.labels)
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    label = next(labels)
-                    possible_examples = shuffled_train.filter(
-                        lambda x: x["label"].lower() == label.lower()
-                    )
-                    if len(possible_examples) == 0:
-                        continue
-                    example = possible_examples.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["text"] != example["text"]
-                    )
-
-            case TaskGroup.TEXT_TO_TEXT:
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    example = shuffled_train.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["text"] != example["text"]
-                    )
-
-            case TaskGroup.TOKEN_CLASSIFICATION:
-                labels = it.cycle(
-                    [
-                        label.lower()
-                        for label in self.dataset_config.labels
-                        if label.lower().startswith("b-")
-                    ]
-                )
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    label = next(labels)
-                    possible_examples = shuffled_train.filter(
-                        lambda x: label in [tag.lower() for tag in x["labels"]]
-                    )
-                    if len(possible_examples) == 0:
-                        continue
-                    example = possible_examples.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["tokens"] != example["tokens"]
-                    )
-
-            case TaskGroup.QUESTION_ANSWERING:
-                # Locate the maximum number of tokens that constitutes a short example
-                for max_num_tokens in [512, 1024, 2048, 4096, 8192]:
-                    train_with_short_examples = dataset["train"].filter(
-                        lambda example: len(example["context"]) < max_num_tokens
-                    )
-                    num_short_examples = len(train_with_short_examples)
-                    if num_short_examples >= self.dataset_config.num_few_shot_examples:
-                        break
-                else:
-                    raise InvalidBenchmark(
-                        "Could not find enough short examples for few-shot learning."
-                    )
-
-                shuffled_train = train_with_short_examples.shuffle(seed=random_seed)
-                while (
-                    len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0
-                ):
-                    example = shuffled_train.select(range(1))[0]
-                    few_shot_examples.append(example)
-                    shuffled_train = shuffled_train.filter(
-                        lambda x: x["context"] != example["context"]
-                    )
-
-            case _:
-                raise NotImplementedError(f"Unsupported task group: {task.task_group}.")
-
-        random.seed(random_seed)
-        random.shuffle(few_shot_examples)
-        return few_shot_examples
-
-    def _apply_prompt(
-        self,
-        examples: dict[str, t.Any],
-        few_shot_examples: list[dict[str, t.Any]],
-        task: Task,
-    ) -> dict[str, t.Any]:
-        """Apply prompt template to an example, potentially with few-shot examples.
-
-        Args:
-            examples:
-                The examples to apply the few-shot examples to.
-            few_shot_examples:
-                The few-shot examples to apply.
-            task:
-                The task that is being benchmarked.
-
-        Returns:
-            The example with the few-shot examples applied.
-        """
-
-        def create_prompt(**kwargs: str) -> tuple[str, str]:
-            """Create a prompt from the given keyword arguments.
-
-            Args:
-                kwargs:
-                    The keyword arguments to use in the prompt.
-
-            Returns:
-                A pair (prompt, label), where "label" is an empty string if the model is
-                not instruction tuned (as in this case it is included in the prompt).
-            """
-            label_key = "label" if "label" in kwargs else "target_text"
-            label = kwargs.pop(label_key)
-            assert label is not None, (
-                f"Found a None label for the prompt: {kwargs}. This should not happen."
-            )
-            label_mapping = self.dataset_config.prompt_label_mapping
-            label = label_mapping.get(label, label)
-            if self.buffer["instruction_model"]:
-                prompt = self.dataset_config.instruction_prompt.format(**kwargs)
-                return prompt, label
-            else:
-                kwargs[label_key] = label
-                return self.dataset_config.prompt_template.format(**kwargs), ""
-
-        match task.task_group:
-            case (
-                TaskGroup.SEQUENCE_CLASSIFICATION
-                | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
-            ):
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["text"].replace("\n", " ").strip(),
-                        label=example["label"].replace("\n", " ").strip(),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(text=text.replace("\n", " ").strip(), label="")
-                    for text in examples["text"]
-                ]
-
-            case TaskGroup.TEXT_TO_TEXT:
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["text"].replace("\n", " ").strip(),
-                        target_text=example["target_text"].replace("\n", " ").strip(),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(text=text.replace("\n", " ").strip(), target_text="")
-                    for text in examples["text"]
-                ]
-
-            case TaskGroup.TOKEN_CLASSIFICATION:
-
-                def create_label(example: dict) -> str:
-                    prompt_labels = self.dataset_config.prompt_label_mapping.values()
-                    labels: dict[str, list[str]] = {
-                        prompt_label: list() for prompt_label in prompt_labels
-                    }
-                    for token, label in zip(example["tokens"], example["labels"]):
-                        label = label.lower()
-                        if label == "o":
-                            continue
-                        prompt_label = self.dataset_config.prompt_label_mapping[label]
-                        if label.startswith("b-"):
-                            labels[prompt_label].append(token)
-                        elif label.startswith("i-"):
-                            labels[prompt_label][-1] += " " + token
-                    return json.dumps(labels, ensure_ascii=False)
-
-                few_shot_sections = [
-                    create_prompt(
-                        text=" ".join(example["tokens"]).replace("\n", " ").strip(),
-                        label=create_label(example=example),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(
-                        text=" ".join(tokens).replace("\n", " ").strip(), label=""
-                    )
-                    for tokens in examples["tokens"]
-                ]
-
-            case TaskGroup.QUESTION_ANSWERING:
-                few_shot_sections = [
-                    create_prompt(
-                        text=example["context"].replace("\n", " ").strip(),
-                        question=example["question"].replace("\n", " ").strip(),
-                        label=example["answers"]["text"][0].replace("\n", " "),
-                    )
-                    for example in few_shot_examples
-                ]
-                new_sections = [
-                    create_prompt(
-                        text=context.replace("\n", " ").strip(),
-                        question=question.replace("\n", " ").strip(),
-                        label="",
-                    )
-                    for context, question in zip(
-                        examples["context"], examples["question"]
-                    )
-                ]
-
-            case _:
-                raise NotImplementedError(f"Unsupported task group: {task.task_group}.")
-
-        if self.buffer["instruction_model"]:
-            few_shot_messages = [
-                dict(role=role, content=content)
-                for prompt, label in few_shot_sections
-                for role, content in [("user", prompt), ("assistant", label)]
-            ]
-
-            messages_list = [
-                few_shot_messages + [dict(role="user", content=prompt)]
-                for prompt, _ in new_sections
-            ]
-
-            # Pick the chat template that matches the language of the dataset, if such a
-            # template exists
-            chat_template: str | None = None
-            if isinstance(self._tokenizer.chat_template, dict):
-                language_codes = [
-                    language.code for language in self.dataset_config.languages
-                ]
-                for name, candidate_template in self._tokenizer.chat_template.items():
-                    if name.lower() in language_codes:
-                        chat_template = candidate_template
-                        log_once(
-                            f"Using the {name!r} chat template for the tokenizer for "
-                            f"model {self.model_config.model_id!r}.",
-                            level=logging.DEBUG,
-                        )
-                        break
-
-            texts = [
-                self._tokenizer.apply_chat_template(
-                    conversation=messages,
-                    tokenize=False,
-                    add_generation_prompt=True,
-                    chat_template=chat_template,
-                )
-                for messages in messages_list
-            ]
-
-            examples["text"] = texts
-
-        else:
-            prompt_prefix = ""
-            if self.dataset_config.prompt_prefix:
-                prompt_prefix = self.dataset_config.prompt_prefix + "\n\n"
-
-            few_shot_prompt = "\n\n".join([prompt for prompt, _ in few_shot_sections])
-            if few_shot_prompt:
-                few_shot_prompt += "\n\n"
-
-            examples["text"] = [
-                prompt_prefix + few_shot_prompt + new_prompt
-                for new_prompt, _ in new_sections
-            ]
-
-        return examples
-
     @property
     def data_collator(self) -> c.Callable[[list[t.Any]], dict[str, t.Any]]:
         """The data collator used to prepare samples during finetuning.
euroeval/benchmarker.py CHANGED
@@ -774,6 +774,7 @@ class Benchmarker:
             metric_configs=dataset_config.task.metrics,
             scores=scores,
             model_id=model_config.model_id,
+            model_revision=model_config.revision,
         )

         record = BenchmarkResult(
euroeval/data_models.py CHANGED
@@ -531,7 +531,9 @@ class DatasetConfig:

         # Convert labels to single-quoted labels - and remove duplicates
         quoted_labels = [
-            f"'{label}'" for label in set(self.prompt_label_mapping.values())
+            f"'{self.prompt_label_mapping[label]}'"
+            for label in set(self.labels)
+            if label in self.prompt_label_mapping
         ]

         if not quoted_labels:
euroeval/dataset_configs/dutch.py CHANGED
@@ -6,13 +6,14 @@ from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM

 ### Official datasets ###

-DUTCH_SOCIAL_CONFIG = DatasetConfig(
-    name="dutch-social",
+DBRD_CONFIG = DatasetConfig(
+    name="dbrd",
     pretty_name="the truncated version of the Dutch sentiment classification "
-    "dataset Dutch Social",
-    huggingface_id="EuroEval/dutch-social-mini",
+    "dataset DBRD",
+    huggingface_id="EuroEval/dbrd-mini",
     task=SENT,
     languages=[NL],
+    _labels=["negative", "positive"],
 )

 SCALA_NL_CONFIG = DatasetConfig(
@@ -71,18 +72,6 @@ HELLASWAG_NL_CONFIG = DatasetConfig(

 ### Unofficial datasets ###

-DBRD_CONFIG = DatasetConfig(
-    name="dbrd",
-    pretty_name="the truncated version of the Dutch sentiment classification "
-    "dataset DBRD",
-    huggingface_id="EuroEval/dbrd-mini",
-    task=SENT,
-    languages=[NL],
-    _labels=["negative", "positive"],
-    _prompt_label_mapping=dict(positive="positief", negative="negatief"),
-    unofficial=True,
-)
-
 DUTCH_COLA_CONFIG = DatasetConfig(
     name="dutch-cola",
     pretty_name="the truncated version of the Dutch linguistic acceptability dataset "
euroeval/generation_utils.py ADDED
@@ -0,0 +1,346 @@
+"""Utility functions related to generative models."""
+
+import itertools as it
+import json
+import logging
+import random
+import typing as t
+
+from .enums import TaskGroup
+from .exceptions import InvalidBenchmark
+from .utils import log_once
+
+if t.TYPE_CHECKING:
+    from datasets import DatasetDict
+    from transformers.tokenization_utils import PreTrainedTokenizer
+
+    from .data_models import DatasetConfig, ModelConfig
+
+logger = logging.getLogger("euroeval")
+
+
+def extract_few_shot_examples(
+    dataset: "DatasetDict", dataset_config: "DatasetConfig", itr_idx: int
+) -> list[dict[str, t.Any]]:
+    """Extract few-shot examples from a dataset.
+
+    This will always extract the examples from the training split.
+
+    We ensure that the few-shot examples are unique by picking them one at a time.
+
+    Args:
+        dataset:
+            The dataset to extract the few-shot examples from.
+        dataset_config:
+            The dataset configuration.
+        itr_idx:
+            The index of the dataset in the iterator.
+
+    Returns:
+        The few-shot examples.
+    """
+    random_seed = 4242 + itr_idx
+    num_few_shots = dataset_config.num_few_shot_examples
+    few_shot_examples: list[dict[str, t.Any]] = list()
+    shuffled_train = dataset["train"].shuffle(seed=random_seed)
+
+    match dataset_config.task.task_group:
+        case (
+            TaskGroup.SEQUENCE_CLASSIFICATION | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
+        ):
+            # Locate the maximum number of tokens that constitutes a short example
+            for max_num_tokens in [512, 1024, 2048, 4096, 8192]:
+                train_with_short_examples = dataset["train"].filter(
+                    lambda example: len(example["text"]) < max_num_tokens
+                )
+                num_short_examples = len(train_with_short_examples)
+                if num_short_examples >= dataset_config.num_few_shot_examples:
+                    break
+            else:
+                raise InvalidBenchmark(
+                    "Could not find enough short examples for few-shot learning."
+                )
+
+            shuffled_train = train_with_short_examples.shuffle(seed=random_seed)
+            labels = it.cycle(dataset_config.labels)
+            while len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0:
+                label = next(labels)
+                possible_examples = shuffled_train.filter(
+                    lambda x: x["label"].lower() == label.lower()
+                )
+                if len(possible_examples) == 0:
+                    continue
+                example = possible_examples.select(range(1))[0]
+                few_shot_examples.append(example)
+                shuffled_train = shuffled_train.filter(
+                    lambda x: x["text"] != example["text"]
+                )
+
+        case TaskGroup.TEXT_TO_TEXT:
+            while len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0:
+                example = shuffled_train.select(range(1))[0]
+                few_shot_examples.append(example)
+                shuffled_train = shuffled_train.filter(
+                    lambda x: x["text"] != example["text"]
+                )
+
+        case TaskGroup.TOKEN_CLASSIFICATION:
+            labels = it.cycle(
+                [
+                    label.lower()
+                    for label in dataset_config.labels
+                    if label.lower().startswith("b-")
+                ]
+            )
+            while len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0:
+                label = next(labels)
+                possible_examples = shuffled_train.filter(
+                    lambda x: label in [tag.lower() for tag in x["labels"]]
+                )
+                if len(possible_examples) == 0:
+                    continue
+                example = possible_examples.select(range(1))[0]
+                few_shot_examples.append(example)
+                shuffled_train = shuffled_train.filter(
+                    lambda x: x["tokens"] != example["tokens"]
+                )
+
+        case TaskGroup.QUESTION_ANSWERING:
+            # Locate the maximum number of tokens that constitutes a short example
+            for max_num_tokens in [512, 1024, 2048, 4096, 8192]:
+                train_with_short_examples = dataset["train"].filter(
+                    lambda example: len(example["context"]) < max_num_tokens
+                )
+                num_short_examples = len(train_with_short_examples)
+                if num_short_examples >= dataset_config.num_few_shot_examples:
+                    break
+            else:
+                raise InvalidBenchmark(
+                    "Could not find enough short examples for few-shot learning."
+                )
+
+            shuffled_train = train_with_short_examples.shuffle(seed=random_seed)
+            while len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0:
+                example = shuffled_train.select(range(1))[0]
+                few_shot_examples.append(example)
+                shuffled_train = shuffled_train.filter(
+                    lambda x: x["context"] != example["context"]
+                )
+
+        case _:
+            raise NotImplementedError(
+                f"Unsupported task group: {dataset_config.task.task_group}."
+            )
+
+    random.seed(random_seed)
+    random.shuffle(few_shot_examples)
+    return few_shot_examples
+
+
+def apply_prompt(
+    examples: dict[str, t.Any],
+    few_shot_examples: list[dict[str, t.Any]],
+    model_config: "ModelConfig",
+    dataset_config: "DatasetConfig",
+    instruction_model: bool,
+    always_populate_text_field: bool,
+    tokenizer: "PreTrainedTokenizer | None",
+) -> dict[str, t.Any]:
+    """Apply prompt template to an example, potentially with few-shot examples.
+
+    Args:
+        examples:
+            The examples to apply the few-shot examples to.
+        few_shot_examples:
+            The few-shot examples to apply.
+        dataset_config:
+            The dataset configuration.
+        instruction_model:
+            Whether the model is instruction-tuned.
+        always_populate_text_field:
+            Whether to always populate the 'text' field in the examples, as opposed to
+            the 'messages' field.
+        tokenizer:
+            The tokenizer to use for the model. If None, the tokenizer is not used.
+
+    Returns:
+        The example with the few-shot examples applied.
+    """
+    # Sanity check
+    if instruction_model and always_populate_text_field and tokenizer is None:
+        raise ValueError(
+            "The `tokenizer` argument must be provided when the model is instruction "
+            "tuned and when we are not just returning the raw messages."
+        )
+
+    def create_prompt(**kwargs: str) -> tuple[str, str]:
+        """Create a prompt from the given keyword arguments.
+
+        Args:
+            kwargs:
+                The keyword arguments to use in the prompt.
+
+        Returns:
+            A pair (prompt, label), where "label" is an empty string if the model is
+            not instruction tuned (as in this case it is included in the prompt).
+        """
+        label_key = "label" if "label" in kwargs else "target_text"
+        label = kwargs.pop(label_key)
+        assert label is not None, (
+            f"Found a None label for the prompt: {kwargs}. This should not happen."
+        )
+        label_mapping = dataset_config.prompt_label_mapping
+        label = label_mapping.get(label, label)
+        if instruction_model:
+            prompt = dataset_config.instruction_prompt.format(**kwargs)
+            return prompt, label
+        else:
+            kwargs[label_key] = label
+            return dataset_config.prompt_template.format(**kwargs), ""
+
+    match dataset_config.task.task_group:
+        case (
+            TaskGroup.SEQUENCE_CLASSIFICATION | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
+        ):
+            few_shot_sections = [
+                create_prompt(
+                    text=example["text"].replace("\n", " ").strip(),
+                    label=example["label"].replace("\n", " ").strip(),
+                )
+                for example in few_shot_examples
+            ]
+            new_sections = [
+                create_prompt(text=text.replace("\n", " ").strip(), label="")
+                for text in examples["text"]
+            ]
+
+        case TaskGroup.TEXT_TO_TEXT:
+            few_shot_sections = [
+                create_prompt(
+                    text=example["text"].replace("\n", " ").strip(),
+                    target_text=example["target_text"].replace("\n", " ").strip(),
+                )
+                for example in few_shot_examples
+            ]
+            new_sections = [
+                create_prompt(text=text.replace("\n", " ").strip(), target_text="")
+                for text in examples["text"]
+            ]
+
+        case TaskGroup.TOKEN_CLASSIFICATION:
+
+            def create_label(example: dict) -> str:
+                prompt_labels = dataset_config.prompt_label_mapping.values()
+                labels: dict[str, list[str]] = {
+                    prompt_label: list() for prompt_label in prompt_labels
+                }
+                for token, label in zip(example["tokens"], example["labels"]):
+                    label = label.lower()
+                    if label == "o":
+                        continue
+                    prompt_label = dataset_config.prompt_label_mapping[label]
+                    if label.startswith("b-"):
+                        labels[prompt_label].append(token)
+                    elif label.startswith("i-"):
+                        labels[prompt_label][-1] += " " + token
+                return json.dumps(labels, ensure_ascii=False)
+
+            few_shot_sections = [
+                create_prompt(
+                    text=" ".join(example["tokens"]).replace("\n", " ").strip(),
+                    label=create_label(example=example),
+                )
+                for example in few_shot_examples
+            ]
+            new_sections = [
+                create_prompt(
+                    text=" ".join(tokens).replace("\n", " ").strip(), label=""
+                )
+                for tokens in examples["tokens"]
+            ]
+
+        case TaskGroup.QUESTION_ANSWERING:
+            few_shot_sections = [
+                create_prompt(
+                    text=example["context"].replace("\n", " ").strip(),
+                    question=example["question"].replace("\n", " ").strip(),
+                    label=example["answers"]["text"][0].replace("\n", " "),
+                )
+                for example in few_shot_examples
+            ]
+            new_sections = [
+                create_prompt(
+                    text=context.replace("\n", " ").strip(),
+                    question=question.replace("\n", " ").strip(),
+                    label="",
+                )
+                for context, question in zip(examples["context"], examples["question"])
+            ]
+
+        case _:
+            raise NotImplementedError(
+                f"Unsupported task group: {dataset_config.task.task_group}."
+            )
+
+    if instruction_model:
+        few_shot_messages = [
+            dict(role=role, content=content)
+            for prompt, label in few_shot_sections
+            for role, content in [("user", prompt), ("assistant", label)]
+        ]
+
+        messages_list = [
+            few_shot_messages + [dict(role="user", content=prompt)]
+            for prompt, _ in new_sections
+        ]
+
+        if not always_populate_text_field:
+            examples["messages"] = messages_list
+
+        else:
+            assert tokenizer is not None
+
+            # Pick the chat template that matches the language of the dataset, if such a
+            # template exists
+            chat_template: str | None = None
+            if isinstance(tokenizer.chat_template, dict):
+                language_codes = [
+                    language.code for language in dataset_config.languages
+                ]
+                for name, candidate_template in tokenizer.chat_template.items():
+                    if name.lower() in language_codes:
+                        chat_template = candidate_template
+                        log_once(
+                            f"Using the {name!r} chat template for the tokenizer for "
+                            f"model {model_config.model_id!r}.",
+                            level=logging.DEBUG,
+                        )
+                        break
+
+            texts = [
+                tokenizer.apply_chat_template(
+                    conversation=messages,
+                    tokenize=False,
+                    add_generation_prompt=True,
+                    chat_template=chat_template,
+                )
+                for messages in messages_list
+            ]
+
+            examples["text"] = texts
+
+    else:
+        prompt_prefix = ""
+        if dataset_config.prompt_prefix:
+            prompt_prefix = dataset_config.prompt_prefix + "\n\n"
+
+        few_shot_prompt = "\n\n".join([prompt for prompt, _ in few_shot_sections])
+        if few_shot_prompt:
+            few_shot_prompt += "\n\n"
+
+        examples["text"] = [
+            prompt_prefix + few_shot_prompt + new_prompt
+            for new_prompt, _ in new_sections
+        ]
+
+    return examples
euroeval/scores.py CHANGED
@@ -18,6 +18,7 @@ def log_scores(
     metric_configs: list["MetricConfig"],
     scores: list[dict[str, float]],
     model_id: str,
+    model_revision: str,
 ) -> "ScoreDict":
     """Log the scores.

@@ -30,13 +31,18 @@ def log_scores(
             The scores that are to be logged. This is a list of dictionaries full of
             scores.
         model_id:
-            The full Hugging Face Hub path to the pretrained transformer model.
+            The model ID of the model that was evaluated.
+        model_revision:
+            The revision of the model.

     Returns:
         A dictionary with keys 'raw_scores' and 'total', with 'raw_scores' being
         identical to `scores` and 'total' being a dictionary with the aggregated scores
         (means and standard errors).
     """
+    if model_revision and model_revision != "main":
+        model_id += f"@{model_revision}"
+
     logger.info(f"Finished evaluation of {model_id} on {dataset_name}.")

     total_dict: dict[str, float] = dict()
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.7.0
+Version: 15.7.1
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -1,21 +1,22 @@
 euroeval/__init__.py,sha256=NiT6S4II1YpnNl5KFHDNogE-rvVkOHQy5pR483eq_do,2581
 euroeval/benchmark_config_factory.py,sha256=JCjJS2pjtiuQ6tpwZ_DJFvNzwdbZu5YdJcHhFz-q6eU,12562
-euroeval/benchmarker.py,sha256=gOLNpW11cBX_8AvotnlGNbejtOM4acmXS3aovNREqhA,48434
+euroeval/benchmarker.py,sha256=OnjGVblWW20wSmA7Tr2c-qE3g8FIjxW6wTJySAcGxVk,48492
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=EMB6g6kRvxIqlfYLSoMzwLAtEd-fqXipo4A_HTkhjkA,8575
 euroeval/constants.py,sha256=p6kp_R6-Tq5LBvyXyT6Sa6N3SkjEElGS2LSZRBoQaYs,1985
 euroeval/data_loading.py,sha256=L_REtxefte5Ke4xE_Cz01zkfCyKlOYhSqT5ZXXulHPc,3992
-euroeval/data_models.py,sha256=Nlb2s26u5OvQ2AITAt25NMpeI1IHM2_qqbpyU_bZhiY,22907
+euroeval/data_models.py,sha256=t5FwpGxiSIMe7iKae-tT7usUWki-ILzAFFm7dPJoFsk,22973
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
 euroeval/exceptions.py,sha256=LRd7HoudupRp5-AX3L0X4hIAWCa6JVx-LViHPg7u7dg,5821
 euroeval/finetuning.py,sha256=IieAhgvxjeLHAHBief1Ay-STcCosQmrDHFTRTXFZX0Q,10743
 euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
+euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
 euroeval/human_evaluation.py,sha256=VGvw1X6Mkdf22r-THSNWXMIqyJP44yh4rW53vq-0huo,27681
 euroeval/languages.py,sha256=IQUbGMyn7pxAyM70M0FTO80m92Q4KgIU604MJhVia-Q,8513
 euroeval/model_cache.py,sha256=n39yFpZkudBCVwz1EQpZ-g5BQtlQemQ5nP3IiFKJZHg,8275
 euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
 euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=OL1MPVSgBySc9gMGeZBnj_j6-EvpDtEOwjO12IgeP6o,2899
+euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
 euroeval/tokenization_utils.py,sha256=fbMVAOkRdcpf9L2SVechPpmWYgDXgQcc-sDrYu21wFI,12487
@@ -25,11 +26,11 @@ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwR
 euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
 euroeval/benchmark_modules/hf.py,sha256=yFApLL4_ia5Kw2iat5RSI8h5RhI4OP04HlzYidlhBCs,44012
-euroeval/benchmark_modules/litellm.py,sha256=9Fhh7Zyn6F4JBlRoQkST1wIeb8z0YliRRrcmD5pONs4,52551
-euroeval/benchmark_modules/vllm.py,sha256=vwAE7SGRhePqkzAt1S-FKPelEqe8VMGwah9Nj2J1hLs,51295
+euroeval/benchmark_modules/litellm.py,sha256=v_rbCm2FiTMqcUui_09k3E1-s5uOmbfAvSy2c7Mm0_E,42636
+euroeval/benchmark_modules/vllm.py,sha256=Q-3vtZz5XxQQImJxOiF0XDrQ4T_p0bkgdPw1Jobgu3s,39380
 euroeval/dataset_configs/__init__.py,sha256=fkD1hzW7szJLc1MdK-AY4EBFWBUX5Z8t4f9uBHQnRvU,1858
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
-euroeval/dataset_configs/dutch.py,sha256=N3zL0vGe4OyPgVU_AiYNNfk96jSc_JDtKrVIHbaEYCU,3536
+euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
 euroeval/dataset_configs/english.py,sha256=yHw7D0zSNVbiSBAjR1mWX4V5FSkhqy4y-o-pnyWCLxE,2323
 euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
 euroeval/dataset_configs/finnish.py,sha256=BIfcxdgJu4CfevHAjzwH7cYd8Xd9DGrm49lcJZcGVQM,1730
@@ -53,8 +54,8 @@ euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iY
 euroeval/task_group_utils/sequence_classification.py,sha256=xPz1gJioK96iv2bNoDWiC2EJkhRvRd7QZNgY8bT237c,11703
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.7.0.dist-info/METADATA,sha256=8oMsbhHWeO7j4KQdn4lpt-O94Nw0erwRoD_Ogk6CX2U,13669
-euroeval-15.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-15.7.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
-euroeval-15.7.0.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
-euroeval-15.7.0.dist-info/RECORD,,
+euroeval-15.7.1.dist-info/METADATA,sha256=Fj6QejwQCK0zGuP_DHSQ7sul195ivUqOUCT5AVxgLSI,13669
+euroeval-15.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.7.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.7.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.7.1.dist-info/RECORD,,