EuroEval 15.10.1__py3-none-any.whl → 15.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. euroeval/__init__.py +7 -0
  2. euroeval/benchmark_config_factory.py +7 -0
  3. euroeval/benchmark_modules/base.py +29 -29
  4. euroeval/benchmark_modules/fresh.py +31 -19
  5. euroeval/benchmark_modules/hf.py +27 -23
  6. euroeval/benchmark_modules/litellm.py +50 -30
  7. euroeval/benchmark_modules/vllm.py +22 -26
  8. euroeval/benchmarker.py +8 -1
  9. euroeval/callbacks.py +17 -13
  10. euroeval/cli.py +10 -0
  11. euroeval/data_loading.py +10 -5
  12. euroeval/data_models.py +9 -40
  13. euroeval/dataset_configs/__init__.py +1 -0
  14. euroeval/dataset_configs/english.py +13 -4
  15. euroeval/dataset_configs/norwegian.py +8 -0
  16. euroeval/dataset_configs/portuguese.py +74 -0
  17. euroeval/dataset_configs/spanish.py +4 -3
  18. euroeval/finetuning.py +9 -8
  19. euroeval/generation.py +27 -8
  20. euroeval/human_evaluation.py +14 -13
  21. euroeval/languages.py +1 -2
  22. euroeval/metrics.py +452 -0
  23. euroeval/prompt_templates/linguistic_acceptability.py +9 -1
  24. euroeval/prompt_templates/multiple_choice.py +9 -1
  25. euroeval/prompt_templates/named_entity_recognition.py +20 -1
  26. euroeval/prompt_templates/sentiment_classification.py +11 -1
  27. euroeval/prompt_templates/summarization.py +8 -1
  28. euroeval/scores.py +14 -19
  29. euroeval/speed_benchmark.py +6 -7
  30. euroeval/task_group_utils/multiple_choice_classification.py +6 -4
  31. euroeval/task_group_utils/question_answering.py +5 -28
  32. euroeval/task_group_utils/sequence_classification.py +6 -30
  33. euroeval/task_group_utils/text_to_text.py +19 -34
  34. euroeval/task_group_utils/token_classification.py +18 -30
  35. euroeval/tasks.py +11 -136
  36. euroeval/types.py +6 -4
  37. {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/METADATA +10 -10
  38. euroeval-15.12.0.dist-info/RECORD +63 -0
  39. {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/licenses/LICENSE +1 -1
  40. euroeval-15.10.1.dist-info/RECORD +0 -61
  41. {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/WHEEL +0 -0
  42. {euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/entry_points.txt +0 -0
euroeval/tasks.py CHANGED
@@ -1,6 +1,7 @@
 """All benchmarks tasks used in EuroEval."""
 
-from .data_models import MetricConfig, Task
+from . import metrics as m
+from .data_models import Task
 from .enums import TaskGroup
 from .prompt_templates import (
     LA_TEMPLATES,
@@ -25,21 +26,7 @@ LA = Task(
     name="linguistic-acceptability",
     task_group=TaskGroup.SEQUENCE_CLASSIFICATION,
     template_dict=LA_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="mcc",
-            pretty_name="Matthew's Correlation Coefficient",
-            huggingface_id="matthews_correlation",
-            results_key="matthews_correlation",
-        ),
-        MetricConfig(
-            name="macro_f1",
-            pretty_name="Macro-average F1-score",
-            huggingface_id="f1",
-            results_key="f1",
-            compute_kwargs=dict(average="macro"),
-        ),
-    ],
+    metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
     default_max_generated_tokens=5,
     default_labels=["correct", "incorrect"],
@@ -50,20 +37,7 @@ NER = Task(
     name="named-entity-recognition",
     task_group=TaskGroup.TOKEN_CLASSIFICATION,
     template_dict=NER_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="micro_f1_no_misc",
-            pretty_name="Micro-average F1-score without MISC tags",
-            huggingface_id="seqeval",
-            results_key="overall_f1",
-        ),
-        MetricConfig(
-            name="micro_f1",
-            pretty_name="Micro-average F1-score with MISC tags",
-            huggingface_id="seqeval",
-            results_key="overall_f1",
-        ),
-    ],
+    metrics=[m.micro_f1_no_misc_metric, m.micro_f1_metric],
     default_num_few_shot_examples=8,
     default_max_generated_tokens=128,
     default_labels=[
@@ -84,22 +58,7 @@ RC = Task(
     name="reading-comprehension",
     task_group=TaskGroup.QUESTION_ANSWERING,
     template_dict=RC_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="f1",
-            pretty_name="F1-score",
-            huggingface_id="squad_v2",
-            results_key="f1",
-            postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:.2f}%"),
-        ),
-        MetricConfig(
-            name="em",
-            pretty_name="Exact Match",
-            huggingface_id="squad_v2",
-            results_key="exact",
-            postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:.2f}%"),
-        ),
-    ],
+    metrics=[m.f1_metric, m.em_metric],
     default_num_few_shot_examples=4,
     default_max_generated_tokens=32,
     default_labels=["start_positions", "end_positions"],
@@ -110,21 +69,7 @@ SENT = Task(
     name="sentiment-classification",
     task_group=TaskGroup.SEQUENCE_CLASSIFICATION,
     template_dict=SENT_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="mcc",
-            pretty_name="Matthew's Correlation Coefficient",
-            huggingface_id="matthews_correlation",
-            results_key="matthews_correlation",
-        ),
-        MetricConfig(
-            name="macro_f1",
-            pretty_name="Macro-average F1-score",
-            huggingface_id="f1",
-            results_key="f1",
-            compute_kwargs=dict(average="macro"),
-        ),
-    ],
+    metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
     default_max_generated_tokens=5,
     default_labels=["positive", "neutral", "negative"],
@@ -135,23 +80,7 @@ SUMM = Task(
     name="summarization",
     task_group=TaskGroup.TEXT_TO_TEXT,
     template_dict=SUMM_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="bertscore",
-            pretty_name="BERTScore",
-            huggingface_id="bertscore",
-            results_key="f1",
-            compute_kwargs=dict(
-                model_type="microsoft/mdeberta-v3-base", device="auto", batch_size=1
-            ),
-        ),
-        MetricConfig(
-            name="rouge_l",
-            pretty_name="ROUGE-L",
-            huggingface_id="rouge",
-            results_key="rougeL",
-        ),
-    ],
+    metrics=[m.bert_score_metric, m.rouge_l_metric],
     default_num_few_shot_examples=1,
     default_max_generated_tokens=256,
     default_labels=[],
@@ -162,20 +91,7 @@ KNOW = Task(
     name="knowledge",
     task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="mcc",
-            pretty_name="Matthew's Correlation Coefficient",
-            huggingface_id="matthews_correlation",
-            results_key="matthews_correlation",
-        ),
-        MetricConfig(
-            name="accuracy",
-            pretty_name="Accuracy",
-            huggingface_id="accuracy",
-            results_key="accuracy",
-        ),
-    ],
+    metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
     default_max_generated_tokens=5,
     default_labels=["a", "b", "c", "d"],
@@ -186,20 +102,7 @@ MCRC = Task(
     name="multiple-choice-reading-comprehension",
     task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="mcc",
-            pretty_name="Matthew's Correlation Coefficient",
-            huggingface_id="matthews_correlation",
-            results_key="matthews_correlation",
-        ),
-        MetricConfig(
-            name="accuracy",
-            pretty_name="Accuracy",
-            huggingface_id="accuracy",
-            results_key="accuracy",
-        ),
-    ],
+    metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
     default_max_generated_tokens=5,
     default_labels=["a", "b", "c", "d"],
@@ -210,20 +113,7 @@ COMMON_SENSE = Task(
     name="common-sense-reasoning",
     task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
-    metrics=[
-        MetricConfig(
-            name="mcc",
-            pretty_name="Matthew's Correlation Coefficient",
-            huggingface_id="matthews_correlation",
-            results_key="matthews_correlation",
-        ),
-        MetricConfig(
-            name="accuracy",
-            pretty_name="Accuracy",
-            huggingface_id="accuracy",
-            results_key="accuracy",
-        ),
-    ],
+    metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
     default_max_generated_tokens=5,
     default_labels=["a", "b", "c", "d"],
@@ -234,22 +124,7 @@ SPEED = Task(
     name="speed",
     task_group=TaskGroup.SPEED,
     template_dict={},
-    metrics=[
-        MetricConfig(
-            name="speed",
-            pretty_name="Tokens per second",
-            huggingface_id="",
-            results_key="speed",
-            postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
-        ),
-        MetricConfig(
-            name="speed_short",
-            pretty_name="Tokens per second on short documents",
-            huggingface_id="",
-            results_key="speed",
-            postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
-        ),
-    ],
+    metrics=[m.speed_metric, m.speed_short_metric],
     default_num_few_shot_examples=0,
     default_max_generated_tokens=5,
     default_labels=[],
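The same refactoring repeats across all nine tasks above: each inline list of `MetricConfig` objects is replaced by references to shared, module-level metrics from the new `euroeval/metrics.py` (+452 lines in the file list). The contents of that module are not part of this diff, so the following is only a minimal sketch, assuming the deleted configurations moved there unchanged (whether `MetricConfig` itself stayed in `data_models` is likewise not visible here):

```python
# Hypothetical sketch of euroeval/metrics.py, reconstructed from the
# MetricConfig blocks deleted from tasks.py above. The real module is far
# larger and may define these objects differently.
from .data_models import MetricConfig

mcc_metric = MetricConfig(
    name="mcc",
    pretty_name="Matthew's Correlation Coefficient",
    huggingface_id="matthews_correlation",
    results_key="matthews_correlation",
)

macro_f1_metric = MetricConfig(
    name="macro_f1",
    pretty_name="Macro-average F1-score",
    huggingface_id="f1",
    results_key="f1",
    compute_kwargs=dict(average="macro"),
)

accuracy_metric = MetricConfig(
    name="accuracy",
    pretty_name="Accuracy",
    huggingface_id="accuracy",
    results_key="accuracy",
)
```

Whatever the exact definitions, the deduplication pays off immediately: `m.mcc_metric` is shared by five tasks (LA, SENT, KNOW, MCRC, COMMON_SENSE), so its configuration now lives in one place instead of five.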
euroeval/types.py CHANGED
@@ -2,16 +2,17 @@
 
 import typing as t
 
-from numpy.typing import NDArray
 from transformers.trainer_utils import EvalPrediction
 
 if t.TYPE_CHECKING:
+    from numpy.typing import NDArray
+
     from .data_models import GenerativeModelOutput
 
 
 ScoreDict: t.TypeAlias = dict[str, dict[str, float] | list[dict[str, float]]]
-Predictions: t.TypeAlias = NDArray | list[str] | list[list[str]]
-Labels: t.TypeAlias = NDArray | list[str] | list[list[str]]
+Predictions: t.TypeAlias = "NDArray | list[str] | list[list[str]]"
+Labels: t.TypeAlias = "NDArray | list[str] | list[list[str]]"
 
 
 class ComputeMetricsFunction(t.Protocol):
@@ -21,7 +22,8 @@ class ComputeMetricsFunction(t.Protocol):
         self,
         model_outputs_and_labels: EvalPrediction
         | tuple[
-            NDArray | list[str] | list[list[str]], NDArray | list[str] | list[list[str]]
+            "NDArray | list[str] | list[list[str]]",
+            "NDArray | list[str] | list[list[str]]",
         ],
     ) -> dict[str, float]:
         """Compute the metrics.
{euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/METADATA CHANGED
@@ -1,14 +1,14 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.10.1
+Version: 15.12.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
-Author-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
-Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
+Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
+Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
 License: MIT License
 
-Copyright (c) 2022-2024 Dan Saattrup Nielsen
+Copyright (c) 2022-2025 Dan Saattrup Smart
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -43,6 +43,7 @@ Requires-Dist: numpy<2.0.0,>=1.23.0
 Requires-Dist: ollama>=0.5.1
 Requires-Dist: pandas>=2.2.0
 Requires-Dist: peft>=0.15.0
+Requires-Dist: protobuf>=2.0.0
 Requires-Dist: pydantic>=2.6.0
 Requires-Dist: pyinfer>=0.0.3
 Requires-Dist: python-dotenv>=1.0.1
@@ -94,8 +95,7 @@ ______________________________________________________________________
 
 ## Maintainer
 
-- Dan Saattrup Nielsen ([@saattrupdan](https://github.com/saattrupdan),
-  dan.nielsen@alexandra.dk)
+- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), dan.smart@alexandra.dk)
 
 
 ## Installation
@@ -268,14 +268,14 @@ contributing new datasets, your help makes this project better for everyone.
 If you want to cite the framework then feel free to use this:
 
 ```
-@article{nielsen2024encoder,
+@article{smart2024encoder,
   title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
-  author={Nielsen, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
+  author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
   journal={arXiv preprint arXiv:2406.13469},
   year={2024}
 }
-@inproceedings{nielsen2023scandeval,
-  author = {Nielsen, Dan Saattrup},
+@inproceedings{smart2023scandeval,
+  author = {Smart, Dan Saattrup},
   booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
   month = may,
   pages = {185--201},
euroeval-15.12.0.dist-info/RECORD ADDED
@@ -0,0 +1,63 @@
+euroeval/__init__.py,sha256=fZyR9R3C3vwGJS3CrCJ6ySr_FDnMu_Aqnz0FdadWEEs,3399
+euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
+euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
+euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
+euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
+euroeval/data_models.py,sha256=gPHyIoN2A5_O-cJgyb6jhn6enH8zsiIBI09W_wdHMQs,22031
+euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
+euroeval/generation.py,sha256=1fqFEWwM2RzI3uPZem95VFWbN8EfrKZQTrHEP34ihHs,11622
+euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
+euroeval/human_evaluation.py,sha256=Jtz3K5Lqne48wPZWf4EAd3d-n_wX27nGJHigjhV1D7s,27537
+euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
+euroeval/metrics.py,sha256=nxosyoRjlk7TcoAOkjU7zx2TB43b9tA8M1m4V1s5eKU,15516
+euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
+euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
+euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
+euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
+euroeval/types.py,sha256=EIYMNOqqHqibnbNw-fvdst6HwTvq32gtxhr7jL7i-xM,2511
+euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
+euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
+euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
+euroeval/benchmark_modules/litellm.py,sha256=_gKBbJsXzo_cHJVaeuQpHRBENEZUGS_vcC-uGIhhmHA,52111
+euroeval/benchmark_modules/vllm.py,sha256=kq3PMUuRT0NOky6XSHl1JeHTDGehwcub0HcGC5S_Wv4,38834
+euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
+euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
+euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
+euroeval/dataset_configs/english.py,sha256=1q8XJqIVWBBNkldL7t-cVnU2O9EUb9_xoVRSN8arN90,2561
+euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
+euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
+euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
+euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
+euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
+euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
+euroeval/dataset_configs/norwegian.py,sha256=30YGdDPtDszG10BNDVHb-XXTGgGIIgDUNGoeM9q0K_E,5385
+euroeval/dataset_configs/portuguese.py,sha256=-HSDsujWfK__nV2SCu-z0ne0AXLDszOT05oYphQUDTw,2063
+euroeval/dataset_configs/spanish.py,sha256=Yzm1kiilEKoHyd3xD2wrw596Ac9UcaWhlE93GlOFjlc,2558
+euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
+euroeval/prompt_templates/multiple_choice.py,sha256=F9ItGQtnaaez15A8MQ1UCpKRDsLM-AZyRdYetGAofa0,5494
+euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
+euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
+euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
+euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
+euroeval/task_group_utils/question_answering.py,sha256=agwtWOmctgat98yqgFiMSPY6zmoaPgYVyzMmOkNjr58,27284
+euroeval/task_group_utils/sequence_classification.py,sha256=igmD24aMNN7QBJ8NDzgEnGwM-jq_zhC37QxazNm7GZ4,12711
+euroeval/task_group_utils/text_to_text.py,sha256=xOpja-W4E-1peMjZX8G-3G5iRgmFHHygrQ5WN1hB3FI,4550
+euroeval/task_group_utils/token_classification.py,sha256=wCy3aI-Sn9f-87tHzAnYDA6EbY3ah3xao1SnfnoRNz4,17490
+euroeval-15.12.0.dist-info/METADATA,sha256=8cY6HWgAZgrCkIA20lVKuf42y-e7U1MZQZSTdF3e7ig,13479
+euroeval-15.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.12.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.12.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-15.12.0.dist-info/RECORD,,
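Per the wheel spec, each RECORD row has the form `path,sha256=<digest>,<size-in-bytes>`, where the digest is the urlsafe-base64 encoding of the file's SHA-256 hash with the trailing `=` padding stripped (the final `RECORD,,` row carries no hash of itself). A small sketch for verifying one installed file against its row above (a hypothetical helper, not shipped with EuroEval):

```python
import base64
import hashlib
from pathlib import Path


def record_digest(path: Path) -> str:
    """RECORD-style digest: urlsafe base64 of the SHA-256, padding stripped."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# Example: check the installed euroeval/tasks.py against its row above.
expected = "btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA"
actual = record_digest(Path("euroeval/tasks.py"))
print("OK" if actual == expected else f"mismatch: {actual}")
```

This is also how installers detect corrupted or locally modified files: comparing the two digests recorded for `euroeval/tasks.py` in the RECORDs above is enough to see that the file changed, without diffing its contents.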
{euroeval-15.10.1.dist-info → euroeval-15.12.0.dist-info}/licenses/LICENSE CHANGED
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022-2024 Dan Saattrup Nielsen
+Copyright (c) 2022-2025 Dan Saattrup Smart
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
euroeval-15.10.1.dist-info/RECORD DELETED
@@ -1,61 +0,0 @@
-euroeval/__init__.py,sha256=jjInLLkd5IrDrwqag3U35g7SgzITBlFYllgofc-uQFg,3067
-euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu7JM-2xI,11158
-euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
-euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
-euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
-euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
-euroeval/data_loading.py,sha256=2rMLSy8pbntlwmImizMtkTiUzj93mcv5kzYjZELWWfU,4081
-euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
-euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=OFS8YlDhckPupoKWf26Nrd7CTtLQzJXTsDvzMdSR_34,11319
-euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
-euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
-euroeval/human_evaluation.py,sha256=zqbbJkqm2Uymf-88PxM3R9vVRR8SZJlq3QrqWEoiVeE,27643
-euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
-euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
-euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
-euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
-euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
-euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
-euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
-euroeval/benchmark_modules/hf.py,sha256=Nbtn5eZ4axbmL09M8dGZCBr07pn9-btbqGgQ6q7KbHg,44620
-euroeval/benchmark_modules/litellm.py,sha256=LS4mBXXG6h4uJwySPc6SI6f0y_HuiKE7IprprqWpoCI,50601
-euroeval/benchmark_modules/vllm.py,sha256=sgeltOVfZA9bu0AmXV7PtZvuRst0I8s6VOIp0CI6DO8,38880
-euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
-euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
-euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
-euroeval/dataset_configs/english.py,sha256=-N85DiNVrZFqpahNUTfxaWy4vvdOWC8Bi0G4uAO4uDw,2326
-euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
-euroeval/dataset_configs/finnish.py,sha256=_8YWIlZNpO8Qi233bH7cKwm3tq3WETLfC_6mzg7LLog,2045
-euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
-euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
-euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
-euroeval/dataset_configs/italian.py,sha256=KNjCvTzsEqH_EEk3At8slKqNwWWiIdbv_t5ke7n9nZI,2660
-euroeval/dataset_configs/norwegian.py,sha256=2SD5681gZFa1Ig-AEpnyStbivan_bq_Pada4qwE7tw0,5181
-euroeval/dataset_configs/spanish.py,sha256=NviL-FzJ5jq1bLTRvbtZBiGrAmZjxyijZNpKZFrnT-M,2527
-euroeval/dataset_configs/swedish.py,sha256=SOD2nKQTVwTpTvr362mDPHon42kr9vWs5C0mK02Fh-o,2811
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=FAIJKS26EVRxlLHk1C3lN0GDtd5AM0MwvaMf-NNIxfU,6677
-euroeval/prompt_templates/multiple_choice.py,sha256=6iEqiPpT-3WJN_gsyhyapnwsrcsYGdVkSkzwn-VKKxw,5101
-euroeval/prompt_templates/named_entity_recognition.py,sha256=Xd6gBJD2e1l8-We2Ujor7crRUBcbgnNeeVknBIrTMJo,12737
-euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLOS7ZrDUeasuLFt-dtqCnYk,6585
-euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
-euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=LQ6zD1UGi-jGCKI2xUJiQdAXoqb5QMpIJu41B2U0HPw,6543
-euroeval/task_group_utils/question_answering.py,sha256=D4oJL2vQEjHghyxiiiq_vj1IQC6eryqNoLXuTiQEPmw,28071
-euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
-euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
-euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.10.1.dist-info/METADATA,sha256=mx7pTjlWwRsDgD05msa6lNaaq7M2XeoCQV-BxDLSvag,13472
-euroeval-15.10.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-15.10.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
-euroeval-15.10.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
-euroeval-15.10.1.dist-info/RECORD,,