EuroEval 16.2.1 (py3-none-any.whl) → 16.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (39)
  1. euroeval/__init__.py +4 -2
  2. euroeval/benchmark_modules/fresh.py +3 -1
  3. euroeval/benchmark_modules/hf.py +8 -4
  4. euroeval/benchmark_modules/litellm.py +5 -17
  5. euroeval/benchmark_modules/vllm.py +98 -30
  6. euroeval/benchmarker.py +291 -405
  7. euroeval/cli.py +1 -1
  8. euroeval/constants.py +3 -0
  9. euroeval/data_models.py +35 -35
  10. euroeval/dataset_configs/__init__.py +1 -0
  11. euroeval/dataset_configs/danish.py +0 -2
  12. euroeval/dataset_configs/dutch.py +0 -2
  13. euroeval/dataset_configs/english.py +0 -2
  14. euroeval/dataset_configs/finnish.py +0 -2
  15. euroeval/dataset_configs/french.py +0 -2
  16. euroeval/dataset_configs/german.py +0 -2
  17. euroeval/dataset_configs/italian.py +0 -2
  18. euroeval/dataset_configs/latvian.py +2 -3
  19. euroeval/dataset_configs/lithuanian.py +62 -0
  20. euroeval/dataset_configs/norwegian.py +0 -2
  21. euroeval/dataset_configs/polish.py +0 -2
  22. euroeval/dataset_configs/portuguese.py +0 -2
  23. euroeval/dataset_configs/spanish.py +0 -2
  24. euroeval/dataset_configs/swedish.py +0 -3
  25. euroeval/metrics/huggingface.py +1 -1
  26. euroeval/metrics/pipeline.py +5 -0
  27. euroeval/prompt_templates/linguistic_acceptability.py +9 -0
  28. euroeval/prompt_templates/multiple_choice.py +9 -0
  29. euroeval/prompt_templates/named_entity_recognition.py +20 -0
  30. euroeval/prompt_templates/reading_comprehension.py +10 -0
  31. euroeval/prompt_templates/sentiment_classification.py +11 -0
  32. euroeval/tokenisation_utils.py +8 -8
  33. euroeval/utils.py +10 -5
  34. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/METADATA +181 -60
  35. euroeval-16.3.0.dist-info/RECORD +71 -0
  36. euroeval-16.2.1.dist-info/RECORD +0 -70
  37. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/WHEEL +0 -0
  38. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/entry_points.txt +0 -0
  39. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py CHANGED
@@ -62,6 +62,10 @@ def resolve_model_path(download_dir: str) -> str:
 
     Returns:
         The path to the model.
+
+    Raises:
+        InvalidModel:
+            If the model path is not valid, or if required files are missing.
     """
     model_path = Path(download_dir)
     # Get the 'path safe' version of the model id, which is the last dir in the path
@@ -271,14 +275,15 @@ def internet_connection_available() -> bool:
         s = socket.create_connection(("1.1.1.1", 80))
         s.close()
         return True
-    # a bit ugly but we dont want to actually import the pytest-socket exceptions
-    # we catch all exceptions and check if the name matches any known errors
+
+    # We want to only catch exceptions related to socket connections, but as we cannot
+    # import these here as they're developer dependencies, we check the exception name
+    # instead. If the exception is not related to socket connections, we reraise it.
    except Exception as e:
        pytest_socket_errors = ["SocketConnectBlockedError", "SocketBlockedError"]
        if type(e).__name__ in pytest_socket_errors or isinstance(e, OSError):
            return False
-        else:
-            raise e
+        raise e
 
 
 class HiddenPrints:
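The rewritten handler above avoids importing pytest-socket (a developer-only dependency) just to catch its exceptions, matching on the exception class name instead. A minimal standalone sketch of the same pattern — the function name and timeout here are illustrative, not taken from the package:

```python
import socket


def connection_available(host: str = "1.1.1.1", port: int = 80) -> bool:
    """Return True if a TCP connection to `host` can be opened."""
    try:
        socket.create_connection((host, port), timeout=3).close()
        return True
    except Exception as e:
        # pytest-socket raises these when sockets are blocked during tests;
        # matching on the class name avoids importing the dev dependency.
        if type(e).__name__ in {"SocketConnectBlockedError", "SocketBlockedError"}:
            return False
        if isinstance(e, OSError):  # an ordinary network failure
            return False
        raise
```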
@@ -457,7 +462,7 @@ def extract_json_dict_from_string(s: str) -> dict | None:
     Returns:
         The extracted JSON dictionary, or None if no JSON dictionary could be found.
     """
-    json_regex = r"\{[^{}]+?\}"
+    json_regex = r"\{[^{}]*?\}"
    if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
        logger.debug(
            "The model output does not contain any JSON dictionary, so cannot parse "
{euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.2.1
+Version: 16.3.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,21 +62,28 @@ Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: timm>=1.0.19; extra == 'all'
-Requires-Dist: vllm[flashinfer]>=0.10.1; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: timm>=1.0.19; extra == 'generative'
-Requires-Dist: vllm[flashinfer]>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 
+<!-- This disables the requirement that the first line is a top-level heading -->
+<!-- markdownlint-configure-file { "MD041": false } -->
+
 <div align='center'>
-  <img src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png" height="500" width="372">
+  <img
+    src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
+    height="500"
+    width="372"
+  >
 </div>
 
-### The robust European language model benchmark.
+### The robust European language model benchmark
 
-_(formerly known as ScandEval)_
+(formerly known as ScandEval)
 
 ______________________________________________________________________
 [![Documentation](https://img.shields.io/badge/docs-passing-green)](https://euroeval.com)
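Both extras now cap vLLM below 0.11.0 alongside the existing lower bound. For anyone checking what the combined specifier admits, a small sketch using the `packaging` library (assumed installed via `pip install packaging`):

```python
from packaging.specifiers import SpecifierSet

# The tightened constraint from the METADATA above.
vllm_spec = SpecifierSet(">=0.10.1,<0.11.0")

print("0.10.2" in vllm_spec)  # True: within the pinned range
print("0.11.0" in vllm_spec)  # False: now excluded by the upper bound
```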
@@ -88,16 +95,16 @@ ______________________________________________________________________
 [![Code Coverage](https://img.shields.io/badge/Coverage-67%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 
-
 ## Maintainer
 
-- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), dan.smart@alexandra.dk)
-
+- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
 
 ## Installation
+
 To install the package simply write the following command in your favorite terminal:
-```
-$ pip install euroeval[all]
+
+```bash
+pip install euroeval[all]
 ```
 
 This will install the EuroEval package with all extras. You can also install the
@@ -105,51 +112,61 @@ minimal version by leaving out the `[all]`, in which case the package will let y
 when an evaluation requires a certain extra dependency, and how you install it.
 
 ## Quickstart
+
 ### Benchmarking from the Command Line
+
 The easiest way to benchmark pretrained models is via the command line interface. After
 having installed the package, you can benchmark your favorite model like so:
-```
-$ euroeval --model <model-id>
+
+```bash
+euroeval --model <model-id>
 ```
 
 Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
 Hub](https://huggingface.co/models). By default this will benchmark the model on all
 the tasks available. If you want to benchmark on a particular task, then use the
 `--task` argument:
-```
-$ euroeval --model <model-id> --task sentiment-classification
+
+```bash
+euroeval --model <model-id> --task sentiment-classification
 ```
 
 We can also narrow down which languages we would like to benchmark on. This can be done
 by setting the `--language` argument. Here we thus benchmark the model on the Danish
 sentiment classification task:
-```
-$ euroeval --model <model-id> --task sentiment-classification --language da
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da
 ```
 
 Multiple models, datasets and/or languages can be specified by just attaching multiple
 arguments. Here is an example with two models:
-```
-$ euroeval --model <model-id1> --model <model-id2>
+
+```bash
+euroeval --model <model-id1> --model <model-id2>
 ```
 
 The specific model version/revision to use can also be added after the suffix '@':
-```
-$ euroeval --model <model-id>@<commit>
+
+```bash
+euroeval --model <model-id>@<commit>
 ```
 
 This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
 
 See all the arguments and options available for the `euroeval` command by typing
-```
-$ euroeval --help
+
+```bash
+euroeval --help
 ```
 
 ### Benchmarking from a Script
+
 In a script, the syntax is similar to the command line interface. You simply initialise
 an object of the `Benchmarker` class, and call this benchmark object with your favorite
 model:
-```
+
+```python
 >>> from euroeval import Benchmarker
 >>> benchmark = Benchmarker()
 >>> benchmark(model="<model-id>")
@@ -157,29 +174,34 @@ model:
 
 To benchmark on a specific task and/or language, you simply specify the `task` or
 `language` arguments, shown here with same example as above:
-```
+
+```python
 >>> benchmark(model="<model-id>", task="sentiment-classification", language="da")
 ```
 
 If you want to benchmark a subset of all the models on the Hugging Face Hub, you can
 simply leave out the `model` argument. In this example, we're benchmarking all Danish
 models on the Danish sentiment classification task:
-```
+
+```python
 >>> benchmark(task="sentiment-classification", language="da")
 ```
 
 ### Benchmarking in an Offline Environment
+
 If you need to benchmark in an offline environment, you need to download the models,
 datasets and metrics beforehand. This can be done by adding the `--download-only`
 argument, from the command line, or the `download_only` argument, if benchmarking from a
 script. For example to download the model you want and all of the Danish sentiment
 classification datasets:
-```
-$ euroeval --model <model-id> --task sentiment-classification --language da --download-only
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da --download-only
 ```
 
 Or from a script:
-```
+
+```python
 >>> benchmark(
 ...     model="<model-id>",
 ...     task="sentiment-classification",
@@ -193,11 +215,13 @@ internet connection will be required during evaluation. If offline support is im
 to you, please consider [opening an issue](https://github.com/EuroEval/EuroEval/issues).
 
 ### Benchmarking from Docker
+
 A Dockerfile is provided in the repo, which can be downloaded and run, without needing
 to clone the repo and installing from source. This can be fetched programmatically by
 running the following:
-```
-$ wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
+
+```bash
+wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
 ```
 
 Next, to be able to build the Docker image, first ensure that the NVIDIA Container
@@ -208,56 +232,153 @@ and
 Ensure that the the CUDA version stated at the top of the Dockerfile matches the CUDA
 version installed (which you can check using `nvidia-smi`). After that, we build the
 image as follows:
-```
-$ docker build --pull -t euroeval -f Dockerfile.cuda .
+
+```bash
+docker build --pull -t euroeval -f Dockerfile.cuda .
 ```
 
 With the Docker image built, we can now evaluate any model as follows:
-```
-$ docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
+
+```bash
+docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
 ```
 
 Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
 argument. This could for instance be `--model <model-id> --task
 sentiment-classification`.
 
-
 ### Reproducing the datasets
+
 All datasets used in this project are generated using the scripts located in the
 [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
 with the following command
 
-```shell
-$ uv run src/scripts/<name-of-script>.py
+```bash
+uv run src/scripts/<name-of-script>.py
 ```
 
 Replace <name-of-script> with the specific script you wish to execute, e.g.,
 
-```shell
-$ uv run src/scripts/create_allocine.py
+```bash
+uv run src/scripts/create_allocine.py
 ```
 
 ## Contributors :pray:
 
 A huge thank you to all the contributors who have helped make this project a success!
 
-<a href="https://github.com/peter-sk"><img src="https://avatars.githubusercontent.com/u/6168908" width=50 alt="Contributor avatar for peter-sk"/></a>
-<a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
-<a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
-<a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
-<a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
-<a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
-<a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
-<a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
-<a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
-<a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
-<a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
-<a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
-<a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
-<a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
-<a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
-<a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
-
+<a href="https://github.com/peter-sk">
+  <img
+    src="https://avatars.githubusercontent.com/u/6168908"
+    width=50
+    alt="Contributor avatar for peter-sk"
+  />
+</a>
+<a href="https://github.com/AJDERS">
+  <img
+    src="https://avatars.githubusercontent.com/u/38854604"
+    width=50
+    alt="Contributor avatar for AJDERS"
+  />
+</a>
+<a href="https://github.com/oliverkinch">
+  <img
+    src="https://avatars.githubusercontent.com/u/71556498"
+    width=50
+    alt="Contributor avatar for oliverkinch"
+  />
+</a>
+<a href="https://github.com/versae">
+  <img
+    src="https://avatars.githubusercontent.com/u/173537"
+    width=50
+    alt="Contributor avatar for versae"
+  />
+</a>
+<a href="https://github.com/KennethEnevoldsen">
+  <img
+    src="https://avatars.githubusercontent.com/u/23721977"
+    width=50
+    alt="Contributor avatar for KennethEnevoldsen"
+  />
+</a>
+<a href="https://github.com/viggo-gascou">
+  <img
+    src="https://avatars.githubusercontent.com/u/94069687"
+    width=50
+    alt="Contributor avatar for viggo-gascou"
+  />
+</a>
+<a href="https://github.com/mathiasesn">
+  <img
+    src="https://avatars.githubusercontent.com/u/27091759"
+    width=50
+    alt="Contributor avatar for mathiasesn"
+  />
+</a>
+<a href="https://github.com/Alkarex">
+  <img
+    src="https://avatars.githubusercontent.com/u/1008324"
+    width=50
+    alt="Contributor avatar for Alkarex"
+  />
+</a>
+<a href="https://github.com/marksverdhei">
+  <img
+    src="https://avatars.githubusercontent.com/u/46672778"
+    width=50
+    alt="Contributor avatar for marksverdhei"
+  />
+</a>
+<a href="https://github.com/Mikeriess">
+  <img
+    src="https://avatars.githubusercontent.com/u/19728563"
+    width=50
+    alt="Contributor avatar for Mikeriess"
+  />
+</a>
+<a href="https://github.com/ThomasKluiters">
+  <img
+    src="https://avatars.githubusercontent.com/u/8137941"
+    width=50
+    alt="Contributor avatar for ThomasKluiters"
+  />
+</a>
+<a href="https://github.com/BramVanroy">
+  <img
+    src="https://avatars.githubusercontent.com/u/2779410"
+    width=50
+    alt="Contributor avatar for BramVanroy"
+  />
+</a>
+<a href="https://github.com/peregilk">
+  <img
+    src="https://avatars.githubusercontent.com/u/9079808"
+    width=50
+    alt="Contributor avatar for peregilk"
+  />
+</a>
+<a href="https://github.com/Rijgersberg">
+  <img
+    src="https://avatars.githubusercontent.com/u/8604946"
+    width=50
+    alt="Contributor avatar for Rijgersberg"
+  />
+</a>
+<a href="https://github.com/duarteocarmo">
+  <img
+    src="https://avatars.githubusercontent.com/u/26342344"
+    width=50
+    alt="Contributor avatar for duarteocarmo"
+  />
+</a>
+<a href="https://github.com/slowwavesleep">
+  <img
+    src="https://avatars.githubusercontent.com/u/44175589"
+    width=50
+    alt="Contributor avatar for slowwavesleep"
+  />
+</a>
 
 ### Contribute to EuroEval
 
@@ -269,8 +390,8 @@ contributing new datasets, your help makes this project better for everyone.
 - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
   a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
 
-
 ### Special Thanks
+
 - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
   [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
 - Thanks [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
@@ -285,11 +406,11 @@ contributing new datasets, your help makes this project better for everyone.
 - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
   evaluate GPT-4-turbo in German.
 
-
 ## Citing EuroEval
+
 If you want to cite the framework then feel free to use this:
 
-```
+```bibtex
 @article{smart2024encoder,
   title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
   author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
euroeval-16.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,71 @@
+euroeval/__init__.py,sha256=QJo_xezfFnpKBB32nvA_juy29tAz1eVn---MQiexYjE,3901
+euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
+euroeval/benchmarker.py,sha256=Nt4k1DivG-YtsSiqEwqsHfBzEkauo1lrsG1RAS0ZWuw,48928
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
+euroeval/constants.py,sha256=e1LRJe6CspvbKlfo4-9ee1wGocNoh1c7GcyaXpiN1Jk,2744
+euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
+euroeval/data_models.py,sha256=X4zAdR1K2MPb4f4Vc7gPYfolzFxxsz5WplnsmsiMYY8,27766
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
+euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
+euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
+euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
+euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
+euroeval/tokenisation_utils.py,sha256=7lQ83rP1Ws7HHg20bFbqD4GqtdbyBADwyxPBmFzAzVA,21158
+euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
+euroeval/utils.py,sha256=qAh8TLrJPk10l9qKcvD1mq2gNOGRTLl88PvPNj5IuRU,19451
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
+euroeval/benchmark_modules/fresh.py,sha256=qqsaC6u06YeJIK-Z6w9gZefb5cg1nU7ZDrO76l2GZN0,10779
+euroeval/benchmark_modules/hf.py,sha256=Z-Z_AxJk2APFXcZdyZrnKQ4OE_uRH81Vsm9x-gfJ1-I,44926
+euroeval/benchmark_modules/litellm.py,sha256=2EUhzLcxocfFxjbgyyP5QQtLieoH-fWbLR6RRz64EN8,64176
+euroeval/benchmark_modules/vllm.py,sha256=eTwS1YDB0v0lOWvv6_UXPlqNjNaPQTKRY-g495Y6X9s,46432
+euroeval/dataset_configs/__init__.py,sha256=ylO6FwnzlWmCuifliE_b4Vs5GXapYeyvZ4j1XVFmdN8,2086
+euroeval/dataset_configs/danish.py,sha256=fAMWYQVrx3B11r5NZSL-LWSQTJvCDwSxImIkIrGdoAA,5552
+euroeval/dataset_configs/dutch.py,sha256=883caShKOOi5s1Ky0_EKFeq0y9wVuqN-GVqeOwbKFr0,5438
+euroeval/dataset_configs/english.py,sha256=rl6bBIluKXkxT8L4e071GQuPprMHTI955mgW46V3Cp0,4658
+euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=pfO_flf6HHUbZZLae62cV30__uey_Oj37aiX0eBNWcQ,4311
+euroeval/dataset_configs/french.py,sha256=OdkCfWhtImgB3Ni6o0NRvCEvjeKAqausfJ2VO04CUwY,4641
+euroeval/dataset_configs/german.py,sha256=sav75C7f33OofQzliwvb3g7B7cw0MXm0G8wdlcmI7r8,5051
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=YucxgJtCG31sQplJ6hL64sF39ZSj926_a7McpCzKxh0,4925
+euroeval/dataset_configs/latvian.py,sha256=fB3tsqZoFldTnrlpeSu9iQQ907ptOVC8ZaielkgmVlM,2677
+euroeval/dataset_configs/lithuanian.py,sha256=QTahv862C5XzjLU8WHcExBGlkRFQnj9F4-I_5x1qJSk,1833
+euroeval/dataset_configs/norwegian.py,sha256=ipDIg2wXquZvIjlc4Bs-TbMJCKOoK6TL7lP9AzLOOj8,7666
+euroeval/dataset_configs/polish.py,sha256=5MTWLUmDG0qMgb1ATSdON2A_2ZFLlXUVjS0u64srfIg,3593
+euroeval/dataset_configs/portuguese.py,sha256=wanwK9LYdBND_JPh203L_YQraiLSd2kI8P0myy6U6Dk,4010
+euroeval/dataset_configs/spanish.py,sha256=xVWWHS84aOjDcutfAh7J2roHEb2KHZ084pYysH2BdSo,4823
+euroeval/dataset_configs/swedish.py,sha256=f_H7khH0IHcZXEQyYM8bpIvYnRsSj0EhVXh4RgpOCmw,5317
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
+euroeval/metrics/huggingface.py,sha256=7_97xfdqsznoBOm3diVvZtJ6k9XUa8isiVVmOgia8kI,6522
+euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
+euroeval/metrics/pipeline.py,sha256=aLNf0vKTfov-HZbvyJj9_9Z1rR1BkVsWxAea8btCWg8,10513
+euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=n-InOATuwdjlmDjiUdGIk9bQJMUgVFdp3u-iQ0K9WjY,9189
+euroeval/prompt_templates/multiple_choice.py,sha256=W0WZdAhbOV2jdHNhjfNNhgoPTbFKA2vhs72U0hP1rW0,7323
+euroeval/prompt_templates/named_entity_recognition.py,sha256=Kl7SB7vRJ-K9oXMZcJEffELaQlbwspNKUrQLDeNobcY,17301
+euroeval/prompt_templates/reading_comprehension.py,sha256=OtV8tu6wyf7rwW3krmyk8bzdNSRS5WkWFgxok4o67_o,9243
+euroeval/prompt_templates/sentiment_classification.py,sha256=tnalqea4TjG6z4xF7tDDKQm7rWrYGg6SIWTX3RDQQ20,10012
+euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
+euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
+euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
+euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
+euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
+euroeval-16.3.0.dist-info/METADATA,sha256=iSfb2jRJO7BfidNgy0jOKUXFh_WwBojxgisOBWQmYHg,15381
+euroeval-16.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.3.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.3.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.3.0.dist-info/RECORD,,
euroeval-16.2.1.dist-info/RECORD DELETED
@@ -1,70 +0,0 @@
-euroeval/__init__.py,sha256=mXTjuGrEE-1fIS9x28oJKg-gNGt4q7y2E74l330KEmY,3787
-euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
-euroeval/benchmarker.py,sha256=5l4p1ncq4VJX_bDjv2f8oBq2GETPtJmduGOnLAbWjF8,55762
-euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=GOAWzdtasJfOvTuVQszu-T1T9GfQ_un-blOICO-y7g4,9316
-euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
-euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
-euroeval/data_models.py,sha256=9Sgrq6Ktg1ETXRJ0v4VA_amAPowGuB7fZtL-8RlDQn0,27766
-euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
-euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
-euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
-euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
-euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
-euroeval/tokenisation_utils.py,sha256=nLeF2cdZSm5PZiAcDTtxY82nUJ-or8VU8YxYLa167EM,21158
-euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
-euroeval/utils.py,sha256=DRJW6wtmNpRtuHt03diWo3S5m3rdxoPEQpd-KWi7aGY,19255
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
-euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
-euroeval/benchmark_modules/hf.py,sha256=XmkoDFzaJqnd_5mmUkqCaOgAdRPFs3KZKZZ0cr83TlM,44742
-euroeval/benchmark_modules/litellm.py,sha256=F3udd6NmhQOe3go_7rAcWg7mgZrNQpWWvLe-5U4E2RQ,64771
-euroeval/benchmark_modules/vllm.py,sha256=yLy8TCTnodu4NdTiO7XSdxuHX60AJ1-7p6J3e5h7-iA,43994
-euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
-euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
-euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
-euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
-euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
-euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
-euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
-euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
-euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
-euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
-euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
-euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
-euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
-euroeval/metrics/huggingface.py,sha256=iHKJnvOXRc_e8sxB2ff3WkfK64jXyn5KEnIxPyfD2fM,6522
-euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
-euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
-euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=m23LrckohdnToQDsexdsW_5YyBfGTf5DTjiMI643F9A,8717
-euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
-euroeval/prompt_templates/named_entity_recognition.py,sha256=HIX9EBkSIBl5JXceFtiZTdvzWr9YHM9-55D6bcjIyQ4,16436
-euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
-euroeval/prompt_templates/sentiment_classification.py,sha256=b3TvH26M77vwFfn577NlGVW881qfV7YSm-Xba_w98Fc,9504
-euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
-euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
-euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
-euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
-euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
-euroeval-16.2.1.dist-info/METADATA,sha256=brIXZ3x3MUf-ggNpKKC_4Lvrqem0MfKPrJ8DZJ5T3Iw,14590
-euroeval-16.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.2.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.2.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.2.1.dist-info/RECORD,,