EuroEval: euroeval-15.16.0-py3-none-any.whl → euroeval-16.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic; further details are linked from the original package page.

Files changed (63)
  1. euroeval/__init__.py +3 -7
  2. euroeval/benchmark_config_factory.py +3 -7
  3. euroeval/benchmark_modules/base.py +35 -19
  4. euroeval/benchmark_modules/fresh.py +24 -19
  5. euroeval/benchmark_modules/hf.py +136 -154
  6. euroeval/benchmark_modules/litellm.py +190 -110
  7. euroeval/benchmark_modules/vllm.py +161 -114
  8. euroeval/benchmarker.py +49 -22
  9. euroeval/cli.py +3 -3
  10. euroeval/constants.py +13 -15
  11. euroeval/data_loading.py +33 -28
  12. euroeval/data_models.py +53 -7
  13. euroeval/dataset_configs/__init__.py +2 -0
  14. euroeval/dataset_configs/danish.py +38 -1
  15. euroeval/dataset_configs/dutch.py +38 -1
  16. euroeval/dataset_configs/english.py +38 -1
  17. euroeval/dataset_configs/estonian.py +95 -0
  18. euroeval/dataset_configs/faroese.py +38 -0
  19. euroeval/dataset_configs/finnish.py +39 -1
  20. euroeval/dataset_configs/french.py +38 -1
  21. euroeval/dataset_configs/german.py +38 -1
  22. euroeval/dataset_configs/icelandic.py +39 -1
  23. euroeval/dataset_configs/italian.py +38 -1
  24. euroeval/dataset_configs/latvian.py +81 -0
  25. euroeval/dataset_configs/norwegian.py +38 -1
  26. euroeval/dataset_configs/portuguese.py +38 -1
  27. euroeval/dataset_configs/spanish.py +38 -1
  28. euroeval/dataset_configs/swedish.py +38 -1
  29. euroeval/enums.py +0 -6
  30. euroeval/finetuning.py +6 -6
  31. euroeval/generation.py +25 -14
  32. euroeval/generation_utils.py +46 -14
  33. euroeval/languages.py +947 -187
  34. euroeval/metrics/__init__.py +6 -0
  35. euroeval/metrics/base.py +76 -0
  36. euroeval/metrics/huggingface.py +192 -0
  37. euroeval/metrics/llm_as_a_judge.py +257 -0
  38. euroeval/metrics/pipeline.py +234 -0
  39. euroeval/metrics/speed.py +51 -0
  40. euroeval/prompt_templates/linguistic_acceptability.py +40 -2
  41. euroeval/prompt_templates/multiple_choice.py +23 -2
  42. euroeval/prompt_templates/named_entity_recognition.py +65 -2
  43. euroeval/prompt_templates/reading_comprehension.py +42 -2
  44. euroeval/prompt_templates/sentiment_classification.py +46 -2
  45. euroeval/prompt_templates/summarization.py +24 -4
  46. euroeval/scores.py +7 -2
  47. euroeval/speed_benchmark.py +6 -6
  48. euroeval/task_group_utils/multiple_choice_classification.py +17 -6
  49. euroeval/task_group_utils/question_answering.py +35 -28
  50. euroeval/task_group_utils/sequence_classification.py +96 -23
  51. euroeval/task_group_utils/text_to_text.py +7 -3
  52. euroeval/task_group_utils/token_classification.py +47 -75
  53. euroeval/tasks.py +31 -6
  54. euroeval/tokenization_utils.py +295 -207
  55. euroeval/utils.py +118 -34
  56. {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +11 -14
  57. euroeval-16.0.0.dist-info/RECORD +69 -0
  58. {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
  59. euroeval/human_evaluation.py +0 -738
  60. euroeval/metrics.py +0 -470
  61. euroeval-15.16.0.dist-info/RECORD +0 -63
  62. {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
  63. {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py CHANGED
@@ -8,12 +8,15 @@ import importlib.util
8
8
  import logging
9
9
  import os
10
10
  import random
11
+ import re
11
12
  import sys
12
13
  import typing as t
13
14
  import warnings
14
15
  from functools import cache
15
16
  from pathlib import Path
16
17
 
18
+ import demjson3
19
+ import huggingface_hub as hf_hub
17
20
  import litellm
18
21
  import numpy as np
19
22
  import requests
@@ -24,9 +27,6 @@ from transformers import logging as tf_logging
24
27
 
25
28
  from .exceptions import NaNValueInModelOutput
26
29
 
27
- if importlib.util.find_spec("ray") is not None:
28
- import ray
29
-
30
30
  if t.TYPE_CHECKING:
31
31
  from types import TracebackType
32
32
 
@@ -94,54 +94,53 @@ def block_terminal_output() -> None:
94
94
  # Ignore miscellaneous warnings
95
95
  warnings.filterwarnings("ignore", category=UserWarning)
96
96
  warnings.filterwarnings("ignore", category=FutureWarning)
97
- warnings.filterwarnings(
98
- "ignore",
99
- module="torch.nn.parallel*",
100
- message="Was asked to gather along dimension 0, but all input tensors were "
101
- "scalars; will instead unsqueeze and return a vector.",
102
- )
103
- warnings.filterwarnings("ignore", module="seqeval*")
104
-
105
- # Up the logging level, to disable outputs
106
- logging.getLogger("filelock").setLevel(logging.CRITICAL)
107
97
  logging.getLogger("absl").setLevel(logging.CRITICAL)
108
- logging.getLogger("datasets").setLevel(logging.CRITICAL)
98
+
99
+ # Disable matplotlib logging
100
+ logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
101
+
102
+ # Disable PyTorch logging
103
+ logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
104
+ warnings.filterwarnings(action="ignore", module="torch*")
105
+ os.environ["TORCH_LOGS"] = "-all"
106
+
107
+ # Disable huggingface_hub logging
108
+ logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
109
+
110
+ # Disable LiteLLM logging
111
+ logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
112
+ logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
113
+ logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
109
114
  logging.getLogger("openai").setLevel(logging.CRITICAL)
110
- logging.getLogger("torch.distributed.distributed_c10d").setLevel(logging.CRITICAL)
111
- logging.getLogger("torch.nn.parallel.distributed").setLevel(logging.CRITICAL)
115
+ logging.getLogger("httpx").setLevel(logging.CRITICAL)
116
+ litellm.suppress_debug_info = True
117
+
118
+ # Disable vLLM logging
112
119
  logging.getLogger("vllm").setLevel(logging.CRITICAL)
113
120
  logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
114
121
  logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
115
122
  logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
116
123
  logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
117
124
  logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
118
- logging.getLogger("httpx").setLevel(logging.CRITICAL)
119
- logging.getLogger("ray._private.worker").setLevel(logging.CRITICAL)
120
- logging.getLogger("ray._private.services").setLevel(logging.CRITICAL)
121
- logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
122
- logging.getLogger("accelerate").setLevel(logging.CRITICAL)
123
- logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
124
- logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
125
- logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
126
- logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
127
-
128
- # This suppresses vLLM logging
125
+ logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
126
+ logging.CRITICAL
127
+ )
129
128
  os.environ["LOG_LEVEL"] = "CRITICAL"
130
129
  os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
131
130
 
132
- if importlib.util.find_spec("ray") is not None:
133
- ray._private.worker._worker_logs_enabled = False
134
-
135
- # Disable the tokeniser progress bars
131
+ # Disable datasets logging
132
+ logging.getLogger("datasets").setLevel(logging.CRITICAL)
133
+ logging.getLogger("filelock").setLevel(logging.CRITICAL)
136
134
  disable_progress_bar()
137
135
 
136
+ # Disable evaluate logging
137
+ warnings.filterwarnings("ignore", module="seqeval*")
138
+
138
139
  # Disable most of the `transformers` logging
139
140
  tf_logging._default_log_level = logging.CRITICAL
140
141
  tf_logging.set_verbosity(logging.CRITICAL)
141
142
  logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
142
-
143
- # Disable logging from `litellm`
144
- litellm.suppress_debug_info = True
143
+ logging.getLogger("accelerate").setLevel(logging.CRITICAL)
145
144
 
146
145
 
147
146
  def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
@@ -373,3 +372,88 @@ async def add_semaphore_and_catch_exception(
373
372
  return await coroutine
374
373
  except Exception as exc:
375
374
  return exc
375
+
376
+
377
+ def extract_json_dict_from_string(s: str) -> dict | None:
378
+ """Extract a JSON dictionary from a string.
379
+
380
+ Args:
381
+ s:
382
+ The string to extract the JSON dictionary from.
383
+
384
+ Returns:
385
+ The extracted JSON dictionary, or None if no JSON dictionary could be found.
386
+ """
387
+ json_regex = r"\{[^{}]+?\}"
388
+ if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
389
+ logger.debug(
390
+ "The model output does not contain any JSON dictionary, so cannot parse "
391
+ f"it. Skipping. Here is the output: {s!r}"
392
+ )
393
+ return None
394
+ json_string = json_match.group()
395
+ try:
396
+ json_output = demjson3.decode(txt=json_string)
397
+ except demjson3.JSONDecodeError:
398
+ logger.debug(
399
+ "The model output is not valid JSON, so cannot parse it. Skipping. "
400
+ f"Here is the output: {json_string!r}"
401
+ )
402
+ return None
403
+ if not isinstance(json_output, dict):
404
+ logger.debug(
405
+ "The model output is not a JSON dictionary, so cannot parse "
406
+ f"it. Skipping. Here is the output: {json_string!r}"
407
+ )
408
+ return None
409
+ elif not all(isinstance(key, str) for key in json_output.keys()):
410
+ logger.debug(
411
+ "The model output is not a JSON dictionary with string keys, "
412
+ "so cannot parse it. Skipping. Here is the output: "
413
+ f"{json_string!r}"
414
+ )
415
+ return None
416
+ return json_output
417
+
418
+
419
+ @cache
420
+ def get_hf_token(api_key: str | None) -> str | bool:
421
+ """Get the Hugging Face token.
422
+
423
+ Args:
424
+ api_key:
425
+ The API key to use as the Hugging Face token. If None, we will try to
426
+ extract it in other ways.
427
+
428
+ Returns:
429
+ The Hugging Face token, or True if no token is set but the user is logged in, or
430
+ False if no token is set and the user is not logged in.
431
+ """
432
+ if api_key is not None:
433
+ log_once(
434
+ "Using the Hugging Face API key passed to the function.",
435
+ level=logging.DEBUG,
436
+ )
437
+ return api_key
438
+ elif (token := os.getenv("HUGGINGFACE_API_KEY")) is not None:
439
+ log_once(
440
+ "Using the Hugging Face API key from the environment variable "
441
+ "`HUGGINGFACE_API_KEY`.",
442
+ level=logging.DEBUG,
443
+ )
444
+ return token
445
+ try:
446
+ hf_hub.whoami()
447
+ log_once(
448
+ "No Hugging Face API key was set, but the user is logged in to Hugging "
449
+ "Face, so using the local token.",
450
+ level=logging.DEBUG,
451
+ )
452
+ return True
453
+ except hf_hub.errors.LocalTokenNotFoundError:
454
+ log_once(
455
+ "No Hugging Face API key was set and the user is not logged in to Hugging "
456
+ "Face, so no token will be used.",
457
+ level=logging.DEBUG,
458
+ )
459
+ return False
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 15.16.0
3
+ Version: 16.0.0
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -28,18 +28,19 @@ License: MIT License
28
28
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
29
  SOFTWARE.
30
30
  License-File: LICENSE
31
- Requires-Python: <4.0,>=3.10
31
+ Requires-Python: <4.0,>=3.11
32
32
  Requires-Dist: accelerate>=1.9.0
33
33
  Requires-Dist: bert-score>=0.3.13
34
34
  Requires-Dist: click>=8.1.3
35
+ Requires-Dist: cloudpickle>=3.1.1
35
36
  Requires-Dist: datasets>=3.5.0
36
37
  Requires-Dist: demjson3>=3.0.6
37
38
  Requires-Dist: evaluate>=0.4.1
38
39
  Requires-Dist: huggingface-hub>=0.30.1
39
40
  Requires-Dist: levenshtein>=0.24.0
40
- Requires-Dist: litellm>=1.72.2
41
+ Requires-Dist: litellm>=1.75.6
41
42
  Requires-Dist: more-itertools>=10.5.0
42
- Requires-Dist: numpy<2.0.0,>=1.23.0
43
+ Requires-Dist: numpy>=2.0.0
43
44
  Requires-Dist: ollama>=0.5.1
44
45
  Requires-Dist: pandas>=2.2.0
45
46
  Requires-Dist: peft>=0.15.0
@@ -49,27 +50,22 @@ Requires-Dist: pyinfer>=0.0.3
49
50
  Requires-Dist: python-dotenv>=1.0.1
50
51
  Requires-Dist: rouge-score>=0.1.2
51
52
  Requires-Dist: sacremoses>=0.1.1
52
- Requires-Dist: scikit-learn<1.6.0
53
+ Requires-Dist: scikit-learn==1.6.1
53
54
  Requires-Dist: sentencepiece>=0.1.96
54
55
  Requires-Dist: seqeval>=1.2.2
55
56
  Requires-Dist: setuptools>=75.8.2
56
57
  Requires-Dist: tenacity>=9.0.0
57
58
  Requires-Dist: termcolor>=2.0.0
58
59
  Requires-Dist: torch>=2.6.0
59
- Requires-Dist: transformers>=4.55.0
60
+ Requires-Dist: transformers[mistral-common]>=4.56.0
60
61
  Provides-Extra: all
61
62
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
62
63
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
63
- Requires-Dist: gradio>=4.26.0; extra == 'all'
64
- Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'all'
64
+ Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'all'
65
65
  Provides-Extra: generative
66
66
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
67
67
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
68
- Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'generative'
69
- Provides-Extra: human-evaluation
70
- Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
71
- Provides-Extra: test
72
- Requires-Dist: gradio>=4.26.0; extra == 'test'
68
+ Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
73
69
  Description-Content-Type: text/markdown
74
70
 
75
71
  <div align='center'>
@@ -223,17 +219,18 @@ A huge thank you to all the contributors who have helped make this project a suc
223
219
  <a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
224
220
  <a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
225
221
  <a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
222
+ <a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
226
223
  <a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
227
224
  <a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
228
225
  <a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
229
226
  <a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
230
227
  <a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
231
- <a href="https://github.com/pakagronglb"><img src="https://avatars.githubusercontent.com/u/178713124" width=50 alt="Contributor avatar for pakagronglb"/></a>
232
228
  <a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
233
229
  <a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
234
230
  <a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
235
231
  <a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
236
232
  <a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
233
+ <a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
237
234
 
238
235
 
239
236
  ### Contribute to EuroEval
euroeval-16.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,69 @@
1
+ euroeval/__init__.py,sha256=MgFG1amMgiTJmK_hcQ7nnX-o4KFhlD1P5xKUBTloPCQ,3564
2
+ euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
3
+ euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
4
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
+ euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
6
+ euroeval/constants.py,sha256=HWJ3PJRS-ZbAMXTvujiK8QP7IiS4RHkjnegv3oi52w0,2499
7
+ euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
+ euroeval/data_models.py,sha256=NdzD1ER3GHJp51UXLGTW8iTYwzZlITH2nO0vanTkEWU,24272
9
+ euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
10
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
+ euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
+ euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
13
+ euroeval/generation_utils.py,sha256=vU-j9kjFDuPlSizEaRByx_XJyyAVpE8PdGOm9i--9zQ,14613
14
+ euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
+ euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
16
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
+ euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
19
+ euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
+ euroeval/tasks.py,sha256=jl8HicriMSN_LfHANokVGFqzgV53QcJ5dmzb297xI04,4173
21
+ euroeval/tokenization_utils.py,sha256=icEfttWReKRC5MbREOuxTHOPpuVvH6uHhnqz1w7qIyA,20565
22
+ euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
+ euroeval/utils.py,sha256=O4JIROPfbA7MD9SbOY0CifoCckYjmdNjXYjOxDwBnwM,14149
24
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
+ euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
26
+ euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
27
+ euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
28
+ euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
29
+ euroeval/benchmark_modules/vllm.py,sha256=dTwGGOFQ7wqYXg7x2YBUJNQcO6OwqjTMBfUf5OveXNk,41289
30
+ euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
31
+ euroeval/dataset_configs/danish.py,sha256=3n9e0r-hYRI2hPOgLDMQsO8bPgZKjw7OcFCUsCvdmk4,5294
32
+ euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
33
+ euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
34
+ euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
35
+ euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
+ euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
37
+ euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
38
+ euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
39
+ euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
+ euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
41
+ euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
42
+ euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
43
+ euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
44
+ euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
45
+ euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
46
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
47
+ euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
48
+ euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
49
+ euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
50
+ euroeval/metrics/pipeline.py,sha256=T65p2sxPnwh2WgCjqsqzvE3XOzizNY7rlSm8KPR7sCk,8883
51
+ euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
52
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
53
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
54
+ euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
55
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
56
+ euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
57
+ euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
58
+ euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
59
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
60
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=lNEOWi3ckLBnMP1QoSTxNxT-s6kBz2XH17mrmjQlv5s,7075
61
+ euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
62
+ euroeval/task_group_utils/sequence_classification.py,sha256=K_hFWY6D5WR8-uy6ZikCq3ighHNHSyzW7A62vwDkwDs,16512
63
+ euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
64
+ euroeval/task_group_utils/token_classification.py,sha256=6bN9soT1kLthutCpqUT-jDmZZw9Mt7H3tjI4zVvE4BY,16469
65
+ euroeval-16.0.0.dist-info/METADATA,sha256=uvzi8Bkgab8rKhgKavqFnv8rpL0KntFIYMZ7f1Joa0U,13544
66
+ euroeval-16.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
+ euroeval-16.0.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
68
+ euroeval-16.0.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
69
+ euroeval-16.0.0.dist-info/RECORD,,
{euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt CHANGED
@@ -1,4 +1,3 @@
1
1
  [console_scripts]
2
2
  euroeval = euroeval.cli:benchmark
3
- human_evaluate = euroeval.human_evaluation:main
4
3
  scandeval = euroeval.cli:benchmark