EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (64)
  1. euroeval/__init__.py +8 -7
  2. euroeval/benchmark_config_factory.py +3 -7
  3. euroeval/benchmark_modules/base.py +35 -19
  4. euroeval/benchmark_modules/fresh.py +24 -19
  5. euroeval/benchmark_modules/hf.py +136 -154
  6. euroeval/benchmark_modules/litellm.py +190 -110
  7. euroeval/benchmark_modules/vllm.py +199 -139
  8. euroeval/benchmarker.py +49 -22
  9. euroeval/cli.py +3 -3
  10. euroeval/constants.py +19 -15
  11. euroeval/data_loading.py +33 -28
  12. euroeval/data_models.py +73 -23
  13. euroeval/dataset_configs/__init__.py +2 -0
  14. euroeval/dataset_configs/danish.py +35 -1
  15. euroeval/dataset_configs/dutch.py +38 -1
  16. euroeval/dataset_configs/english.py +38 -1
  17. euroeval/dataset_configs/estonian.py +95 -0
  18. euroeval/dataset_configs/faroese.py +38 -0
  19. euroeval/dataset_configs/finnish.py +39 -1
  20. euroeval/dataset_configs/french.py +38 -1
  21. euroeval/dataset_configs/german.py +38 -1
  22. euroeval/dataset_configs/icelandic.py +39 -1
  23. euroeval/dataset_configs/italian.py +38 -1
  24. euroeval/dataset_configs/latvian.py +81 -0
  25. euroeval/dataset_configs/norwegian.py +38 -1
  26. euroeval/dataset_configs/portuguese.py +38 -1
  27. euroeval/dataset_configs/spanish.py +38 -1
  28. euroeval/dataset_configs/swedish.py +38 -1
  29. euroeval/enums.py +0 -6
  30. euroeval/finetuning.py +6 -6
  31. euroeval/generation.py +25 -14
  32. euroeval/generation_utils.py +90 -20
  33. euroeval/languages.py +947 -187
  34. euroeval/metrics/__init__.py +6 -0
  35. euroeval/metrics/base.py +76 -0
  36. euroeval/metrics/huggingface.py +192 -0
  37. euroeval/metrics/llm_as_a_judge.py +257 -0
  38. euroeval/metrics/pipeline.py +276 -0
  39. euroeval/metrics/speed.py +51 -0
  40. euroeval/model_cache.py +13 -1
  41. euroeval/prompt_templates/linguistic_acceptability.py +40 -2
  42. euroeval/prompt_templates/multiple_choice.py +23 -2
  43. euroeval/prompt_templates/named_entity_recognition.py +65 -2
  44. euroeval/prompt_templates/reading_comprehension.py +42 -2
  45. euroeval/prompt_templates/sentiment_classification.py +46 -2
  46. euroeval/prompt_templates/summarization.py +24 -4
  47. euroeval/scores.py +7 -2
  48. euroeval/speed_benchmark.py +6 -6
  49. euroeval/task_group_utils/multiple_choice_classification.py +19 -8
  50. euroeval/task_group_utils/question_answering.py +35 -28
  51. euroeval/task_group_utils/sequence_classification.py +128 -42
  52. euroeval/task_group_utils/text_to_text.py +7 -3
  53. euroeval/task_group_utils/token_classification.py +59 -73
  54. euroeval/tasks.py +33 -6
  55. euroeval/tokenization_utils.py +294 -207
  56. euroeval/utils.py +150 -35
  57. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA +13 -14
  58. euroeval-16.0.1.dist-info/RECORD +69 -0
  59. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt +0 -1
  60. euroeval/human_evaluation.py +0 -738
  61. euroeval/metrics.py +0 -470
  62. euroeval-15.16.0.dist-info/RECORD +0 -63
  63. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/WHEEL +0 -0
  64. {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py CHANGED
@@ -8,12 +8,15 @@ import importlib.util
8
8
  import logging
9
9
  import os
10
10
  import random
11
+ import re
11
12
  import sys
12
13
  import typing as t
13
14
  import warnings
14
15
  from functools import cache
15
16
  from pathlib import Path
16
17
 
18
+ import demjson3
19
+ import huggingface_hub as hf_hub
17
20
  import litellm
18
21
  import numpy as np
19
22
  import requests
@@ -22,10 +25,7 @@ from datasets.utils import disable_progress_bar
22
25
  from requests.exceptions import RequestException
23
26
  from transformers import logging as tf_logging
24
27
 
25
- from .exceptions import NaNValueInModelOutput
26
-
27
- if importlib.util.find_spec("ray") is not None:
28
- import ray
28
+ from .exceptions import InvalidBenchmark, NaNValueInModelOutput
29
29
 
30
30
  if t.TYPE_CHECKING:
31
31
  from types import TracebackType
@@ -94,54 +94,53 @@ def block_terminal_output() -> None:
94
94
  # Ignore miscellaneous warnings
95
95
  warnings.filterwarnings("ignore", category=UserWarning)
96
96
  warnings.filterwarnings("ignore", category=FutureWarning)
97
- warnings.filterwarnings(
98
- "ignore",
99
- module="torch.nn.parallel*",
100
- message="Was asked to gather along dimension 0, but all input tensors were "
101
- "scalars; will instead unsqueeze and return a vector.",
102
- )
103
- warnings.filterwarnings("ignore", module="seqeval*")
104
-
105
- # Up the logging level, to disable outputs
106
- logging.getLogger("filelock").setLevel(logging.CRITICAL)
107
97
  logging.getLogger("absl").setLevel(logging.CRITICAL)
108
- logging.getLogger("datasets").setLevel(logging.CRITICAL)
98
+
99
+ # Disable matplotlib logging
100
+ logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
101
+
102
+ # Disable PyTorch logging
103
+ logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
104
+ warnings.filterwarnings(action="ignore", module="torch*")
105
+ os.environ["TORCH_LOGS"] = "-all"
106
+
107
+ # Disable huggingface_hub logging
108
+ logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
109
+
110
+ # Disable LiteLLM logging
111
+ logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
112
+ logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
113
+ logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
109
114
  logging.getLogger("openai").setLevel(logging.CRITICAL)
110
- logging.getLogger("torch.distributed.distributed_c10d").setLevel(logging.CRITICAL)
111
- logging.getLogger("torch.nn.parallel.distributed").setLevel(logging.CRITICAL)
115
+ logging.getLogger("httpx").setLevel(logging.CRITICAL)
116
+ litellm.suppress_debug_info = True
117
+
118
+ # Disable vLLM logging
112
119
  logging.getLogger("vllm").setLevel(logging.CRITICAL)
113
120
  logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
114
121
  logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
115
122
  logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
116
123
  logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
117
124
  logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
118
- logging.getLogger("httpx").setLevel(logging.CRITICAL)
119
- logging.getLogger("ray._private.worker").setLevel(logging.CRITICAL)
120
- logging.getLogger("ray._private.services").setLevel(logging.CRITICAL)
121
- logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
122
- logging.getLogger("accelerate").setLevel(logging.CRITICAL)
123
- logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
124
- logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
125
- logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
126
- logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
127
-
128
- # This suppresses vLLM logging
125
+ logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
126
+ logging.CRITICAL
127
+ )
129
128
  os.environ["LOG_LEVEL"] = "CRITICAL"
130
129
  os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
131
130
 
132
- if importlib.util.find_spec("ray") is not None:
133
- ray._private.worker._worker_logs_enabled = False
134
-
135
- # Disable the tokeniser progress bars
131
+ # Disable datasets logging
132
+ logging.getLogger("datasets").setLevel(logging.CRITICAL)
133
+ logging.getLogger("filelock").setLevel(logging.CRITICAL)
136
134
  disable_progress_bar()
137
135
 
136
+ # Disable evaluate logging
137
+ warnings.filterwarnings("ignore", module="seqeval*")
138
+
138
139
  # Disable most of the `transformers` logging
139
140
  tf_logging._default_log_level = logging.CRITICAL
140
141
  tf_logging.set_verbosity(logging.CRITICAL)
141
142
  logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
142
-
143
- # Disable logging from `litellm`
144
- litellm.suppress_debug_info = True
143
+ logging.getLogger("accelerate").setLevel(logging.CRITICAL)
145
144
 
146
145
 
147
146
  def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
@@ -373,3 +372,119 @@ async def add_semaphore_and_catch_exception(
373
372
  return await coroutine
374
373
  except Exception as exc:
375
374
  return exc
375
+
376
+
377
+ def extract_json_dict_from_string(s: str) -> dict | None:
378
+ """Extract a JSON dictionary from a string.
379
+
380
+ Args:
381
+ s:
382
+ The string to extract the JSON dictionary from.
383
+
384
+ Returns:
385
+ The extracted JSON dictionary, or None if no JSON dictionary could be found.
386
+ """
387
+ json_regex = r"\{[^{}]+?\}"
388
+ if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
389
+ logger.debug(
390
+ "The model output does not contain any JSON dictionary, so cannot parse "
391
+ f"it. Skipping. Here is the output: {s!r}"
392
+ )
393
+ return None
394
+ json_string = json_match.group()
395
+ try:
396
+ json_output = demjson3.decode(txt=json_string)
397
+ except demjson3.JSONDecodeError:
398
+ logger.debug(
399
+ "The model output is not valid JSON, so cannot parse it. Skipping. "
400
+ f"Here is the output: {json_string!r}"
401
+ )
402
+ return None
403
+ if not isinstance(json_output, dict):
404
+ logger.debug(
405
+ "The model output is not a JSON dictionary, so cannot parse "
406
+ f"it. Skipping. Here is the output: {json_string!r}"
407
+ )
408
+ return None
409
+ elif not all(isinstance(key, str) for key in json_output.keys()):
410
+ logger.debug(
411
+ "The model output is not a JSON dictionary with string keys, "
412
+ "so cannot parse it. Skipping. Here is the output: "
413
+ f"{json_string!r}"
414
+ )
415
+ return None
416
+ return json_output
417
+
418
+
419
+ @cache
420
+ def get_hf_token(api_key: str | None) -> str | bool:
421
+ """Get the Hugging Face token.
422
+
423
+ Args:
424
+ api_key:
425
+ The API key to use as the Hugging Face token. If None, we will try to
426
+ extract it in other ways.
427
+
428
+ Returns:
429
+ The Hugging Face token, or True if no token is set but the user is logged in, or
430
+ False if no token is set and the user is not logged in.
431
+ """
432
+ if api_key is not None:
433
+ log_once(
434
+ "Using the Hugging Face API key passed to the function.",
435
+ level=logging.DEBUG,
436
+ )
437
+ return api_key
438
+ elif (token := os.getenv("HUGGINGFACE_API_KEY")) is not None:
439
+ log_once(
440
+ "Using the Hugging Face API key from the environment variable "
441
+ "`HUGGINGFACE_API_KEY`.",
442
+ level=logging.DEBUG,
443
+ )
444
+ return token
445
+ try:
446
+ hf_hub.whoami()
447
+ log_once(
448
+ "No Hugging Face API key was set, but the user is logged in to Hugging "
449
+ "Face, so using the local token.",
450
+ level=logging.DEBUG,
451
+ )
452
+ return True
453
+ except hf_hub.errors.LocalTokenNotFoundError:
454
+ log_once(
455
+ "No Hugging Face API key was set and the user is not logged in to Hugging "
456
+ "Face, so no token will be used.",
457
+ level=logging.DEBUG,
458
+ )
459
+ return False
460
+
461
+
462
+ def extract_multiple_choice_labels(
463
+ prompt: str, candidate_labels: list[str]
464
+ ) -> list[str]:
465
+ """Extract multiple choice labels from a prompt.
466
+
467
+ Args:
468
+ prompt:
469
+ The prompt to extract the labels from.
470
+ candidate_labels:
471
+ The candidate labels to look for in the prompt.
472
+
473
+ Returns:
474
+ The extracted labels.
475
+ """
476
+ sample_candidate_labels: list[str] = list()
477
+ for candidate_label in candidate_labels:
478
+ candidate_label_match = re.search(
479
+ pattern=rf"\b{candidate_label}\. ", string=prompt, flags=re.IGNORECASE
480
+ )
481
+ if candidate_label_match is not None:
482
+ sample_candidate_labels.append(candidate_label)
483
+ if not sample_candidate_labels:
484
+ raise InvalidBenchmark(
485
+ "Could not extract any candidate labels from the prompt. Please ensure "
486
+ "that the candidate labels are present in the prompt, each followed by a "
487
+ "dot and a space (e.g., 'a. '). The candidate labels are: "
488
+ f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
489
+ )
490
+ return sample_candidate_labels
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 15.16.0
3
+ Version: 16.0.1
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -28,18 +28,19 @@ License: MIT License
28
28
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
29
  SOFTWARE.
30
30
  License-File: LICENSE
31
- Requires-Python: <4.0,>=3.10
31
+ Requires-Python: <4.0,>=3.11
32
32
  Requires-Dist: accelerate>=1.9.0
33
33
  Requires-Dist: bert-score>=0.3.13
34
34
  Requires-Dist: click>=8.1.3
35
+ Requires-Dist: cloudpickle>=3.1.1
35
36
  Requires-Dist: datasets>=3.5.0
36
37
  Requires-Dist: demjson3>=3.0.6
37
38
  Requires-Dist: evaluate>=0.4.1
38
39
  Requires-Dist: huggingface-hub>=0.30.1
39
40
  Requires-Dist: levenshtein>=0.24.0
40
- Requires-Dist: litellm>=1.72.2
41
+ Requires-Dist: litellm>=1.75.6
41
42
  Requires-Dist: more-itertools>=10.5.0
42
- Requires-Dist: numpy<2.0.0,>=1.23.0
43
+ Requires-Dist: numpy>=2.0.0
43
44
  Requires-Dist: ollama>=0.5.1
44
45
  Requires-Dist: pandas>=2.2.0
45
46
  Requires-Dist: peft>=0.15.0
@@ -49,27 +50,24 @@ Requires-Dist: pyinfer>=0.0.3
49
50
  Requires-Dist: python-dotenv>=1.0.1
50
51
  Requires-Dist: rouge-score>=0.1.2
51
52
  Requires-Dist: sacremoses>=0.1.1
52
- Requires-Dist: scikit-learn<1.6.0
53
+ Requires-Dist: scikit-learn==1.6.1
53
54
  Requires-Dist: sentencepiece>=0.1.96
54
55
  Requires-Dist: seqeval>=1.2.2
55
56
  Requires-Dist: setuptools>=75.8.2
56
57
  Requires-Dist: tenacity>=9.0.0
57
58
  Requires-Dist: termcolor>=2.0.0
58
59
  Requires-Dist: torch>=2.6.0
59
- Requires-Dist: transformers>=4.55.0
60
+ Requires-Dist: transformers[mistral-common]>=4.56.0
60
61
  Provides-Extra: all
61
62
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
62
63
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
63
- Requires-Dist: gradio>=4.26.0; extra == 'all'
64
- Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'all'
64
+ Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'all'
65
+ Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'all'
65
66
  Provides-Extra: generative
66
67
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
67
68
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
68
- Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'generative'
69
- Provides-Extra: human-evaluation
70
- Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
71
- Provides-Extra: test
72
- Requires-Dist: gradio>=4.26.0; extra == 'test'
69
+ Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'generative'
70
+ Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
73
71
  Description-Content-Type: text/markdown
74
72
 
75
73
  <div align='center'>
@@ -223,17 +221,18 @@ A huge thank you to all the contributors who have helped make this project a suc
223
221
  <a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
224
222
  <a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
225
223
  <a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
224
+ <a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
226
225
  <a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
227
226
  <a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
228
227
  <a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
229
228
  <a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
230
229
  <a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
231
- <a href="https://github.com/pakagronglb"><img src="https://avatars.githubusercontent.com/u/178713124" width=50 alt="Contributor avatar for pakagronglb"/></a>
232
230
  <a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
233
231
  <a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
234
232
  <a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
235
233
  <a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
236
234
  <a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
235
+ <a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
237
236
 
238
237
 
239
238
  ### Contribute to EuroEval
euroeval-16.0.1.dist-info/RECORD ADDED
@@ -0,0 +1,69 @@
1
+ euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
2
+ euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
3
+ euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
4
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
+ euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
6
+ euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
7
+ euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
+ euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
9
+ euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
10
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
+ euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
+ euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
13
+ euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
14
+ euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
+ euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
+ euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
19
+ euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
+ euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
21
+ euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
22
+ euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
+ euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
24
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
+ euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
26
+ euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
27
+ euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
28
+ euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
29
+ euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
30
+ euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
31
+ euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
32
+ euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
33
+ euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
34
+ euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
35
+ euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
+ euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
37
+ euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
38
+ euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
39
+ euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
+ euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
41
+ euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
42
+ euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
43
+ euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
44
+ euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
45
+ euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
46
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
47
+ euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
48
+ euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
49
+ euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
50
+ euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
51
+ euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
52
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
53
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
54
+ euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
55
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
56
+ euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
57
+ euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
58
+ euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
59
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
60
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
61
+ euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
62
+ euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
63
+ euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
64
+ euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
65
+ euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
66
+ euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
+ euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
68
+ euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
69
+ euroeval-16.0.1.dist-info/RECORD,,
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt CHANGED
@@ -1,4 +1,3 @@
1
1
  [console_scripts]
2
2
  euroeval = euroeval.cli:benchmark
3
- human_evaluate = euroeval.human_evaluation:main
4
3
  scandeval = euroeval.cli:benchmark