EuroEval 15.16.0__py3-none-any.whl → 16.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/__init__.py +3 -7
- euroeval/benchmark_config_factory.py +3 -7
- euroeval/benchmark_modules/base.py +35 -19
- euroeval/benchmark_modules/fresh.py +24 -19
- euroeval/benchmark_modules/hf.py +136 -154
- euroeval/benchmark_modules/litellm.py +190 -110
- euroeval/benchmark_modules/vllm.py +161 -114
- euroeval/benchmarker.py +49 -22
- euroeval/cli.py +3 -3
- euroeval/constants.py +13 -15
- euroeval/data_loading.py +33 -28
- euroeval/data_models.py +53 -7
- euroeval/dataset_configs/__init__.py +2 -0
- euroeval/dataset_configs/danish.py +38 -1
- euroeval/dataset_configs/dutch.py +38 -1
- euroeval/dataset_configs/english.py +38 -1
- euroeval/dataset_configs/estonian.py +95 -0
- euroeval/dataset_configs/faroese.py +38 -0
- euroeval/dataset_configs/finnish.py +39 -1
- euroeval/dataset_configs/french.py +38 -1
- euroeval/dataset_configs/german.py +38 -1
- euroeval/dataset_configs/icelandic.py +39 -1
- euroeval/dataset_configs/italian.py +38 -1
- euroeval/dataset_configs/latvian.py +81 -0
- euroeval/dataset_configs/norwegian.py +38 -1
- euroeval/dataset_configs/portuguese.py +38 -1
- euroeval/dataset_configs/spanish.py +38 -1
- euroeval/dataset_configs/swedish.py +38 -1
- euroeval/enums.py +0 -6
- euroeval/finetuning.py +6 -6
- euroeval/generation.py +25 -14
- euroeval/generation_utils.py +46 -14
- euroeval/languages.py +947 -187
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +76 -0
- euroeval/metrics/huggingface.py +192 -0
- euroeval/metrics/llm_as_a_judge.py +257 -0
- euroeval/metrics/pipeline.py +234 -0
- euroeval/metrics/speed.py +51 -0
- euroeval/prompt_templates/linguistic_acceptability.py +40 -2
- euroeval/prompt_templates/multiple_choice.py +23 -2
- euroeval/prompt_templates/named_entity_recognition.py +65 -2
- euroeval/prompt_templates/reading_comprehension.py +42 -2
- euroeval/prompt_templates/sentiment_classification.py +46 -2
- euroeval/prompt_templates/summarization.py +24 -4
- euroeval/scores.py +7 -2
- euroeval/speed_benchmark.py +6 -6
- euroeval/task_group_utils/multiple_choice_classification.py +17 -6
- euroeval/task_group_utils/question_answering.py +35 -28
- euroeval/task_group_utils/sequence_classification.py +96 -23
- euroeval/task_group_utils/text_to_text.py +7 -3
- euroeval/task_group_utils/token_classification.py +47 -75
- euroeval/tasks.py +31 -6
- euroeval/tokenization_utils.py +295 -207
- euroeval/utils.py +118 -34
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +11 -14
- euroeval-16.0.0.dist-info/RECORD +69 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -738
- euroeval/metrics.py +0 -470
- euroeval-15.16.0.dist-info/RECORD +0 -63
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py
CHANGED
|
@@ -8,12 +8,15 @@ import importlib.util
|
|
|
8
8
|
import logging
|
|
9
9
|
import os
|
|
10
10
|
import random
|
|
11
|
+
import re
|
|
11
12
|
import sys
|
|
12
13
|
import typing as t
|
|
13
14
|
import warnings
|
|
14
15
|
from functools import cache
|
|
15
16
|
from pathlib import Path
|
|
16
17
|
|
|
18
|
+
import demjson3
|
|
19
|
+
import huggingface_hub as hf_hub
|
|
17
20
|
import litellm
|
|
18
21
|
import numpy as np
|
|
19
22
|
import requests
|
|
@@ -24,9 +27,6 @@ from transformers import logging as tf_logging
|
|
|
24
27
|
|
|
25
28
|
from .exceptions import NaNValueInModelOutput
|
|
26
29
|
|
|
27
|
-
if importlib.util.find_spec("ray") is not None:
|
|
28
|
-
import ray
|
|
29
|
-
|
|
30
30
|
if t.TYPE_CHECKING:
|
|
31
31
|
from types import TracebackType
|
|
32
32
|
|
|
@@ -94,54 +94,53 @@ def block_terminal_output() -> None:
|
|
|
94
94
|
# Ignore miscellaneous warnings
|
|
95
95
|
warnings.filterwarnings("ignore", category=UserWarning)
|
|
96
96
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
|
97
|
-
warnings.filterwarnings(
|
|
98
|
-
"ignore",
|
|
99
|
-
module="torch.nn.parallel*",
|
|
100
|
-
message="Was asked to gather along dimension 0, but all input tensors were "
|
|
101
|
-
"scalars; will instead unsqueeze and return a vector.",
|
|
102
|
-
)
|
|
103
|
-
warnings.filterwarnings("ignore", module="seqeval*")
|
|
104
|
-
|
|
105
|
-
# Up the logging level, to disable outputs
|
|
106
|
-
logging.getLogger("filelock").setLevel(logging.CRITICAL)
|
|
107
97
|
logging.getLogger("absl").setLevel(logging.CRITICAL)
|
|
108
|
-
|
|
98
|
+
|
|
99
|
+
# Disable matplotlib logging
|
|
100
|
+
logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
|
|
101
|
+
|
|
102
|
+
# Disable PyTorch logging
|
|
103
|
+
logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
|
|
104
|
+
warnings.filterwarnings(action="ignore", module="torch*")
|
|
105
|
+
os.environ["TORCH_LOGS"] = "-all"
|
|
106
|
+
|
|
107
|
+
# Disable huggingface_hub logging
|
|
108
|
+
logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
|
|
109
|
+
|
|
110
|
+
# Disable LiteLLM logging
|
|
111
|
+
logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
|
|
112
|
+
logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
|
|
113
|
+
logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
|
|
109
114
|
logging.getLogger("openai").setLevel(logging.CRITICAL)
|
|
110
|
-
logging.getLogger("
|
|
111
|
-
|
|
115
|
+
logging.getLogger("httpx").setLevel(logging.CRITICAL)
|
|
116
|
+
litellm.suppress_debug_info = True
|
|
117
|
+
|
|
118
|
+
# Disable vLLM logging
|
|
112
119
|
logging.getLogger("vllm").setLevel(logging.CRITICAL)
|
|
113
120
|
logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
|
|
114
121
|
logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
|
|
115
122
|
logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
|
|
116
123
|
logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
|
|
117
124
|
logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
|
|
118
|
-
logging.getLogger("
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
|
|
122
|
-
logging.getLogger("accelerate").setLevel(logging.CRITICAL)
|
|
123
|
-
logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
|
|
124
|
-
logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
|
|
125
|
-
logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
|
|
126
|
-
logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
|
|
127
|
-
|
|
128
|
-
# This suppresses vLLM logging
|
|
125
|
+
logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
|
|
126
|
+
logging.CRITICAL
|
|
127
|
+
)
|
|
129
128
|
os.environ["LOG_LEVEL"] = "CRITICAL"
|
|
130
129
|
os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
|
|
131
130
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
# Disable the tokeniser progress bars
|
|
131
|
+
# Disable datasets logging
|
|
132
|
+
logging.getLogger("datasets").setLevel(logging.CRITICAL)
|
|
133
|
+
logging.getLogger("filelock").setLevel(logging.CRITICAL)
|
|
136
134
|
disable_progress_bar()
|
|
137
135
|
|
|
136
|
+
# Disable evaluate logging
|
|
137
|
+
warnings.filterwarnings("ignore", module="seqeval*")
|
|
138
|
+
|
|
138
139
|
# Disable most of the `transformers` logging
|
|
139
140
|
tf_logging._default_log_level = logging.CRITICAL
|
|
140
141
|
tf_logging.set_verbosity(logging.CRITICAL)
|
|
141
142
|
logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
|
|
142
|
-
|
|
143
|
-
# Disable logging from `litellm`
|
|
144
|
-
litellm.suppress_debug_info = True
|
|
143
|
+
logging.getLogger("accelerate").setLevel(logging.CRITICAL)
|
|
145
144
|
|
|
146
145
|
|
|
147
146
|
def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
|
|
@@ -373,3 +372,88 @@ async def add_semaphore_and_catch_exception(
|
|
|
373
372
|
return await coroutine
|
|
374
373
|
except Exception as exc:
|
|
375
374
|
return exc
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def extract_json_dict_from_string(s: str) -> dict | None:
|
|
378
|
+
"""Extract a JSON dictionary from a string.
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
s:
|
|
382
|
+
The string to extract the JSON dictionary from.
|
|
383
|
+
|
|
384
|
+
Returns:
|
|
385
|
+
The extracted JSON dictionary, or None if no JSON dictionary could be found.
|
|
386
|
+
"""
|
|
387
|
+
json_regex = r"\{[^{}]+?\}"
|
|
388
|
+
if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
|
|
389
|
+
logger.debug(
|
|
390
|
+
"The model output does not contain any JSON dictionary, so cannot parse "
|
|
391
|
+
f"it. Skipping. Here is the output: {s!r}"
|
|
392
|
+
)
|
|
393
|
+
return None
|
|
394
|
+
json_string = json_match.group()
|
|
395
|
+
try:
|
|
396
|
+
json_output = demjson3.decode(txt=json_string)
|
|
397
|
+
except demjson3.JSONDecodeError:
|
|
398
|
+
logger.debug(
|
|
399
|
+
"The model output is not valid JSON, so cannot parse it. Skipping. "
|
|
400
|
+
f"Here is the output: {json_string!r}"
|
|
401
|
+
)
|
|
402
|
+
return None
|
|
403
|
+
if not isinstance(json_output, dict):
|
|
404
|
+
logger.debug(
|
|
405
|
+
"The model output is not a JSON dictionary, so cannot parse "
|
|
406
|
+
f"it. Skipping. Here is the output: {json_string!r}"
|
|
407
|
+
)
|
|
408
|
+
return None
|
|
409
|
+
elif not all(isinstance(key, str) for key in json_output.keys()):
|
|
410
|
+
logger.debug(
|
|
411
|
+
"The model output is not a JSON dictionary with string keys, "
|
|
412
|
+
"so cannot parse it. Skipping. Here is the output: "
|
|
413
|
+
f"{json_string!r}"
|
|
414
|
+
)
|
|
415
|
+
return None
|
|
416
|
+
return json_output
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@cache
|
|
420
|
+
def get_hf_token(api_key: str | None) -> str | bool:
|
|
421
|
+
"""Get the Hugging Face token.
|
|
422
|
+
|
|
423
|
+
Args:
|
|
424
|
+
api_key:
|
|
425
|
+
The API key to use as the Hugging Face token. If None, we will try to
|
|
426
|
+
extract it in other ways.
|
|
427
|
+
|
|
428
|
+
Returns:
|
|
429
|
+
The Hugging Face token, or True if no token is set but the user is logged in, or
|
|
430
|
+
False if no token is set and the user is not logged in.
|
|
431
|
+
"""
|
|
432
|
+
if api_key is not None:
|
|
433
|
+
log_once(
|
|
434
|
+
"Using the Hugging Face API key passed to the function.",
|
|
435
|
+
level=logging.DEBUG,
|
|
436
|
+
)
|
|
437
|
+
return api_key
|
|
438
|
+
elif (token := os.getenv("HUGGINGFACE_API_KEY")) is not None:
|
|
439
|
+
log_once(
|
|
440
|
+
"Using the Hugging Face API key from the environment variable "
|
|
441
|
+
"`HUGGINGFACE_API_KEY`.",
|
|
442
|
+
level=logging.DEBUG,
|
|
443
|
+
)
|
|
444
|
+
return token
|
|
445
|
+
try:
|
|
446
|
+
hf_hub.whoami()
|
|
447
|
+
log_once(
|
|
448
|
+
"No Hugging Face API key was set, but the user is logged in to Hugging "
|
|
449
|
+
"Face, so using the local token.",
|
|
450
|
+
level=logging.DEBUG,
|
|
451
|
+
)
|
|
452
|
+
return True
|
|
453
|
+
except hf_hub.errors.LocalTokenNotFoundError:
|
|
454
|
+
log_once(
|
|
455
|
+
"No Hugging Face API key was set and the user is not logged in to Hugging "
|
|
456
|
+
"Face, so no token will be used.",
|
|
457
|
+
level=logging.DEBUG,
|
|
458
|
+
)
|
|
459
|
+
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.16.0
|
|
3
|
+
Version: 16.0.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -28,18 +28,19 @@ License: MIT License
|
|
|
28
28
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
29
|
SOFTWARE.
|
|
30
30
|
License-File: LICENSE
|
|
31
|
-
Requires-Python: <4.0,>=3.
|
|
31
|
+
Requires-Python: <4.0,>=3.11
|
|
32
32
|
Requires-Dist: accelerate>=1.9.0
|
|
33
33
|
Requires-Dist: bert-score>=0.3.13
|
|
34
34
|
Requires-Dist: click>=8.1.3
|
|
35
|
+
Requires-Dist: cloudpickle>=3.1.1
|
|
35
36
|
Requires-Dist: datasets>=3.5.0
|
|
36
37
|
Requires-Dist: demjson3>=3.0.6
|
|
37
38
|
Requires-Dist: evaluate>=0.4.1
|
|
38
39
|
Requires-Dist: huggingface-hub>=0.30.1
|
|
39
40
|
Requires-Dist: levenshtein>=0.24.0
|
|
40
|
-
Requires-Dist: litellm>=1.
|
|
41
|
+
Requires-Dist: litellm>=1.75.6
|
|
41
42
|
Requires-Dist: more-itertools>=10.5.0
|
|
42
|
-
Requires-Dist: numpy
|
|
43
|
+
Requires-Dist: numpy>=2.0.0
|
|
43
44
|
Requires-Dist: ollama>=0.5.1
|
|
44
45
|
Requires-Dist: pandas>=2.2.0
|
|
45
46
|
Requires-Dist: peft>=0.15.0
|
|
@@ -49,27 +50,22 @@ Requires-Dist: pyinfer>=0.0.3
|
|
|
49
50
|
Requires-Dist: python-dotenv>=1.0.1
|
|
50
51
|
Requires-Dist: rouge-score>=0.1.2
|
|
51
52
|
Requires-Dist: sacremoses>=0.1.1
|
|
52
|
-
Requires-Dist: scikit-learn
|
|
53
|
+
Requires-Dist: scikit-learn==1.6.1
|
|
53
54
|
Requires-Dist: sentencepiece>=0.1.96
|
|
54
55
|
Requires-Dist: seqeval>=1.2.2
|
|
55
56
|
Requires-Dist: setuptools>=75.8.2
|
|
56
57
|
Requires-Dist: tenacity>=9.0.0
|
|
57
58
|
Requires-Dist: termcolor>=2.0.0
|
|
58
59
|
Requires-Dist: torch>=2.6.0
|
|
59
|
-
Requires-Dist: transformers>=4.
|
|
60
|
+
Requires-Dist: transformers[mistral-common]>=4.56.0
|
|
60
61
|
Provides-Extra: all
|
|
61
62
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
|
|
62
63
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
63
|
-
Requires-Dist:
|
|
64
|
-
Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'all'
|
|
64
|
+
Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'all'
|
|
65
65
|
Provides-Extra: generative
|
|
66
66
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
67
67
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
68
|
-
Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'generative'
|
|
69
|
-
Provides-Extra: human-evaluation
|
|
70
|
-
Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
|
|
71
|
-
Provides-Extra: test
|
|
72
|
-
Requires-Dist: gradio>=4.26.0; extra == 'test'
|
|
68
|
+
Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
|
|
73
69
|
Description-Content-Type: text/markdown
|
|
74
70
|
|
|
75
71
|
<div align='center'>
|
|
@@ -223,17 +219,18 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
223
219
|
<a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
|
|
224
220
|
<a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
|
|
225
221
|
<a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
|
|
222
|
+
<a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
|
|
226
223
|
<a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
|
|
227
224
|
<a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
|
|
228
225
|
<a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
|
|
229
226
|
<a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
|
|
230
227
|
<a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
|
|
231
|
-
<a href="https://github.com/pakagronglb"><img src="https://avatars.githubusercontent.com/u/178713124" width=50 alt="Contributor avatar for pakagronglb"/></a>
|
|
232
228
|
<a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
|
|
233
229
|
<a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
|
|
234
230
|
<a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
|
|
235
231
|
<a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
|
|
236
232
|
<a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
|
|
233
|
+
<a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
|
|
237
234
|
|
|
238
235
|
|
|
239
236
|
### Contribute to EuroEval
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
euroeval/__init__.py,sha256=MgFG1amMgiTJmK_hcQ7nnX-o4KFhlD1P5xKUBTloPCQ,3564
|
|
2
|
+
euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
|
|
3
|
+
euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
|
|
4
|
+
euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
|
|
5
|
+
euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
|
|
6
|
+
euroeval/constants.py,sha256=HWJ3PJRS-ZbAMXTvujiK8QP7IiS4RHkjnegv3oi52w0,2499
|
|
7
|
+
euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
|
|
8
|
+
euroeval/data_models.py,sha256=NdzD1ER3GHJp51UXLGTW8iTYwzZlITH2nO0vanTkEWU,24272
|
|
9
|
+
euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
|
|
10
|
+
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
+
euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
|
|
12
|
+
euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
|
|
13
|
+
euroeval/generation_utils.py,sha256=vU-j9kjFDuPlSizEaRByx_XJyyAVpE8PdGOm9i--9zQ,14613
|
|
14
|
+
euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
|
|
15
|
+
euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
|
|
16
|
+
euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
|
|
17
|
+
euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
|
|
18
|
+
euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
|
|
19
|
+
euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
|
|
20
|
+
euroeval/tasks.py,sha256=jl8HicriMSN_LfHANokVGFqzgV53QcJ5dmzb297xI04,4173
|
|
21
|
+
euroeval/tokenization_utils.py,sha256=icEfttWReKRC5MbREOuxTHOPpuVvH6uHhnqz1w7qIyA,20565
|
|
22
|
+
euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
|
|
23
|
+
euroeval/utils.py,sha256=O4JIROPfbA7MD9SbOY0CifoCckYjmdNjXYjOxDwBnwM,14149
|
|
24
|
+
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
25
|
+
euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
|
|
26
|
+
euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
|
|
27
|
+
euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
|
|
28
|
+
euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
|
|
29
|
+
euroeval/benchmark_modules/vllm.py,sha256=dTwGGOFQ7wqYXg7x2YBUJNQcO6OwqjTMBfUf5OveXNk,41289
|
|
30
|
+
euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
|
|
31
|
+
euroeval/dataset_configs/danish.py,sha256=3n9e0r-hYRI2hPOgLDMQsO8bPgZKjw7OcFCUsCvdmk4,5294
|
|
32
|
+
euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
|
|
33
|
+
euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
|
|
34
|
+
euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
|
|
35
|
+
euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
|
|
36
|
+
euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
|
|
37
|
+
euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
|
|
38
|
+
euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
|
|
39
|
+
euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
|
|
40
|
+
euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
|
|
41
|
+
euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
|
|
42
|
+
euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
|
|
43
|
+
euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
|
|
44
|
+
euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
|
|
45
|
+
euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
|
|
46
|
+
euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
|
|
47
|
+
euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
|
|
48
|
+
euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
|
|
49
|
+
euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
|
|
50
|
+
euroeval/metrics/pipeline.py,sha256=T65p2sxPnwh2WgCjqsqzvE3XOzizNY7rlSm8KPR7sCk,8883
|
|
51
|
+
euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
|
|
52
|
+
euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
|
|
53
|
+
euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
|
|
54
|
+
euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
|
|
55
|
+
euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
|
|
56
|
+
euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
|
|
57
|
+
euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
|
|
58
|
+
euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
|
|
59
|
+
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
60
|
+
euroeval/task_group_utils/multiple_choice_classification.py,sha256=lNEOWi3ckLBnMP1QoSTxNxT-s6kBz2XH17mrmjQlv5s,7075
|
|
61
|
+
euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
|
|
62
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=K_hFWY6D5WR8-uy6ZikCq3ighHNHSyzW7A62vwDkwDs,16512
|
|
63
|
+
euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
|
|
64
|
+
euroeval/task_group_utils/token_classification.py,sha256=6bN9soT1kLthutCpqUT-jDmZZw9Mt7H3tjI4zVvE4BY,16469
|
|
65
|
+
euroeval-16.0.0.dist-info/METADATA,sha256=uvzi8Bkgab8rKhgKavqFnv8rpL0KntFIYMZ7f1Joa0U,13544
|
|
66
|
+
euroeval-16.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
67
|
+
euroeval-16.0.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
68
|
+
euroeval-16.0.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
|
|
69
|
+
euroeval-16.0.0.dist-info/RECORD,,
|