ScandEval 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scandeval/__init__.py +0 -9
- scandeval/benchmark_config_factory.py +5 -0
- scandeval/benchmark_modules/hf.py +36 -8
- scandeval/benchmark_modules/litellm.py +119 -22
- scandeval/benchmark_modules/vllm.py +202 -94
- scandeval/benchmarker.py +28 -7
- scandeval/cli.py +13 -0
- scandeval/constants.py +31 -2
- scandeval/data_models.py +12 -2
- scandeval/dataset_configs/dutch.py +10 -0
- scandeval/logging_utils.py +1 -1
- scandeval/metrics/__init__.py +1 -0
- scandeval/metrics/bias.py +237 -0
- scandeval/metrics/huggingface.py +5 -3
- scandeval/metrics/llm_as_a_judge.py +79 -15
- scandeval/model_loading.py +2 -1
- scandeval/task_group_utils/sequence_classification.py +12 -3
- scandeval/tasks.py +22 -0
- scandeval/tokenisation_utils.py +12 -1
- scandeval/types.py +39 -0
- scandeval/utils.py +38 -66
- {scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/METADATA +50 -24
- {scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/RECORD +26 -25
- {scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/licenses/LICENSE +1 -1
- {scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/WHEEL +0 -0
- {scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/entry_points.txt +0 -0
scandeval/utils.py
CHANGED
|
@@ -14,16 +14,17 @@ import socket
|
|
|
14
14
|
import sys
|
|
15
15
|
import typing as t
|
|
16
16
|
from pathlib import Path
|
|
17
|
-
from types import ModuleType
|
|
17
|
+
from types import ModuleType
|
|
18
18
|
|
|
19
19
|
import demjson3
|
|
20
20
|
import huggingface_hub as hf_hub
|
|
21
21
|
import numpy as np
|
|
22
22
|
import torch
|
|
23
23
|
from huggingface_hub.errors import LocalTokenNotFoundError
|
|
24
|
+
from requests.exceptions import RequestException
|
|
24
25
|
|
|
25
26
|
from .caching_utils import cache_arguments
|
|
26
|
-
from .constants import T
|
|
27
|
+
from .constants import LOCAL_MODELS_REQUIRED_FILES, T
|
|
27
28
|
from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
|
|
28
29
|
from .logging_utils import log, log_once
|
|
29
30
|
|
|
@@ -44,10 +45,25 @@ def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
|
|
|
44
45
|
Returns:
|
|
45
46
|
The path to the cache directory.
|
|
46
47
|
"""
|
|
47
|
-
#
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
# If the model ID is a path, we just use that as the cache dir
|
|
49
|
+
if Path(model_id).is_dir():
|
|
50
|
+
log_once(
|
|
51
|
+
f"Since the model {model_id!r} is a local model, we will use the model "
|
|
52
|
+
"directory directly as the model cache directory.",
|
|
53
|
+
level=logging.DEBUG,
|
|
54
|
+
)
|
|
55
|
+
return model_id
|
|
56
|
+
|
|
57
|
+
# Otherwise, we create a cache dir based on the model ID
|
|
58
|
+
model_cache_dir = Path(
|
|
59
|
+
cache_dir, "model_cache", model_id.replace("/", "--")
|
|
60
|
+
).as_posix()
|
|
61
|
+
log_once(
|
|
62
|
+
f"Using the model cache directory {model_cache_dir!r} for the model "
|
|
63
|
+
f"{model_id!r}.",
|
|
64
|
+
level=logging.DEBUG,
|
|
65
|
+
)
|
|
66
|
+
return model_cache_dir
|
|
51
67
|
|
|
52
68
|
|
|
53
69
|
def resolve_model_path(download_dir: str) -> str:
|
|
@@ -65,8 +81,10 @@ def resolve_model_path(download_dir: str) -> str:
|
|
|
65
81
|
If the model path is not valid, or if required files are missing.
|
|
66
82
|
"""
|
|
67
83
|
model_path = Path(download_dir)
|
|
84
|
+
|
|
68
85
|
# Get the 'path safe' version of the model id, which is the last dir in the path
|
|
69
86
|
model_id_path = model_path.name
|
|
87
|
+
|
|
70
88
|
# Hf hub `cache_dir` puts the files in models--`model_id_path`/snapshots
|
|
71
89
|
model_path = model_path / f"models--{model_id_path}" / "snapshots"
|
|
72
90
|
if not model_path.exists():
|
|
@@ -89,16 +107,16 @@ def resolve_model_path(download_dir: str) -> str:
|
|
|
89
107
|
f"at {model_path}"
|
|
90
108
|
)
|
|
91
109
|
|
|
92
|
-
# Check that found_files contains at least
|
|
93
|
-
|
|
94
|
-
(file for file in found_files if file.name
|
|
110
|
+
# Check that found_files contains at least one of the required files
|
|
111
|
+
found_required_file = next(
|
|
112
|
+
(file for file in found_files if file.name in LOCAL_MODELS_REQUIRED_FILES), None
|
|
95
113
|
)
|
|
96
|
-
if
|
|
114
|
+
if found_required_file is None:
|
|
97
115
|
raise InvalidModel(
|
|
98
|
-
f"
|
|
99
|
-
f"at {model_path}"
|
|
116
|
+
f"At least one of the files {LOCAL_MODELS_REQUIRED_FILES} must be present "
|
|
117
|
+
f"for {model_id_path.strip('models--')} at {model_path}"
|
|
100
118
|
)
|
|
101
|
-
model_path =
|
|
119
|
+
model_path = found_required_file.parent
|
|
102
120
|
|
|
103
121
|
# As a precaution we also check that all of the files are in the same directory
|
|
104
122
|
# if not we create a new dir with symlinks to all of the files from all snapshots
|
|
@@ -423,6 +441,13 @@ def get_hf_token(api_key: str | None) -> str | bool:
|
|
|
423
441
|
level=logging.DEBUG,
|
|
424
442
|
)
|
|
425
443
|
return False
|
|
444
|
+
except RequestException:
|
|
445
|
+
log_once(
|
|
446
|
+
"No Hugging Face API key was set and the connection to Hugging Face "
|
|
447
|
+
"failed, so no token will be used.",
|
|
448
|
+
level=logging.DEBUG,
|
|
449
|
+
)
|
|
450
|
+
return False
|
|
426
451
|
|
|
427
452
|
|
|
428
453
|
def extract_multiple_choice_labels(
|
|
@@ -521,56 +546,3 @@ def load_custom_datasets_module(custom_datasets_file: Path) -> ModuleType | None
|
|
|
521
546
|
spec.loader.exec_module(module)
|
|
522
547
|
return module
|
|
523
548
|
return None
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
class attention_backend:
|
|
527
|
-
"""Context manager to temporarily set the attention backend.
|
|
528
|
-
|
|
529
|
-
This sets the `VLLM_ATTENTION_BACKEND` environment variable to the desired value
|
|
530
|
-
for the duration of the context manager, and restores the previous value afterwards.
|
|
531
|
-
"""
|
|
532
|
-
|
|
533
|
-
def __init__(self, value: str | None) -> None:
|
|
534
|
-
"""Initialise the context manager.
|
|
535
|
-
|
|
536
|
-
Args:
|
|
537
|
-
value:
|
|
538
|
-
The name of the attention backend to set. If None then no change is
|
|
539
|
-
made. Also, if the user has already set the `VLLM_ATTENTION_BACKEND` env
|
|
540
|
-
var, then no change is made.
|
|
541
|
-
"""
|
|
542
|
-
user_has_set_backend = (
|
|
543
|
-
os.environ.get("USER_HAS_SET_VLLM_ATTENTION_BACKEND", "0") == "1"
|
|
544
|
-
)
|
|
545
|
-
self.value = None if user_has_set_backend else value
|
|
546
|
-
self.previous_value: str | None = None
|
|
547
|
-
|
|
548
|
-
def __enter__(self) -> None:
|
|
549
|
-
"""Enter the context manager."""
|
|
550
|
-
if self.value is None:
|
|
551
|
-
return
|
|
552
|
-
self.previous_value = os.getenv("VLLM_ATTENTION_BACKEND")
|
|
553
|
-
os.environ["VLLM_ATTENTION_BACKEND"] = self.value
|
|
554
|
-
|
|
555
|
-
def __exit__(
|
|
556
|
-
self,
|
|
557
|
-
exc_type: t.Type[BaseException] | None,
|
|
558
|
-
exc_value: BaseException | None,
|
|
559
|
-
exc_tb: TracebackType | None,
|
|
560
|
-
) -> None:
|
|
561
|
-
"""Exit the context manager.
|
|
562
|
-
|
|
563
|
-
Args:
|
|
564
|
-
exc_type:
|
|
565
|
-
The type of the exception.
|
|
566
|
-
exc_value:
|
|
567
|
-
The value of the exception.
|
|
568
|
-
exc_tb:
|
|
569
|
-
The traceback of the exception.
|
|
570
|
-
"""
|
|
571
|
-
if self.value is None:
|
|
572
|
-
return
|
|
573
|
-
if self.previous_value is None:
|
|
574
|
-
os.environ.pop("VLLM_ATTENTION_BACKEND", None)
|
|
575
|
-
else:
|
|
576
|
-
os.environ["VLLM_ATTENTION_BACKEND"] = self.previous_value
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ScandEval
|
|
3
|
-
Version: 16.
|
|
3
|
+
Version: 16.12.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
@@ -8,7 +8,7 @@ Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
|
8
8
|
Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
|
|
9
9
|
License: MIT License
|
|
10
10
|
|
|
11
|
-
Copyright (c) 2022-
|
|
11
|
+
Copyright (c) 2022-2026 Dan Saattrup Smart
|
|
12
12
|
|
|
13
13
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
14
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -28,7 +28,7 @@ License: MIT License
|
|
|
28
28
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
29
|
SOFTWARE.
|
|
30
30
|
License-File: LICENSE
|
|
31
|
-
Requires-Python: <4.0,>=3.
|
|
31
|
+
Requires-Python: <4.0,>=3.12
|
|
32
32
|
Requires-Dist: accelerate>=1.9.0
|
|
33
33
|
Requires-Dist: bert-score>=0.3.13
|
|
34
34
|
Requires-Dist: click>=8.1.3
|
|
@@ -59,19 +59,23 @@ Requires-Dist: setuptools>=75.8.2
|
|
|
59
59
|
Requires-Dist: tenacity>=9.0.0
|
|
60
60
|
Requires-Dist: termcolor>=2.0.0
|
|
61
61
|
Requires-Dist: torch>=2.6.0
|
|
62
|
-
Requires-Dist: transformers[mistral-common]
|
|
62
|
+
Requires-Dist: transformers[mistral-common]<5.0.0,>=4.56.0
|
|
63
63
|
Provides-Extra: all
|
|
64
64
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
|
|
65
65
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
66
66
|
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
|
|
67
67
|
Requires-Dist: timm>=1.0.19; extra == 'all'
|
|
68
|
-
Requires-Dist: vllm
|
|
68
|
+
Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'all'
|
|
69
|
+
Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'all'
|
|
70
|
+
Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'all'
|
|
69
71
|
Provides-Extra: generative
|
|
70
72
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
71
73
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
72
74
|
Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
|
|
73
75
|
Requires-Dist: timm>=1.0.19; extra == 'generative'
|
|
74
|
-
Requires-Dist: vllm
|
|
76
|
+
Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'generative'
|
|
77
|
+
Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'generative'
|
|
78
|
+
Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'generative'
|
|
75
79
|
Description-Content-Type: text/markdown
|
|
76
80
|
|
|
77
81
|
<!-- This disables the requirement that the first line is a top-level heading -->
|
|
@@ -96,7 +100,7 @@ ______________________________________________________________________
|
|
|
96
100
|
[](https://arxiv.org/abs/2406.13469)
|
|
97
101
|
[](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
|
|
98
102
|
[](https://github.com/EuroEval/EuroEval/commits/main)
|
|
99
|
-
[](https://github.com/EuroEval/EuroEval/tree/main/tests)
|
|
100
104
|
[](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
|
|
101
105
|
|
|
102
106
|
## Maintainer
|
|
@@ -123,16 +127,17 @@ The easiest way to benchmark pretrained models is via the command line interface
|
|
|
123
127
|
having installed the package, you can benchmark your favorite model like so:
|
|
124
128
|
|
|
125
129
|
```bash
|
|
126
|
-
euroeval --model <model-id>
|
|
130
|
+
euroeval --model <model-id-or-path>
|
|
127
131
|
```
|
|
128
132
|
|
|
129
|
-
Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
|
|
130
|
-
Hub](https://huggingface.co/models)
|
|
131
|
-
the
|
|
132
|
-
|
|
133
|
+
Here `model` is either the HuggingFace model ID, which can be found on the [HuggingFace
|
|
134
|
+
Hub](https://huggingface.co/models), or a local path to a model directory (containing
|
|
135
|
+
the model files as well as the `config.json` file). By default this will benchmark the
|
|
136
|
+
model on all the tasks available. If you want to benchmark on a particular task, then
|
|
137
|
+
use the `--task` argument:
|
|
133
138
|
|
|
134
139
|
```bash
|
|
135
|
-
euroeval --model <model-id> --task sentiment-classification
|
|
140
|
+
euroeval --model <model-id-or-path> --task sentiment-classification
|
|
136
141
|
```
|
|
137
142
|
|
|
138
143
|
We can also narrow down which languages we would like to benchmark on. This can be done
|
|
@@ -140,20 +145,20 @@ by setting the `--language` argument. Here we thus benchmark the model on the Da
|
|
|
140
145
|
sentiment classification task:
|
|
141
146
|
|
|
142
147
|
```bash
|
|
143
|
-
euroeval --model <model-id> --task sentiment-classification --language da
|
|
148
|
+
euroeval --model <model-id-or-path> --task sentiment-classification --language da
|
|
144
149
|
```
|
|
145
150
|
|
|
146
151
|
Multiple models, datasets and/or languages can be specified by just attaching multiple
|
|
147
152
|
arguments. Here is an example with two models:
|
|
148
153
|
|
|
149
154
|
```bash
|
|
150
|
-
euroeval --model <model-
|
|
155
|
+
euroeval --model <model-id-or-path-1> --model <model-id-or-path-2>
|
|
151
156
|
```
|
|
152
157
|
|
|
153
158
|
The specific model version/revision to use can also be added after the suffix '@':
|
|
154
159
|
|
|
155
160
|
```bash
|
|
156
|
-
euroeval --model <model-id>@<commit>
|
|
161
|
+
euroeval --model <model-id-or-path>@<commit>
|
|
157
162
|
```
|
|
158
163
|
|
|
159
164
|
This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
|
|
@@ -173,7 +178,7 @@ model:
|
|
|
173
178
|
```python
|
|
174
179
|
>>> from euroeval import Benchmarker
|
|
175
180
|
>>> benchmarker = Benchmarker()
|
|
176
|
-
>>> benchmarker.benchmark(model="<model-id>")
|
|
181
|
+
>>> benchmarker.benchmark(model="<model-id-or-path>")
|
|
177
182
|
```
|
|
178
183
|
|
|
179
184
|
To benchmark on a specific task and/or language, you simply specify the `task` or
|
|
@@ -181,7 +186,7 @@ To benchmark on a specific task and/or language, you simply specify the `task` o
|
|
|
181
186
|
|
|
182
187
|
```python
|
|
183
188
|
>>> benchmarker.benchmark(
|
|
184
|
-
... model="<model-id>",
|
|
189
|
+
... model="<model-id-or-path>",
|
|
185
190
|
... task="sentiment-classification",
|
|
186
191
|
... language="da",
|
|
187
192
|
... )
|
|
@@ -225,7 +230,7 @@ docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
|
|
|
225
230
|
```
|
|
226
231
|
|
|
227
232
|
Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
|
|
228
|
-
argument. This could for instance be `--model <model-id> --task
|
|
233
|
+
argument. This could for instance be `--model <model-id-or-path> --task
|
|
229
234
|
sentiment-classification`.
|
|
230
235
|
|
|
231
236
|
## Benchmarking custom inference APIs
|
|
@@ -291,14 +296,14 @@ script. For example to download the model you want and all of the Danish sentime
|
|
|
291
296
|
classification datasets:
|
|
292
297
|
|
|
293
298
|
```bash
|
|
294
|
-
euroeval --model <model-id> --task sentiment-classification --language da --download-only
|
|
299
|
+
euroeval --model <model-id-or-path> --task sentiment-classification --language da --download-only
|
|
295
300
|
```
|
|
296
301
|
|
|
297
302
|
Or from a script:
|
|
298
303
|
|
|
299
304
|
```python
|
|
300
305
|
>>> benchmarker.benchmark(
|
|
301
|
-
... model="<model-id>",
|
|
306
|
+
... model="<model-id-or-path>",
|
|
302
307
|
... task="sentiment-classification",
|
|
303
308
|
... language="da",
|
|
304
309
|
... download_only=True,
|
|
@@ -346,7 +351,7 @@ MY_CONFIG = DatasetConfig(
|
|
|
346
351
|
You can then benchmark your custom dataset by simply running
|
|
347
352
|
|
|
348
353
|
```bash
|
|
349
|
-
euroeval --dataset my-dataset --model <model-id>
|
|
354
|
+
euroeval --dataset my-dataset --model <model-id-or-path>
|
|
350
355
|
```
|
|
351
356
|
|
|
352
357
|
You can also run the benchmark from a Python script, by simply providing your custom
|
|
@@ -356,7 +361,7 @@ dataset configuration directly into the `benchmark` method:
|
|
|
356
361
|
from euroeval import Benchmarker
|
|
357
362
|
|
|
358
363
|
benchmarker = Benchmarker()
|
|
359
|
-
benchmarker.benchmark(model="<model-id>", dataset=MY_CONFIG)
|
|
364
|
+
benchmarker.benchmark(model="<model-id-or-path>", dataset=MY_CONFIG)
|
|
360
365
|
```
|
|
361
366
|
|
|
362
367
|
We have included three convenience tasks to make it easier to set up custom datasets:
|
|
@@ -436,7 +441,7 @@ MY_SQL_DATASET = DatasetConfig(
|
|
|
436
441
|
Again, with this you can benchmark your custom dataset by simply running
|
|
437
442
|
|
|
438
443
|
```bash
|
|
439
|
-
euroeval --dataset my-sql-dataset --model <model-id>
|
|
444
|
+
euroeval --dataset my-sql-dataset --model <model-id-or-path>
|
|
440
445
|
```
|
|
441
446
|
|
|
442
447
|
## Reproducing the evaluation datasets
|
|
@@ -592,6 +597,27 @@ A huge thank you to all the contributors who have helped make this project a suc
|
|
|
592
597
|
alt="Contributor avatar for tvosch"
|
|
593
598
|
/>
|
|
594
599
|
</a>
|
|
600
|
+
<a href="https://github.com/Touzen">
|
|
601
|
+
<img
|
|
602
|
+
src="https://avatars.githubusercontent.com/u/1416265"
|
|
603
|
+
width=50
|
|
604
|
+
alt="Contributor avatar for Touzen"
|
|
605
|
+
/>
|
|
606
|
+
</a>
|
|
607
|
+
<a href="https://github.com/caldaibis">
|
|
608
|
+
<img
|
|
609
|
+
src="https://avatars.githubusercontent.com/u/16032437"
|
|
610
|
+
width=50
|
|
611
|
+
alt="Contributor avatar for caldaibis"
|
|
612
|
+
/>
|
|
613
|
+
</a>
|
|
614
|
+
<a href="https://github.com/SwekeR-463">
|
|
615
|
+
<img
|
|
616
|
+
src="https://avatars.githubusercontent.com/u/114919896?v=4"
|
|
617
|
+
width=50
|
|
618
|
+
alt="Contributor avatar for SwekeR-463"
|
|
619
|
+
/>
|
|
620
|
+
</a>
|
|
595
621
|
|
|
596
622
|
### Contribute to EuroEval
|
|
597
623
|
|
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
scandeval/__init__.py,sha256=
|
|
2
|
-
scandeval/benchmark_config_factory.py,sha256=
|
|
3
|
-
scandeval/benchmarker.py,sha256=
|
|
1
|
+
scandeval/__init__.py,sha256=wHhEEQ8wLNLAN9ULdAkWZpGSo08IpTx_w_gaya0FnVQ,3896
|
|
2
|
+
scandeval/benchmark_config_factory.py,sha256=NeikkDCfvTI3ZrAAP-kCQK6Ma3FfwITa_sZ4Ou0w3GM,8895
|
|
3
|
+
scandeval/benchmarker.py,sha256=HPG3qF3dX1hnhEc3WYsSGTkWJ8GeXC1ct_A-89IQTtw,54470
|
|
4
4
|
scandeval/caching_utils.py,sha256=lLUbkpDdJZy4xodIpwIz5d-WNKGuszbr_d9dyiJ5kZc,2591
|
|
5
5
|
scandeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
|
|
6
|
-
scandeval/cli.py,sha256=
|
|
7
|
-
scandeval/constants.py,sha256=
|
|
6
|
+
scandeval/cli.py,sha256=BUrE8ca4wIOQjBM4NoyhNVzGPnVdjOl7xFXbUDuAsq0,9807
|
|
7
|
+
scandeval/constants.py,sha256=0IVDd0tmb3r6lKB5CODc4RqS7OofZdW3xE40jT74LeQ,4492
|
|
8
8
|
scandeval/data_loading.py,sha256=8ryYEmj6di1f9QefGfNajxObQ9iapIGuAsL8m9KzDyI,7050
|
|
9
|
-
scandeval/data_models.py,sha256=
|
|
9
|
+
scandeval/data_models.py,sha256=IaXgy5OKPA1wHP55-m9IqE2hBC8Kv8nhsUSTqJBq7ho,30968
|
|
10
10
|
scandeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
|
|
11
11
|
scandeval/exceptions.py,sha256=4-N2OIo5PJ2aciLjagNAVhdHPxpq2QxywbBqJ8lkKj0,5780
|
|
12
12
|
scandeval/finetuning.py,sha256=dTjchPHLFRD65ZrEmtj5TfMTPZ6PODn77t372fgTNwE,11983
|
|
13
13
|
scandeval/generation.py,sha256=ccE-S0jxkM99XziIdeaBbk8yRGv4YBkzZkoabhFCSKA,13382
|
|
14
14
|
scandeval/generation_utils.py,sha256=A6YCiiMrMEUHq5BcVEjsouIKMPGt0sCfPzsJY1GVyk0,20092
|
|
15
15
|
scandeval/languages.py,sha256=gUSosFbvf1eEQHjVsKhXdJ4jiGXC-9lMkOL8AsBG33Q,37295
|
|
16
|
-
scandeval/logging_utils.py,sha256=
|
|
16
|
+
scandeval/logging_utils.py,sha256=Qnni11ngHrjCf_fgkk6lp6gs-tGSgUS3d5zRR83y6ec,9507
|
|
17
17
|
scandeval/model_cache.py,sha256=sjMYW0klnHt2yAFLavDTsp_InxPeSOuVEFo-Rh_31UM,10219
|
|
18
18
|
scandeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
|
|
19
|
-
scandeval/model_loading.py,sha256=
|
|
19
|
+
scandeval/model_loading.py,sha256=DsX7et18Epcv8kHATZgwPJnwH17GHmh3JCzrSoI3GAE,2377
|
|
20
20
|
scandeval/scores.py,sha256=9a1XtppFbp8GJFc9JdThGxqBY0YUE7-92oyrlxScjNk,3281
|
|
21
21
|
scandeval/speed_benchmark.py,sha256=VUOvauc9tuAegThNT2g1a-Z1l7DEmKq57dHI4t16o5A,4068
|
|
22
|
-
scandeval/tasks.py,sha256=
|
|
23
|
-
scandeval/tokenisation_utils.py,sha256=
|
|
24
|
-
scandeval/types.py,sha256
|
|
25
|
-
scandeval/utils.py,sha256=
|
|
22
|
+
scandeval/tasks.py,sha256=FQvnl28iudjIA2V_G3gHpSsyKaSs7r1i-T5c2pLAuF4,6656
|
|
23
|
+
scandeval/tokenisation_utils.py,sha256=K9ovIi5WNqLrFKkafl16R3K-2PallGwV_zeIFw_AM_k,21553
|
|
24
|
+
scandeval/types.py,sha256=CHQjLzqKYDXPCyZas7rKg6wD1pNiYuaOFMWimrj5H64,4374
|
|
25
|
+
scandeval/utils.py,sha256=P7RARAvJzm-CVavNjMXR2ZseWxT3irXegRzjrVIdCww,17481
|
|
26
26
|
scandeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
27
27
|
scandeval/benchmark_modules/base.py,sha256=5YAsCMILKTRXFx_ylGQ7iS5AFKN25iFdkBjj8KzzElw,11445
|
|
28
28
|
scandeval/benchmark_modules/fresh.py,sha256=sG5ae4p1J-GGmVNcVBIxY1xZIAlUwq_pu-9c4uAYU3Y,10734
|
|
29
|
-
scandeval/benchmark_modules/hf.py,sha256=
|
|
30
|
-
scandeval/benchmark_modules/litellm.py,sha256=
|
|
31
|
-
scandeval/benchmark_modules/vllm.py,sha256=
|
|
29
|
+
scandeval/benchmark_modules/hf.py,sha256=ob-05POUBDWk9dU_hUT7nmXZ11IGCnMgj6xkyLYyX98,48512
|
|
30
|
+
scandeval/benchmark_modules/litellm.py,sha256=jVagENE3a0PNMDOaj4DLY-p2Lf-BzNVB1_voPq2CLTU,75545
|
|
31
|
+
scandeval/benchmark_modules/vllm.py,sha256=pPKDHf5T_p0u9CJcR7R5sMmN98mirl64kWfyEHbtb5s,61720
|
|
32
32
|
scandeval/dataset_configs/__init__.py,sha256=GFI_W9GKd3OSDdhhJzHc8mwoP9b32IHIIyvPBI-hK6k,3223
|
|
33
33
|
scandeval/dataset_configs/albanian.py,sha256=D__dli7JO3yeHzzdJ3FFyUGw-z20f1yI6QLnws-WB8I,1473
|
|
34
34
|
scandeval/dataset_configs/bosnian.py,sha256=golIWqwW1pFwSkuBM1v0yhHDblB2FoJgK24aO7kKm7M,877
|
|
@@ -37,7 +37,7 @@ scandeval/dataset_configs/catalan.py,sha256=SXwRJjIcMMN7rVuhFRZSnCGDoMfabW5HFoZO
|
|
|
37
37
|
scandeval/dataset_configs/croatian.py,sha256=U5oBTjttpWTWonTEzZAf-G3nvQICRQmw6Kla-HWn_5k,1260
|
|
38
38
|
scandeval/dataset_configs/czech.py,sha256=ghv2yNw839G-utll8PQRSjyKYbM5gfoQhFKy664GTCI,1562
|
|
39
39
|
scandeval/dataset_configs/danish.py,sha256=LEKs04vK2KnV0CYheT7FeS-g3iHBvf2bQxyl0D_LbTg,3293
|
|
40
|
-
scandeval/dataset_configs/dutch.py,sha256=
|
|
40
|
+
scandeval/dataset_configs/dutch.py,sha256=q9adDSpR08Ol5AMJJpp1e1T1ZbwmORaFnJaEGrAujm4,3747
|
|
41
41
|
scandeval/dataset_configs/english.py,sha256=nc9nGwxf1tHVMUhQeND61yJbpTO4rJaAusPZlstqtq0,2817
|
|
42
42
|
scandeval/dataset_configs/estonian.py,sha256=bWiKA_dJ7WUE8Z_1YZnSewhi4ZdCQBGJZ7pQxkCwMcU,2757
|
|
43
43
|
scandeval/dataset_configs/faroese.py,sha256=13qYwXonDPWG9Av5MY_NBNTRDglPVKz5_mbz7ZCJ_mo,1247
|
|
@@ -60,10 +60,11 @@ scandeval/dataset_configs/slovene.py,sha256=r6BbFRvkFYf_4lvQaltaJ1VTVGETZ0xspsu9
|
|
|
60
60
|
scandeval/dataset_configs/spanish.py,sha256=Q60nx69sGbYk8p0hg2cwLFyoPjg36FdstLQoacw9QmU,2928
|
|
61
61
|
scandeval/dataset_configs/swedish.py,sha256=kpEK29swY7iyUSzUvD9hNf2qwb3d7bHrFwboCWVAf2k,3269
|
|
62
62
|
scandeval/dataset_configs/ukrainian.py,sha256=spbCmCOU27jOfz6FZxqCIfVmDN5l8H-7VCl-k-8eAIo,1527
|
|
63
|
-
scandeval/metrics/__init__.py,sha256=
|
|
63
|
+
scandeval/metrics/__init__.py,sha256=nrjFjTK7NO5I8U6acULNzqezmMWN21aWd4faW4oYGHo,233
|
|
64
64
|
scandeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
|
|
65
|
-
scandeval/metrics/
|
|
66
|
-
scandeval/metrics/
|
|
65
|
+
scandeval/metrics/bias.py,sha256=sV87PLzjc3XPsSAz2HJ4hmlLZ_IcHDsIUr7gYmp9HKc,7765
|
|
66
|
+
scandeval/metrics/huggingface.py,sha256=eKXn5wBcNdzs23cgJ64XG8LIwen1wDxXy2kAOw3bjoQ,9579
|
|
67
|
+
scandeval/metrics/llm_as_a_judge.py,sha256=UUFk3aL2BZqJ-u9-dzexsoArTxPJTMmHRqb1eWxexaI,12133
|
|
67
68
|
scandeval/metrics/pipeline.py,sha256=GTIqaFkn-nTLU4xBi8-zP1J4Ytv3qeFVuRB4OcuwkOw,10876
|
|
68
69
|
scandeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
|
|
69
70
|
scandeval/prompt_templates/__init__.py,sha256=p3CUcSaJiiUm6EQyhceDUjotH7GdyHolMznAn2f44as,519
|
|
@@ -79,11 +80,11 @@ scandeval/prompt_templates/token_classification.py,sha256=8Uw34mN2xQ_5es-nz7vCK-
|
|
|
79
80
|
scandeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
80
81
|
scandeval/task_group_utils/multiple_choice_classification.py,sha256=PWUXeGn-9RsXxdVRYHJASyBVQ8L5Jla981eot0GLooY,7316
|
|
81
82
|
scandeval/task_group_utils/question_answering.py,sha256=tuMwr-RnvJap5jkTrluxC1tfQVS6rKN8_ifNwis-auw,29064
|
|
82
|
-
scandeval/task_group_utils/sequence_classification.py,sha256=
|
|
83
|
+
scandeval/task_group_utils/sequence_classification.py,sha256=1YAaKn5bY8j9ONPfJZODjaGKVMkA9fQcl51fvBcjeF8,16829
|
|
83
84
|
scandeval/task_group_utils/text_to_text.py,sha256=p6zzjob70qQUpfUOs0LToSzavE1ERqRAHu_727Jb2mM,5476
|
|
84
85
|
scandeval/task_group_utils/token_classification.py,sha256=8dF32KQAYAFnnn7DPHX-yvJmRrMBmT2CyFREacyTwvQ,17321
|
|
85
|
-
scandeval-16.
|
|
86
|
-
scandeval-16.
|
|
87
|
-
scandeval-16.
|
|
88
|
-
scandeval-16.
|
|
89
|
-
scandeval-16.
|
|
86
|
+
scandeval-16.12.0.dist-info/METADATA,sha256=YCSgBbbtWLDfWqepHFS8UX0zho8gpTXJC1lagT_l94w,24564
|
|
87
|
+
scandeval-16.12.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
88
|
+
scandeval-16.12.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
|
|
89
|
+
scandeval-16.12.0.dist-info/licenses/LICENSE,sha256=vb2c84xITVnhnVFsBS8AWXl-4S-KpxN6VMxTqqYlV3s,1080
|
|
90
|
+
scandeval-16.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|