ScandEval 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scandeval/utils.py CHANGED
@@ -14,16 +14,17 @@ import socket
14
14
  import sys
15
15
  import typing as t
16
16
  from pathlib import Path
17
- from types import ModuleType, TracebackType
17
+ from types import ModuleType
18
18
 
19
19
  import demjson3
20
20
  import huggingface_hub as hf_hub
21
21
  import numpy as np
22
22
  import torch
23
23
  from huggingface_hub.errors import LocalTokenNotFoundError
24
+ from requests.exceptions import RequestException
24
25
 
25
26
  from .caching_utils import cache_arguments
26
- from .constants import T
27
+ from .constants import LOCAL_MODELS_REQUIRED_FILES, T
27
28
  from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
28
29
  from .logging_utils import log, log_once
29
30
 
@@ -44,10 +45,25 @@ def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
44
45
  Returns:
45
46
  The path to the cache directory.
46
47
  """
47
- # to avoid nesting due to models name containing '/'
48
- _model_id = model_id.replace("/", "--")
49
- cache_dir_path = Path(cache_dir) / "model_cache" / _model_id
50
- return str(cache_dir_path)
48
+ # If the model ID is a path, we just use that as the cache dir
49
+ if Path(model_id).is_dir():
50
+ log_once(
51
+ f"Since the model {model_id!r} is a local model, we will use the model "
52
+ "directory directly as the model cache directory.",
53
+ level=logging.DEBUG,
54
+ )
55
+ return model_id
56
+
57
+ # Otherwise, we create a cache dir based on the model ID
58
+ model_cache_dir = Path(
59
+ cache_dir, "model_cache", model_id.replace("/", "--")
60
+ ).as_posix()
61
+ log_once(
62
+ f"Using the model cache directory {model_cache_dir!r} for the model "
63
+ f"{model_id!r}.",
64
+ level=logging.DEBUG,
65
+ )
66
+ return model_cache_dir
51
67
 
52
68
 
53
69
  def resolve_model_path(download_dir: str) -> str:
@@ -65,8 +81,10 @@ def resolve_model_path(download_dir: str) -> str:
65
81
  If the model path is not valid, or if required files are missing.
66
82
  """
67
83
  model_path = Path(download_dir)
84
+
68
85
  # Get the 'path safe' version of the model id, which is the last dir in the path
69
86
  model_id_path = model_path.name
87
+
70
88
  # Hf hub `cache_dir` puts the files in models--`model_id_path`/snapshots
71
89
  model_path = model_path / f"models--{model_id_path}" / "snapshots"
72
90
  if not model_path.exists():
@@ -89,16 +107,16 @@ def resolve_model_path(download_dir: str) -> str:
89
107
  f"at {model_path}"
90
108
  )
91
109
 
92
- # Check that found_files contains at least a 'config.json'
93
- config_file = next(
94
- (file for file in found_files if file.name == "config.json"), None
110
+ # Check that found_files contains at least one of the required files
111
+ found_required_file = next(
112
+ (file for file in found_files if file.name in LOCAL_MODELS_REQUIRED_FILES), None
95
113
  )
96
- if config_file is None:
114
+ if found_required_file is None:
97
115
  raise InvalidModel(
98
- f"Missing required file 'config.json' for {model_id_path.strip('models--')}"
99
- f"at {model_path}"
116
+ f"At least one of the files {LOCAL_MODELS_REQUIRED_FILES} must be present "
117
+ f"for {model_id_path.strip('models--')} at {model_path}"
100
118
  )
101
- model_path = config_file.parent
119
+ model_path = found_required_file.parent
102
120
 
103
121
  # As a precaution we also check that all of the files are in the same directory
104
122
  # if not we create a new dir with symlinks to all of the files from all snapshots
@@ -423,6 +441,13 @@ def get_hf_token(api_key: str | None) -> str | bool:
423
441
  level=logging.DEBUG,
424
442
  )
425
443
  return False
444
+ except RequestException:
445
+ log_once(
446
+ "No Hugging Face API key was set and the connection to Hugging Face "
447
+ "failed, so no token will be used.",
448
+ level=logging.DEBUG,
449
+ )
450
+ return False
426
451
 
427
452
 
428
453
  def extract_multiple_choice_labels(
@@ -521,56 +546,3 @@ def load_custom_datasets_module(custom_datasets_file: Path) -> ModuleType | None
521
546
  spec.loader.exec_module(module)
522
547
  return module
523
548
  return None
524
-
525
-
526
- class attention_backend:
527
- """Context manager to temporarily set the attention backend.
528
-
529
- This sets the `VLLM_ATTENTION_BACKEND` environment variable to the desired value
530
- for the duration of the context manager, and restores the previous value afterwards.
531
- """
532
-
533
- def __init__(self, value: str | None) -> None:
534
- """Initialise the context manager.
535
-
536
- Args:
537
- value:
538
- The name of the attention backend to set. If None then no change is
539
- made. Also, if the user has already set the `VLLM_ATTENTION_BACKEND` env
540
- var, then no change is made.
541
- """
542
- user_has_set_backend = (
543
- os.environ.get("USER_HAS_SET_VLLM_ATTENTION_BACKEND", "0") == "1"
544
- )
545
- self.value = None if user_has_set_backend else value
546
- self.previous_value: str | None = None
547
-
548
- def __enter__(self) -> None:
549
- """Enter the context manager."""
550
- if self.value is None:
551
- return
552
- self.previous_value = os.getenv("VLLM_ATTENTION_BACKEND")
553
- os.environ["VLLM_ATTENTION_BACKEND"] = self.value
554
-
555
- def __exit__(
556
- self,
557
- exc_type: t.Type[BaseException] | None,
558
- exc_value: BaseException | None,
559
- exc_tb: TracebackType | None,
560
- ) -> None:
561
- """Exit the context manager.
562
-
563
- Args:
564
- exc_type:
565
- The type of the exception.
566
- exc_value:
567
- The value of the exception.
568
- exc_tb:
569
- The traceback of the exception.
570
- """
571
- if self.value is None:
572
- return
573
- if self.previous_value is None:
574
- os.environ.pop("VLLM_ATTENTION_BACKEND", None)
575
- else:
576
- os.environ["VLLM_ATTENTION_BACKEND"] = self.previous_value
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ScandEval
3
- Version: 16.10.1
3
+ Version: 16.12.0
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -8,7 +8,7 @@ Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
8
8
  Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
9
9
  License: MIT License
10
10
 
11
- Copyright (c) 2022-2025 Dan Saattrup Smart
11
+ Copyright (c) 2022-2026 Dan Saattrup Smart
12
12
 
13
13
  Permission is hereby granted, free of charge, to any person obtaining a copy
14
14
  of this software and associated documentation files (the "Software"), to deal
@@ -28,7 +28,7 @@ License: MIT License
28
28
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
29
  SOFTWARE.
30
30
  License-File: LICENSE
31
- Requires-Python: <4.0,>=3.11
31
+ Requires-Python: <4.0,>=3.12
32
32
  Requires-Dist: accelerate>=1.9.0
33
33
  Requires-Dist: bert-score>=0.3.13
34
34
  Requires-Dist: click>=8.1.3
@@ -59,19 +59,23 @@ Requires-Dist: setuptools>=75.8.2
59
59
  Requires-Dist: tenacity>=9.0.0
60
60
  Requires-Dist: termcolor>=2.0.0
61
61
  Requires-Dist: torch>=2.6.0
62
- Requires-Dist: transformers[mistral-common]>=4.56.0
62
+ Requires-Dist: transformers[mistral-common]<5.0.0,>=4.56.0
63
63
  Provides-Extra: all
64
64
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
65
65
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
66
66
  Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
67
67
  Requires-Dist: timm>=1.0.19; extra == 'all'
68
- Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'all'
68
+ Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'all'
69
+ Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'all'
70
+ Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'all'
69
71
  Provides-Extra: generative
70
72
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
71
73
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
72
74
  Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
73
75
  Requires-Dist: timm>=1.0.19; extra == 'generative'
74
- Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'generative'
76
+ Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'generative'
77
+ Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'generative'
78
+ Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'generative'
75
79
  Description-Content-Type: text/markdown
76
80
 
77
81
  <!-- This disables the requirement that the first line is a top-level heading -->
@@ -96,7 +100,7 @@ ______________________________________________________________________
96
100
  [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
97
101
  [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
98
102
  [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
99
- [![Code Coverage](https://img.shields.io/badge/Coverage-70%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
103
+ [![Code Coverage](https://img.shields.io/badge/Coverage-74%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
100
104
  [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
101
105
 
102
106
  ## Maintainer
@@ -123,16 +127,17 @@ The easiest way to benchmark pretrained models is via the command line interface
123
127
  having installed the package, you can benchmark your favorite model like so:
124
128
 
125
129
  ```bash
126
- euroeval --model <model-id>
130
+ euroeval --model <model-id-or-path>
127
131
  ```
128
132
 
129
- Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
130
- Hub](https://huggingface.co/models). By default this will benchmark the model on all
131
- the tasks available. If you want to benchmark on a particular task, then use the
132
- `--task` argument:
133
+ Here `model` is either the HuggingFace model ID, which can be found on the [HuggingFace
134
+ Hub](https://huggingface.co/models), or a local path to a model directory (containing
135
+ the model files as well as the `config.json` file). By default this will benchmark the
136
+ model on all the tasks available. If you want to benchmark on a particular task, then
137
+ use the `--task` argument:
133
138
 
134
139
  ```bash
135
- euroeval --model <model-id> --task sentiment-classification
140
+ euroeval --model <model-id-or-path> --task sentiment-classification
136
141
  ```
137
142
 
138
143
  We can also narrow down which languages we would like to benchmark on. This can be done
@@ -140,20 +145,20 @@ by setting the `--language` argument. Here we thus benchmark the model on the Da
140
145
  sentiment classification task:
141
146
 
142
147
  ```bash
143
- euroeval --model <model-id> --task sentiment-classification --language da
148
+ euroeval --model <model-id-or-path> --task sentiment-classification --language da
144
149
  ```
145
150
 
146
151
  Multiple models, datasets and/or languages can be specified by just attaching multiple
147
152
  arguments. Here is an example with two models:
148
153
 
149
154
  ```bash
150
- euroeval --model <model-id1> --model <model-id2>
155
+ euroeval --model <model-id-or-path-1> --model <model-id-or-path-2>
151
156
  ```
152
157
 
153
158
  The specific model version/revision to use can also be specified by appending '@' followed by the revision:
154
159
 
155
160
  ```bash
156
- euroeval --model <model-id>@<commit>
161
+ euroeval --model <model-id-or-path>@<commit>
157
162
  ```
158
163
 
159
164
  This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
@@ -173,7 +178,7 @@ model:
173
178
  ```python
174
179
  >>> from euroeval import Benchmarker
175
180
  >>> benchmarker = Benchmarker()
176
- >>> benchmarker.benchmark(model="<model-id>")
181
+ >>> benchmarker.benchmark(model="<model-id-or-path>")
177
182
  ```
178
183
 
179
184
  To benchmark on a specific task and/or language, you simply specify the `task` or
@@ -181,7 +186,7 @@ To benchmark on a specific task and/or language, you simply specify the `task` o
181
186
 
182
187
  ```python
183
188
  >>> benchmarker.benchmark(
184
- ... model="<model-id>",
189
+ ... model="<model-id-or-path>",
185
190
  ... task="sentiment-classification",
186
191
  ... language="da",
187
192
  ... )
@@ -225,7 +230,7 @@ docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
225
230
  ```
226
231
 
227
232
  Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
228
- argument. This could for instance be `--model <model-id> --task
233
+ argument. This could for instance be `--model <model-id-or-path> --task
229
234
  sentiment-classification`.
230
235
 
231
236
  ## Benchmarking custom inference APIs
@@ -291,14 +296,14 @@ script. For example to download the model you want and all of the Danish sentime
291
296
  classification datasets:
292
297
 
293
298
  ```bash
294
- euroeval --model <model-id> --task sentiment-classification --language da --download-only
299
+ euroeval --model <model-id-or-path> --task sentiment-classification --language da --download-only
295
300
  ```
296
301
 
297
302
  Or from a script:
298
303
 
299
304
  ```python
300
305
  >>> benchmarker.benchmark(
301
- ... model="<model-id>",
306
+ ... model="<model-id-or-path>",
302
307
  ... task="sentiment-classification",
303
308
  ... language="da",
304
309
  ... download_only=True,
@@ -346,7 +351,7 @@ MY_CONFIG = DatasetConfig(
346
351
  You can then benchmark your custom dataset by simply running
347
352
 
348
353
  ```bash
349
- euroeval --dataset my-dataset --model <model-id>
354
+ euroeval --dataset my-dataset --model <model-id-or-path>
350
355
  ```
351
356
 
352
357
  You can also run the benchmark from a Python script, by simply providing your custom
@@ -356,7 +361,7 @@ dataset configuration directly into the `benchmark` method:
356
361
  from euroeval import Benchmarker
357
362
 
358
363
  benchmarker = Benchmarker()
359
- benchmarker.benchmark(model="<model-id>", dataset=MY_CONFIG)
364
+ benchmarker.benchmark(model="<model-id-or-path>", dataset=MY_CONFIG)
360
365
  ```
361
366
 
362
367
  We have included three convenience tasks to make it easier to set up custom datasets:
@@ -436,7 +441,7 @@ MY_SQL_DATASET = DatasetConfig(
436
441
  Again, with this you can benchmark your custom dataset by simply running
437
442
 
438
443
  ```bash
439
- euroeval --dataset my-sql-dataset --model <model-id>
444
+ euroeval --dataset my-sql-dataset --model <model-id-or-path>
440
445
  ```
441
446
 
442
447
  ## Reproducing the evaluation datasets
@@ -592,6 +597,27 @@ A huge thank you to all the contributors who have helped make this project a suc
592
597
  alt="Contributor avatar for tvosch"
593
598
  />
594
599
  </a>
600
+ <a href="https://github.com/Touzen">
601
+ <img
602
+ src="https://avatars.githubusercontent.com/u/1416265"
603
+ width=50
604
+ alt="Contributor avatar for Touzen"
605
+ />
606
+ </a>
607
+ <a href="https://github.com/caldaibis">
608
+ <img
609
+ src="https://avatars.githubusercontent.com/u/16032437"
610
+ width=50
611
+ alt="Contributor avatar for caldaibis"
612
+ />
613
+ </a>
614
+ <a href="https://github.com/SwekeR-463">
615
+ <img
616
+ src="https://avatars.githubusercontent.com/u/114919896?v=4"
617
+ width=50
618
+ alt="Contributor avatar for SwekeR-463"
619
+ />
620
+ </a>
595
621
 
596
622
  ### Contribute to EuroEval
597
623
 
@@ -1,34 +1,34 @@
1
- scandeval/__init__.py,sha256=w4oYw-lbj5ZZ4pv-bHrgZNJ6dlu-WcAWg2e--_UMmeE,4244
2
- scandeval/benchmark_config_factory.py,sha256=2stmcqKwx0G9pAiA0atunqDchJ9eoezp1Wh3vB41zV4,8745
3
- scandeval/benchmarker.py,sha256=ARH1ATYAunKNRgIQTDvGqMN_M-ygG0SIQw-hfTOuC6U,53556
1
+ scandeval/__init__.py,sha256=wHhEEQ8wLNLAN9ULdAkWZpGSo08IpTx_w_gaya0FnVQ,3896
2
+ scandeval/benchmark_config_factory.py,sha256=NeikkDCfvTI3ZrAAP-kCQK6Ma3FfwITa_sZ4Ou0w3GM,8895
3
+ scandeval/benchmarker.py,sha256=HPG3qF3dX1hnhEc3WYsSGTkWJ8GeXC1ct_A-89IQTtw,54470
4
4
  scandeval/caching_utils.py,sha256=lLUbkpDdJZy4xodIpwIz5d-WNKGuszbr_d9dyiJ5kZc,2591
5
5
  scandeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
6
- scandeval/cli.py,sha256=zvPGomSdrcjxc4uhmh8SkB4s2d7U9JYhxBJ34vznqUI,9411
7
- scandeval/constants.py,sha256=wF7fQwaX8yZIypq_eh5RcaQFEhABR7dJxQaAX82b4P8,3766
6
+ scandeval/cli.py,sha256=BUrE8ca4wIOQjBM4NoyhNVzGPnVdjOl7xFXbUDuAsq0,9807
7
+ scandeval/constants.py,sha256=0IVDd0tmb3r6lKB5CODc4RqS7OofZdW3xE40jT74LeQ,4492
8
8
  scandeval/data_loading.py,sha256=8ryYEmj6di1f9QefGfNajxObQ9iapIGuAsL8m9KzDyI,7050
9
- scandeval/data_models.py,sha256=vRGKrYr1YFBcH4ngOHrESicbTaIcz-joKz58JN5YMFE,30548
9
+ scandeval/data_models.py,sha256=IaXgy5OKPA1wHP55-m9IqE2hBC8Kv8nhsUSTqJBq7ho,30968
10
10
  scandeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
11
11
  scandeval/exceptions.py,sha256=4-N2OIo5PJ2aciLjagNAVhdHPxpq2QxywbBqJ8lkKj0,5780
12
12
  scandeval/finetuning.py,sha256=dTjchPHLFRD65ZrEmtj5TfMTPZ6PODn77t372fgTNwE,11983
13
13
  scandeval/generation.py,sha256=ccE-S0jxkM99XziIdeaBbk8yRGv4YBkzZkoabhFCSKA,13382
14
14
  scandeval/generation_utils.py,sha256=A6YCiiMrMEUHq5BcVEjsouIKMPGt0sCfPzsJY1GVyk0,20092
15
15
  scandeval/languages.py,sha256=gUSosFbvf1eEQHjVsKhXdJ4jiGXC-9lMkOL8AsBG33Q,37295
16
- scandeval/logging_utils.py,sha256=Pd6DyHTPHCUsjtriomJboiTB35UdXvzxwnNpGTuec-g,9522
16
+ scandeval/logging_utils.py,sha256=Qnni11ngHrjCf_fgkk6lp6gs-tGSgUS3d5zRR83y6ec,9507
17
17
  scandeval/model_cache.py,sha256=sjMYW0klnHt2yAFLavDTsp_InxPeSOuVEFo-Rh_31UM,10219
18
18
  scandeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
19
- scandeval/model_loading.py,sha256=bE51L4-AaVgo9h10UsKH_47CB4tOJGU988HxotQ5sYE,2342
19
+ scandeval/model_loading.py,sha256=DsX7et18Epcv8kHATZgwPJnwH17GHmh3JCzrSoI3GAE,2377
20
20
  scandeval/scores.py,sha256=9a1XtppFbp8GJFc9JdThGxqBY0YUE7-92oyrlxScjNk,3281
21
21
  scandeval/speed_benchmark.py,sha256=VUOvauc9tuAegThNT2g1a-Z1l7DEmKq57dHI4t16o5A,4068
22
- scandeval/tasks.py,sha256=mgE6Vx_1WD9-aY-yeBxc_09Uyz-tqk69xISMWVYcrsY,5980
23
- scandeval/tokenisation_utils.py,sha256=Sa8V91J4NDFBF-qbConPsQvUkW_02cJp0gySz_Q3NDo,21191
24
- scandeval/types.py,sha256=-VNeeDEvlNwfemszpvuGb3Dr9Gu3Eqc6XRmR11HLRi4,3293
25
- scandeval/utils.py,sha256=BIAP9TWmY_xv6tuCUgmnYifoeodxlz8N2Q0We3frgLU,18389
22
+ scandeval/tasks.py,sha256=FQvnl28iudjIA2V_G3gHpSsyKaSs7r1i-T5c2pLAuF4,6656
23
+ scandeval/tokenisation_utils.py,sha256=K9ovIi5WNqLrFKkafl16R3K-2PallGwV_zeIFw_AM_k,21553
24
+ scandeval/types.py,sha256=CHQjLzqKYDXPCyZas7rKg6wD1pNiYuaOFMWimrj5H64,4374
25
+ scandeval/utils.py,sha256=P7RARAvJzm-CVavNjMXR2ZseWxT3irXegRzjrVIdCww,17481
26
26
  scandeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
27
27
  scandeval/benchmark_modules/base.py,sha256=5YAsCMILKTRXFx_ylGQ7iS5AFKN25iFdkBjj8KzzElw,11445
28
28
  scandeval/benchmark_modules/fresh.py,sha256=sG5ae4p1J-GGmVNcVBIxY1xZIAlUwq_pu-9c4uAYU3Y,10734
29
- scandeval/benchmark_modules/hf.py,sha256=f89E7XoMqsBHhYnMYBgy7ZuXDsAQ7VaIqMfFrHyjg8g,47363
30
- scandeval/benchmark_modules/litellm.py,sha256=TH35CQhoVinlmfHnAW-XJE21o96YfiIv993m0ASS80E,71590
31
- scandeval/benchmark_modules/vllm.py,sha256=pFCBuIp2m2KIlVMlqc7sGp1twiENvRHx3ppVs0bFvFo,57319
29
+ scandeval/benchmark_modules/hf.py,sha256=ob-05POUBDWk9dU_hUT7nmXZ11IGCnMgj6xkyLYyX98,48512
30
+ scandeval/benchmark_modules/litellm.py,sha256=jVagENE3a0PNMDOaj4DLY-p2Lf-BzNVB1_voPq2CLTU,75545
31
+ scandeval/benchmark_modules/vllm.py,sha256=pPKDHf5T_p0u9CJcR7R5sMmN98mirl64kWfyEHbtb5s,61720
32
32
  scandeval/dataset_configs/__init__.py,sha256=GFI_W9GKd3OSDdhhJzHc8mwoP9b32IHIIyvPBI-hK6k,3223
33
33
  scandeval/dataset_configs/albanian.py,sha256=D__dli7JO3yeHzzdJ3FFyUGw-z20f1yI6QLnws-WB8I,1473
34
34
  scandeval/dataset_configs/bosnian.py,sha256=golIWqwW1pFwSkuBM1v0yhHDblB2FoJgK24aO7kKm7M,877
@@ -37,7 +37,7 @@ scandeval/dataset_configs/catalan.py,sha256=SXwRJjIcMMN7rVuhFRZSnCGDoMfabW5HFoZO
37
37
  scandeval/dataset_configs/croatian.py,sha256=U5oBTjttpWTWonTEzZAf-G3nvQICRQmw6Kla-HWn_5k,1260
38
38
  scandeval/dataset_configs/czech.py,sha256=ghv2yNw839G-utll8PQRSjyKYbM5gfoQhFKy664GTCI,1562
39
39
  scandeval/dataset_configs/danish.py,sha256=LEKs04vK2KnV0CYheT7FeS-g3iHBvf2bQxyl0D_LbTg,3293
40
- scandeval/dataset_configs/dutch.py,sha256=OZJmaqGguXY5D9hz0zFNrwGQPRXgxZonctSc8Gsy9sY,3550
40
+ scandeval/dataset_configs/dutch.py,sha256=q9adDSpR08Ol5AMJJpp1e1T1ZbwmORaFnJaEGrAujm4,3747
41
41
  scandeval/dataset_configs/english.py,sha256=nc9nGwxf1tHVMUhQeND61yJbpTO4rJaAusPZlstqtq0,2817
42
42
  scandeval/dataset_configs/estonian.py,sha256=bWiKA_dJ7WUE8Z_1YZnSewhi4ZdCQBGJZ7pQxkCwMcU,2757
43
43
  scandeval/dataset_configs/faroese.py,sha256=13qYwXonDPWG9Av5MY_NBNTRDglPVKz5_mbz7ZCJ_mo,1247
@@ -60,10 +60,11 @@ scandeval/dataset_configs/slovene.py,sha256=r6BbFRvkFYf_4lvQaltaJ1VTVGETZ0xspsu9
60
60
  scandeval/dataset_configs/spanish.py,sha256=Q60nx69sGbYk8p0hg2cwLFyoPjg36FdstLQoacw9QmU,2928
61
61
  scandeval/dataset_configs/swedish.py,sha256=kpEK29swY7iyUSzUvD9hNf2qwb3d7bHrFwboCWVAf2k,3269
62
62
  scandeval/dataset_configs/ukrainian.py,sha256=spbCmCOU27jOfz6FZxqCIfVmDN5l8H-7VCl-k-8eAIo,1527
63
- scandeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
63
+ scandeval/metrics/__init__.py,sha256=nrjFjTK7NO5I8U6acULNzqezmMWN21aWd4faW4oYGHo,233
64
64
  scandeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
65
- scandeval/metrics/huggingface.py,sha256=W1hPuIGBALOogGN2yTGTJUsylsMII3A66fEe9nB8N2k,9493
66
- scandeval/metrics/llm_as_a_judge.py,sha256=cZ7ZCuB3633T87MjBtAekrBQ_vYaNv1uTcqnI32gNpQ,9837
65
+ scandeval/metrics/bias.py,sha256=sV87PLzjc3XPsSAz2HJ4hmlLZ_IcHDsIUr7gYmp9HKc,7765
66
+ scandeval/metrics/huggingface.py,sha256=eKXn5wBcNdzs23cgJ64XG8LIwen1wDxXy2kAOw3bjoQ,9579
67
+ scandeval/metrics/llm_as_a_judge.py,sha256=UUFk3aL2BZqJ-u9-dzexsoArTxPJTMmHRqb1eWxexaI,12133
67
68
  scandeval/metrics/pipeline.py,sha256=GTIqaFkn-nTLU4xBi8-zP1J4Ytv3qeFVuRB4OcuwkOw,10876
68
69
  scandeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
69
70
  scandeval/prompt_templates/__init__.py,sha256=p3CUcSaJiiUm6EQyhceDUjotH7GdyHolMznAn2f44as,519
@@ -79,11 +80,11 @@ scandeval/prompt_templates/token_classification.py,sha256=8Uw34mN2xQ_5es-nz7vCK-
79
80
  scandeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
80
81
  scandeval/task_group_utils/multiple_choice_classification.py,sha256=PWUXeGn-9RsXxdVRYHJASyBVQ8L5Jla981eot0GLooY,7316
81
82
  scandeval/task_group_utils/question_answering.py,sha256=tuMwr-RnvJap5jkTrluxC1tfQVS6rKN8_ifNwis-auw,29064
82
- scandeval/task_group_utils/sequence_classification.py,sha256=VhiggNrB7Gi2x-99MPL0RR2VZRv-wpJerXulgQH6wcU,16556
83
+ scandeval/task_group_utils/sequence_classification.py,sha256=1YAaKn5bY8j9ONPfJZODjaGKVMkA9fQcl51fvBcjeF8,16829
83
84
  scandeval/task_group_utils/text_to_text.py,sha256=p6zzjob70qQUpfUOs0LToSzavE1ERqRAHu_727Jb2mM,5476
84
85
  scandeval/task_group_utils/token_classification.py,sha256=8dF32KQAYAFnnn7DPHX-yvJmRrMBmT2CyFREacyTwvQ,17321
85
- scandeval-16.10.1.dist-info/METADATA,sha256=IYJza42KMRZdoc2-8z9NHaniGAH4K7hT1WHCyFT-Wow,23435
86
- scandeval-16.10.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
87
- scandeval-16.10.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
88
- scandeval-16.10.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
89
- scandeval-16.10.1.dist-info/RECORD,,
86
+ scandeval-16.12.0.dist-info/METADATA,sha256=YCSgBbbtWLDfWqepHFS8UX0zho8gpTXJC1lagT_l94w,24564
87
+ scandeval-16.12.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
88
+ scandeval-16.12.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
89
+ scandeval-16.12.0.dist-info/licenses/LICENSE,sha256=vb2c84xITVnhnVFsBS8AWXl-4S-KpxN6VMxTqqYlV3s,1080
90
+ scandeval-16.12.0.dist-info/RECORD,,
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2022-2025 Dan Saattrup Smart
3
+ Copyright (c) 2022-2026 Dan Saattrup Smart
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal