PyPI - ScandEval - Versions diffs - 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl - Mend

ScandEval 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

scandeval/__init__.py +0 -9
scandeval/benchmark_config_factory.py +5 -0
scandeval/benchmark_modules/hf.py +36 -8
scandeval/benchmark_modules/litellm.py +119 -22
scandeval/benchmark_modules/vllm.py +202 -94
scandeval/benchmarker.py +28 -7
scandeval/cli.py +13 -0
scandeval/constants.py +31 -2
scandeval/data_models.py +12 -2
scandeval/dataset_configs/dutch.py +10 -0
scandeval/logging_utils.py +1 -1
scandeval/metrics/__init__.py +1 -0
scandeval/metrics/bias.py +237 -0
scandeval/metrics/huggingface.py +5 -3
scandeval/metrics/llm_as_a_judge.py +79 -15
scandeval/model_loading.py +2 -1
scandeval/task_group_utils/sequence_classification.py +12 -3
scandeval/tasks.py +22 -0
scandeval/tokenisation_utils.py +12 -1
scandeval/types.py +39 -0
scandeval/utils.py +38 -66
{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/METADATA +50 -24
{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/RECORD +26 -25
{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/licenses/LICENSE +1 -1
{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/WHEEL +0 -0
{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/entry_points.txt +0 -0

scandeval/utils.py CHANGED Viewed

@@ -14,16 +14,17 @@ import socket
 import sys
 import typing as t
 from pathlib import Path
-from types import ModuleType, TracebackType
+from types import ModuleType
 import demjson3
 import huggingface_hub as hf_hub
 import numpy as np
 import torch
 from huggingface_hub.errors import LocalTokenNotFoundError
+from requests.exceptions import RequestException
 from .caching_utils import cache_arguments
-from .constants import T
+from .constants import LOCAL_MODELS_REQUIRED_FILES, T
 from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
 from .logging_utils import log, log_once
@@ -44,10 +45,25 @@ def create_model_cache_dir(cache_dir: str, model_id: str) -> str:
     Returns:
         The path to the cache directory.
     """
-    # to avoid nesting due to models name containing '/'
-    _model_id = model_id.replace("/", "--")
-    cache_dir_path = Path(cache_dir) / "model_cache" / _model_id
-    return str(cache_dir_path)
+    # If the model ID is a path, we just use that as the cache dir
+    if Path(model_id).is_dir():
+        log_once(
+            f"Since the model {model_id!r} is a local model, we will use the model "
+            "directory directly as the model cache directory.",
+            level=logging.DEBUG,
+        )
+        return model_id
+    # Otherwise, we create a cache dir based on the model ID
+    model_cache_dir = Path(
+        cache_dir, "model_cache", model_id.replace("/", "--")
+    ).as_posix()
+    log_once(
+        f"Using the model cache directory {model_cache_dir!r} for the model "
+        f"{model_id!r}.",
+        level=logging.DEBUG,
+    )
+    return model_cache_dir
 def resolve_model_path(download_dir: str) -> str:
@@ -65,8 +81,10 @@ def resolve_model_path(download_dir: str) -> str:
             If the model path is not valid, or if required files are missing.
     """
     model_path = Path(download_dir)
     # Get the 'path safe' version of the model id, which is the last dir in the path
     model_id_path = model_path.name
     # Hf hub `cache_dir` puts the files in models--`model_id_path`/snapshots
     model_path = model_path / f"models--{model_id_path}" / "snapshots"
     if not model_path.exists():
@@ -89,16 +107,16 @@ def resolve_model_path(download_dir: str) -> str:
             f"at {model_path}"
         )
-    # Check that found_files contains at least a 'config.json'
-    config_file = next(
-        (file for file in found_files if file.name == "config.json"), None
+    # Check that found_files contains at least one of the required files
+    found_required_file = next(
+        (file for file in found_files if file.name in LOCAL_MODELS_REQUIRED_FILES), None
     )
-    if config_file is None:
+    if found_required_file is None:
         raise InvalidModel(
-            f"Missing required file 'config.json' for {model_id_path.strip('models--')}"
-            f"at {model_path}"
+            f"At least one of the files {LOCAL_MODELS_REQUIRED_FILES} must be present "
+            f"for {model_id_path.strip('models--')} at {model_path}"
         )
-    model_path = config_file.parent
+    model_path = found_required_file.parent
     # As a precaution we also check that all of the files are in the same directory
     # if not we create a new dir with symlinks to all of the files from all snapshots
@@ -423,6 +441,13 @@ def get_hf_token(api_key: str | None) -> str | bool:
             level=logging.DEBUG,
         )
         return False
+    except RequestException:
+        log_once(
+            "No Hugging Face API key was set and the connection to Hugging Face "
+            "failed, so no token will be used.",
+            level=logging.DEBUG,
+        )
+        return False
 def extract_multiple_choice_labels(
@@ -521,56 +546,3 @@ def load_custom_datasets_module(custom_datasets_file: Path) -> ModuleType | None
         spec.loader.exec_module(module)
         return module
     return None
-class attention_backend:
-    """Context manager to temporarily set the attention backend.
-    This sets the `VLLM_ATTENTION_BACKEND` environment variable to the desired value
-    for the duration of the context manager, and restores the previous value afterwards.
-    """
-    def __init__(self, value: str | None) -> None:
-        """Initialise the context manager.
-        Args:
-            value:
-                The name of the attention backend to set. If None then no change is
-                made. Also, if the user has already set the `VLLM_ATTENTION_BACKEND` env
-                var, then no change is made.
-        """
-        user_has_set_backend = (
-            os.environ.get("USER_HAS_SET_VLLM_ATTENTION_BACKEND", "0") == "1"
-        )
-        self.value = None if user_has_set_backend else value
-        self.previous_value: str | None = None
-    def __enter__(self) -> None:
-        """Enter the context manager."""
-        if self.value is None:
-            return
-        self.previous_value = os.getenv("VLLM_ATTENTION_BACKEND")
-        os.environ["VLLM_ATTENTION_BACKEND"] = self.value
-    def __exit__(
-        self,
-        exc_type: t.Type[BaseException] | None,
-        exc_value: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        """Exit the context manager.
-        Args:
-            exc_type:
-                The type of the exception.
-            exc_value:
-                The value of the exception.
-            exc_tb:
-                The traceback of the exception.
-        """
-        if self.value is None:
-            return
-        if self.previous_value is None:
-            os.environ.pop("VLLM_ATTENTION_BACKEND", None)
-        else:
-            os.environ["VLLM_ATTENTION_BACKEND"] = self.previous_value

{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ScandEval
-Version: 16.10.1
+Version: 16.12.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -8,7 +8,7 @@ Author-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
 Maintainer-email: Dan Saattrup Smart <dan.smart@alexandra.dk>
 License: MIT License
-        Copyright (c) 2022-2025 Dan Saattrup Smart
+        Copyright (c) 2022-2026 Dan Saattrup Smart
         Permission is hereby granted, free of charge, to any person obtaining a copy
         of this software and associated documentation files (the "Software"), to deal
@@ -28,7 +28,7 @@ License: MIT License
         OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         SOFTWARE.
 License-File: LICENSE
-Requires-Python: <4.0,>=3.11
+Requires-Python: <4.0,>=3.12
 Requires-Dist: accelerate>=1.9.0
 Requires-Dist: bert-score>=0.3.13
 Requires-Dist: click>=8.1.3
@@ -59,19 +59,23 @@ Requires-Dist: setuptools>=75.8.2
 Requires-Dist: tenacity>=9.0.0
 Requires-Dist: termcolor>=2.0.0
 Requires-Dist: torch>=2.6.0
-Requires-Dist: transformers[mistral-common]>=4.56.0
+Requires-Dist: transformers[mistral-common]<5.0.0,>=4.56.0
 Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: timm>=1.0.19; extra == 'all'
-Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'all'
+Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'all'
+Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: ray>=2.53.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: timm>=1.0.19; extra == 'generative'
-Requires-Dist: vllm[flashinfer]==0.11.0; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm-metal>=0.1.0; (platform_system == 'Darwin') and extra == 'generative'
+Requires-Dist: vllm==0.11.0; (platform_system == 'Darwin') and extra == 'generative'
+Requires-Dist: vllm[flashinfer]>=0.14.1; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 <!-- This disables the requirement that the first line is a top-level heading -->
@@ -96,7 +100,7 @@ ______________________________________________________________________
 [![Second paper](https://img.shields.io/badge/arXiv-2406.13469-b31b1b.svg)](https://arxiv.org/abs/2406.13469)
 [![License](https://img.shields.io/github/license/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
 [![LastCommit](https://img.shields.io/github/last-commit/EuroEval/EuroEval)](https://github.com/EuroEval/EuroEval/commits/main)
-[![Code Coverage](https://img.shields.io/badge/Coverage-70%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
+[![Code Coverage](https://img.shields.io/badge/Coverage-74%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 ## Maintainer
@@ -123,16 +127,17 @@ The easiest way to benchmark pretrained models is via the command line interface
 having installed the package, you can benchmark your favorite model like so:
 ```bash
-euroeval --model <model-id>
+euroeval --model <model-id-or-path>
 ```
-Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
-Hub](https://huggingface.co/models). By default this will benchmark the model on all
-the tasks available. If you want to benchmark on a particular task, then use the
-`--task` argument:
+Here `model` is either the HuggingFace model ID, which can be found on the [HuggingFace
+Hub](https://huggingface.co/models), or a local path to a model directory (containing
+the model files as well as the `config.json` file). By default this will benchmark the
+model on all the tasks available. If you want to benchmark on a particular task, then
+use the `--task` argument:
 ```bash
-euroeval --model <model-id> --task sentiment-classification
+euroeval --model <model-id-or-path> --task sentiment-classification
 ```
 We can also narrow down which languages we would like to benchmark on. This can be done
@@ -140,20 +145,20 @@ by setting the `--language` argument. Here we thus benchmark the model on the Da
 sentiment classification task:
 ```bash
-euroeval --model <model-id> --task sentiment-classification --language da
+euroeval --model <model-id-or-path> --task sentiment-classification --language da
 ```
 Multiple models, datasets and/or languages can be specified by just attaching multiple
 arguments. Here is an example with two models:
 ```bash
-euroeval --model <model-id1> --model <model-id2>
+euroeval --model <model-id-or-path-1> --model <model-id-or-path-2>
 ```
 The specific model version/revision to use can also be added after the suffix '@':
 ```bash
-euroeval --model <model-id>@<commit>
+euroeval --model <model-id-or-path>@<commit>
 ```
 This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
@@ -173,7 +178,7 @@ model:
 ```python
 >>> from euroeval import Benchmarker
 >>> benchmarker = Benchmarker()
->>> benchmarker.benchmark(model="<model-id>")
+>>> benchmarker.benchmark(model="<model-id-or-path>")
 ```
 To benchmark on a specific task and/or language, you simply specify the `task` or
@@ -181,7 +186,7 @@ To benchmark on a specific task and/or language, you simply specify the `task` o
 ```python
 >>> benchmarker.benchmark(
-...     model="<model-id>",
+...     model="<model-id-or-path>",
 ...     task="sentiment-classification",
 ...     language="da",
 ... )
@@ -225,7 +230,7 @@ docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
 ```
 Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
-argument. This could for instance be `--model <model-id> --task
+argument. This could for instance be `--model <model-id-or-path> --task
 sentiment-classification`.
 ## Benchmarking custom inference APIs
@@ -291,14 +296,14 @@ script. For example to download the model you want and all of the Danish sentime
 classification datasets:
 ```bash
-euroeval --model <model-id> --task sentiment-classification --language da --download-only
+euroeval --model <model-id-or-path> --task sentiment-classification --language da --download-only
 ```
 Or from a script:
 ```python
 >>> benchmarker.benchmark(
-... model="<model-id>",
+... model="<model-id-or-path>",
 ... task="sentiment-classification",
 ... language="da",
 ... download_only=True,
@@ -346,7 +351,7 @@ MY_CONFIG = DatasetConfig(
 You can then benchmark your custom dataset by simply running
 ```bash
-euroeval --dataset my-dataset --model <model-id>
+euroeval --dataset my-dataset --model <model-id-or-path>
 ```
 You can also run the benchmark from a Python script, by simply providing your custom
@@ -356,7 +361,7 @@ dataset configuration directly into the `benchmark` method:
 from euroeval import Benchmarker
 benchmarker = Benchmarker()
-benchmarker.benchmark(model="<model-id>", dataset=MY_CONFIG)
+benchmarker.benchmark(model="<model-id-or-path>", dataset=MY_CONFIG)
 ```
 We have included three convenience tasks to make it easier to set up custom datasets:
@@ -436,7 +441,7 @@ MY_SQL_DATASET = DatasetConfig(
 Again, with this you can benchmark your custom dataset by simply running
 ```bash
-euroeval --dataset my-sql-dataset --model <model-id>
+euroeval --dataset my-sql-dataset --model <model-id-or-path>
 ```
 ## Reproducing the evaluation datasets
@@ -592,6 +597,27 @@ A huge thank you to all the contributors who have helped make this project a suc
         alt="Contributor avatar for tvosch"
     />
 </a>
+<a href="https://github.com/Touzen">
+    <img
+        src="https://avatars.githubusercontent.com/u/1416265"
+        width=50
+        alt="Contributor avatar for Touzen"
+    />
+</a>
+<a href="https://github.com/caldaibis">
+    <img
+        src="https://avatars.githubusercontent.com/u/16032437"
+        width=50
+        alt="Contributor avatar for caldaibis"
+    />
+</a>
+<a href="https://github.com/SwekeR-463">
+    <img
+        src="https://avatars.githubusercontent.com/u/114919896?v=4"
+        width=50
+        alt="Contributor avatar for SwekeR-463"
+    />
+</a>
 ### Contribute to EuroEval

{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/RECORD RENAMED Viewed

@@ -1,34 +1,34 @@
-scandeval/__init__.py,sha256=w4oYw-lbj5ZZ4pv-bHrgZNJ6dlu-WcAWg2e--_UMmeE,4244
-scandeval/benchmark_config_factory.py,sha256=2stmcqKwx0G9pAiA0atunqDchJ9eoezp1Wh3vB41zV4,8745
-scandeval/benchmarker.py,sha256=ARH1ATYAunKNRgIQTDvGqMN_M-ygG0SIQw-hfTOuC6U,53556
+scandeval/__init__.py,sha256=wHhEEQ8wLNLAN9ULdAkWZpGSo08IpTx_w_gaya0FnVQ,3896
+scandeval/benchmark_config_factory.py,sha256=NeikkDCfvTI3ZrAAP-kCQK6Ma3FfwITa_sZ4Ou0w3GM,8895
+scandeval/benchmarker.py,sha256=HPG3qF3dX1hnhEc3WYsSGTkWJ8GeXC1ct_A-89IQTtw,54470
 scandeval/caching_utils.py,sha256=lLUbkpDdJZy4xodIpwIz5d-WNKGuszbr_d9dyiJ5kZc,2591
 scandeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
-scandeval/cli.py,sha256=zvPGomSdrcjxc4uhmh8SkB4s2d7U9JYhxBJ34vznqUI,9411
-scandeval/constants.py,sha256=wF7fQwaX8yZIypq_eh5RcaQFEhABR7dJxQaAX82b4P8,3766
+scandeval/cli.py,sha256=BUrE8ca4wIOQjBM4NoyhNVzGPnVdjOl7xFXbUDuAsq0,9807
+scandeval/constants.py,sha256=0IVDd0tmb3r6lKB5CODc4RqS7OofZdW3xE40jT74LeQ,4492
 scandeval/data_loading.py,sha256=8ryYEmj6di1f9QefGfNajxObQ9iapIGuAsL8m9KzDyI,7050
-scandeval/data_models.py,sha256=vRGKrYr1YFBcH4ngOHrESicbTaIcz-joKz58JN5YMFE,30548
+scandeval/data_models.py,sha256=IaXgy5OKPA1wHP55-m9IqE2hBC8Kv8nhsUSTqJBq7ho,30968
 scandeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
 scandeval/exceptions.py,sha256=4-N2OIo5PJ2aciLjagNAVhdHPxpq2QxywbBqJ8lkKj0,5780
 scandeval/finetuning.py,sha256=dTjchPHLFRD65ZrEmtj5TfMTPZ6PODn77t372fgTNwE,11983
 scandeval/generation.py,sha256=ccE-S0jxkM99XziIdeaBbk8yRGv4YBkzZkoabhFCSKA,13382
 scandeval/generation_utils.py,sha256=A6YCiiMrMEUHq5BcVEjsouIKMPGt0sCfPzsJY1GVyk0,20092
 scandeval/languages.py,sha256=gUSosFbvf1eEQHjVsKhXdJ4jiGXC-9lMkOL8AsBG33Q,37295
-scandeval/logging_utils.py,sha256=Pd6DyHTPHCUsjtriomJboiTB35UdXvzxwnNpGTuec-g,9522
+scandeval/logging_utils.py,sha256=Qnni11ngHrjCf_fgkk6lp6gs-tGSgUS3d5zRR83y6ec,9507
 scandeval/model_cache.py,sha256=sjMYW0klnHt2yAFLavDTsp_InxPeSOuVEFo-Rh_31UM,10219
 scandeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
-scandeval/model_loading.py,sha256=bE51L4-AaVgo9h10UsKH_47CB4tOJGU988HxotQ5sYE,2342
+scandeval/model_loading.py,sha256=DsX7et18Epcv8kHATZgwPJnwH17GHmh3JCzrSoI3GAE,2377
 scandeval/scores.py,sha256=9a1XtppFbp8GJFc9JdThGxqBY0YUE7-92oyrlxScjNk,3281
 scandeval/speed_benchmark.py,sha256=VUOvauc9tuAegThNT2g1a-Z1l7DEmKq57dHI4t16o5A,4068
-scandeval/tasks.py,sha256=mgE6Vx_1WD9-aY-yeBxc_09Uyz-tqk69xISMWVYcrsY,5980
-scandeval/tokenisation_utils.py,sha256=Sa8V91J4NDFBF-qbConPsQvUkW_02cJp0gySz_Q3NDo,21191
-scandeval/types.py,sha256=-VNeeDEvlNwfemszpvuGb3Dr9Gu3Eqc6XRmR11HLRi4,3293
-scandeval/utils.py,sha256=BIAP9TWmY_xv6tuCUgmnYifoeodxlz8N2Q0We3frgLU,18389
+scandeval/tasks.py,sha256=FQvnl28iudjIA2V_G3gHpSsyKaSs7r1i-T5c2pLAuF4,6656
+scandeval/tokenisation_utils.py,sha256=K9ovIi5WNqLrFKkafl16R3K-2PallGwV_zeIFw_AM_k,21553
+scandeval/types.py,sha256=CHQjLzqKYDXPCyZas7rKg6wD1pNiYuaOFMWimrj5H64,4374
+scandeval/utils.py,sha256=P7RARAvJzm-CVavNjMXR2ZseWxT3irXegRzjrVIdCww,17481
 scandeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
 scandeval/benchmark_modules/base.py,sha256=5YAsCMILKTRXFx_ylGQ7iS5AFKN25iFdkBjj8KzzElw,11445
 scandeval/benchmark_modules/fresh.py,sha256=sG5ae4p1J-GGmVNcVBIxY1xZIAlUwq_pu-9c4uAYU3Y,10734
-scandeval/benchmark_modules/hf.py,sha256=f89E7XoMqsBHhYnMYBgy7ZuXDsAQ7VaIqMfFrHyjg8g,47363
-scandeval/benchmark_modules/litellm.py,sha256=TH35CQhoVinlmfHnAW-XJE21o96YfiIv993m0ASS80E,71590
-scandeval/benchmark_modules/vllm.py,sha256=pFCBuIp2m2KIlVMlqc7sGp1twiENvRHx3ppVs0bFvFo,57319
+scandeval/benchmark_modules/hf.py,sha256=ob-05POUBDWk9dU_hUT7nmXZ11IGCnMgj6xkyLYyX98,48512
+scandeval/benchmark_modules/litellm.py,sha256=jVagENE3a0PNMDOaj4DLY-p2Lf-BzNVB1_voPq2CLTU,75545
+scandeval/benchmark_modules/vllm.py,sha256=pPKDHf5T_p0u9CJcR7R5sMmN98mirl64kWfyEHbtb5s,61720
 scandeval/dataset_configs/__init__.py,sha256=GFI_W9GKd3OSDdhhJzHc8mwoP9b32IHIIyvPBI-hK6k,3223
 scandeval/dataset_configs/albanian.py,sha256=D__dli7JO3yeHzzdJ3FFyUGw-z20f1yI6QLnws-WB8I,1473
 scandeval/dataset_configs/bosnian.py,sha256=golIWqwW1pFwSkuBM1v0yhHDblB2FoJgK24aO7kKm7M,877
@@ -37,7 +37,7 @@ scandeval/dataset_configs/catalan.py,sha256=SXwRJjIcMMN7rVuhFRZSnCGDoMfabW5HFoZO
 scandeval/dataset_configs/croatian.py,sha256=U5oBTjttpWTWonTEzZAf-G3nvQICRQmw6Kla-HWn_5k,1260
 scandeval/dataset_configs/czech.py,sha256=ghv2yNw839G-utll8PQRSjyKYbM5gfoQhFKy664GTCI,1562
 scandeval/dataset_configs/danish.py,sha256=LEKs04vK2KnV0CYheT7FeS-g3iHBvf2bQxyl0D_LbTg,3293
-scandeval/dataset_configs/dutch.py,sha256=OZJmaqGguXY5D9hz0zFNrwGQPRXgxZonctSc8Gsy9sY,3550
+scandeval/dataset_configs/dutch.py,sha256=q9adDSpR08Ol5AMJJpp1e1T1ZbwmORaFnJaEGrAujm4,3747
 scandeval/dataset_configs/english.py,sha256=nc9nGwxf1tHVMUhQeND61yJbpTO4rJaAusPZlstqtq0,2817
 scandeval/dataset_configs/estonian.py,sha256=bWiKA_dJ7WUE8Z_1YZnSewhi4ZdCQBGJZ7pQxkCwMcU,2757
 scandeval/dataset_configs/faroese.py,sha256=13qYwXonDPWG9Av5MY_NBNTRDglPVKz5_mbz7ZCJ_mo,1247
@@ -60,10 +60,11 @@ scandeval/dataset_configs/slovene.py,sha256=r6BbFRvkFYf_4lvQaltaJ1VTVGETZ0xspsu9
 scandeval/dataset_configs/spanish.py,sha256=Q60nx69sGbYk8p0hg2cwLFyoPjg36FdstLQoacw9QmU,2928
 scandeval/dataset_configs/swedish.py,sha256=kpEK29swY7iyUSzUvD9hNf2qwb3d7bHrFwboCWVAf2k,3269
 scandeval/dataset_configs/ukrainian.py,sha256=spbCmCOU27jOfz6FZxqCIfVmDN5l8H-7VCl-k-8eAIo,1527
-scandeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+scandeval/metrics/__init__.py,sha256=nrjFjTK7NO5I8U6acULNzqezmMWN21aWd4faW4oYGHo,233
 scandeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
-scandeval/metrics/huggingface.py,sha256=W1hPuIGBALOogGN2yTGTJUsylsMII3A66fEe9nB8N2k,9493
-scandeval/metrics/llm_as_a_judge.py,sha256=cZ7ZCuB3633T87MjBtAekrBQ_vYaNv1uTcqnI32gNpQ,9837
+scandeval/metrics/bias.py,sha256=sV87PLzjc3XPsSAz2HJ4hmlLZ_IcHDsIUr7gYmp9HKc,7765
+scandeval/metrics/huggingface.py,sha256=eKXn5wBcNdzs23cgJ64XG8LIwen1wDxXy2kAOw3bjoQ,9579
+scandeval/metrics/llm_as_a_judge.py,sha256=UUFk3aL2BZqJ-u9-dzexsoArTxPJTMmHRqb1eWxexaI,12133
 scandeval/metrics/pipeline.py,sha256=GTIqaFkn-nTLU4xBi8-zP1J4Ytv3qeFVuRB4OcuwkOw,10876
 scandeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
 scandeval/prompt_templates/__init__.py,sha256=p3CUcSaJiiUm6EQyhceDUjotH7GdyHolMznAn2f44as,519
@@ -79,11 +80,11 @@ scandeval/prompt_templates/token_classification.py,sha256=8Uw34mN2xQ_5es-nz7vCK-
 scandeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 scandeval/task_group_utils/multiple_choice_classification.py,sha256=PWUXeGn-9RsXxdVRYHJASyBVQ8L5Jla981eot0GLooY,7316
 scandeval/task_group_utils/question_answering.py,sha256=tuMwr-RnvJap5jkTrluxC1tfQVS6rKN8_ifNwis-auw,29064
-scandeval/task_group_utils/sequence_classification.py,sha256=VhiggNrB7Gi2x-99MPL0RR2VZRv-wpJerXulgQH6wcU,16556
+scandeval/task_group_utils/sequence_classification.py,sha256=1YAaKn5bY8j9ONPfJZODjaGKVMkA9fQcl51fvBcjeF8,16829
 scandeval/task_group_utils/text_to_text.py,sha256=p6zzjob70qQUpfUOs0LToSzavE1ERqRAHu_727Jb2mM,5476
 scandeval/task_group_utils/token_classification.py,sha256=8dF32KQAYAFnnn7DPHX-yvJmRrMBmT2CyFREacyTwvQ,17321
-scandeval-16.10.1.dist-info/METADATA,sha256=IYJza42KMRZdoc2-8z9NHaniGAH4K7hT1WHCyFT-Wow,23435
-scandeval-16.10.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-scandeval-16.10.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-scandeval-16.10.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-scandeval-16.10.1.dist-info/RECORD,,
+scandeval-16.12.0.dist-info/METADATA,sha256=YCSgBbbtWLDfWqepHFS8UX0zho8gpTXJC1lagT_l94w,24564
+scandeval-16.12.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+scandeval-16.12.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+scandeval-16.12.0.dist-info/licenses/LICENSE,sha256=vb2c84xITVnhnVFsBS8AWXl-4S-KpxN6VMxTqqYlV3s,1080
+scandeval-16.12.0.dist-info/RECORD,,

{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/licenses/LICENSE RENAMED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2022-2025 Dan Saattrup Smart
+Copyright (c) 2022-2026 Dan Saattrup Smart
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{scandeval-16.10.1.dist-info → scandeval-16.12.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

ScandEval 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl

ScandEval 16.10.1__py3-none-any.whl → 16.12.0__py3-none-any.whl