EuroEval 16.2.1 (py3-none-any.whl) → 16.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (39)
  1. euroeval/__init__.py +4 -2
  2. euroeval/benchmark_modules/fresh.py +3 -1
  3. euroeval/benchmark_modules/hf.py +8 -4
  4. euroeval/benchmark_modules/litellm.py +5 -17
  5. euroeval/benchmark_modules/vllm.py +98 -30
  6. euroeval/benchmarker.py +291 -405
  7. euroeval/cli.py +1 -1
  8. euroeval/constants.py +3 -0
  9. euroeval/data_models.py +35 -35
  10. euroeval/dataset_configs/__init__.py +1 -0
  11. euroeval/dataset_configs/danish.py +0 -2
  12. euroeval/dataset_configs/dutch.py +0 -2
  13. euroeval/dataset_configs/english.py +0 -2
  14. euroeval/dataset_configs/finnish.py +0 -2
  15. euroeval/dataset_configs/french.py +0 -2
  16. euroeval/dataset_configs/german.py +0 -2
  17. euroeval/dataset_configs/italian.py +0 -2
  18. euroeval/dataset_configs/latvian.py +2 -3
  19. euroeval/dataset_configs/lithuanian.py +62 -0
  20. euroeval/dataset_configs/norwegian.py +0 -2
  21. euroeval/dataset_configs/polish.py +0 -2
  22. euroeval/dataset_configs/portuguese.py +0 -2
  23. euroeval/dataset_configs/spanish.py +0 -2
  24. euroeval/dataset_configs/swedish.py +0 -3
  25. euroeval/metrics/huggingface.py +1 -1
  26. euroeval/metrics/pipeline.py +5 -0
  27. euroeval/prompt_templates/linguistic_acceptability.py +9 -0
  28. euroeval/prompt_templates/multiple_choice.py +9 -0
  29. euroeval/prompt_templates/named_entity_recognition.py +20 -0
  30. euroeval/prompt_templates/reading_comprehension.py +10 -0
  31. euroeval/prompt_templates/sentiment_classification.py +11 -0
  32. euroeval/tokenisation_utils.py +8 -8
  33. euroeval/utils.py +10 -5
  34. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/METADATA +181 -60
  35. euroeval-16.3.0.dist-info/RECORD +71 -0
  36. euroeval-16.2.1.dist-info/RECORD +0 -70
  37. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/WHEEL +0 -0
  38. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/entry_points.txt +0 -0
  39. {euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/licenses/LICENSE +0 -0
euroeval/utils.py CHANGED
@@ -62,6 +62,10 @@ def resolve_model_path(download_dir: str) -> str:
 
     Returns:
         The path to the model.
+
+    Raises:
+        InvalidModel:
+            If the model path is not valid, or if required files are missing.
     """
     model_path = Path(download_dir)
     # Get the 'path safe' version of the model id, which is the last dir in the path
@@ -271,14 +275,15 @@ def internet_connection_available() -> bool:
         s = socket.create_connection(("1.1.1.1", 80))
         s.close()
         return True
-    # a bit ugly but we dont want to actually import the pytest-socket exceptions
-    # we catch all exceptions and check if the name matches any known errors
+
+    # We want to only catch exceptions related to socket connections, but as we cannot
+    # import these here as they're developer dependencies, we check the exception name
+    # instead. If the exception is not related to socket connections, we reraise it.
    except Exception as e:
        pytest_socket_errors = ["SocketConnectBlockedError", "SocketBlockedError"]
        if type(e).__name__ in pytest_socket_errors or isinstance(e, OSError):
            return False
-        else:
-            raise e
+        raise e
 
 
 class HiddenPrints:
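The rewritten handler above avoids importing pytest-socket (a developer-only dependency) just to catch its exceptions, matching on the exception class name instead. A minimal standalone sketch of the same pattern — the function name and timeout here are illustrative, not taken from the package:

```python
import socket


def connection_available(host: str = "1.1.1.1", port: int = 80) -> bool:
    """Return True if a TCP connection to `host` can be opened."""
    try:
        socket.create_connection((host, port), timeout=3).close()
        return True
    except Exception as e:
        # pytest-socket raises these when sockets are blocked during tests;
        # matching on the class name avoids importing the dev dependency.
        if type(e).__name__ in {"SocketConnectBlockedError", "SocketBlockedError"}:
            return False
        if isinstance(e, OSError):  # an ordinary network failure
            return False
        raise
```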
@@ -457,7 +462,7 @@ def extract_json_dict_from_string(s: str) -> dict | None:
     Returns:
         The extracted JSON dictionary, or None if no JSON dictionary could be found.
     """
-    json_regex = r"\{[^{}]+?\}"
+    json_regex = r"\{[^{}]*?\}"
    if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
        logger.debug(
            "The model output does not contain any JSON dictionary, so cannot parse "
{euroeval-16.2.1.dist-info → euroeval-16.3.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.2.1
+Version: 16.3.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,21 +62,28 @@ Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: timm>=1.0.19; extra == 'all'
-Requires-Dist: vllm[flashinfer]>=0.10.1; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: timm>=1.0.19; extra == 'generative'
-Requires-Dist: vllm[flashinfer]>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm[flashinfer]<0.11.0,>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 
+<!-- This disables the requirement that the first line is a top-level heading -->
+<!-- markdownlint-configure-file { "MD041": false } -->
+
 <div align='center'>
-  <img src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png" height="500" width="372">
+  <img
+    src="https://raw.githubusercontent.com/EuroEval/EuroEval/main/gfx/euroeval.png"
+    height="500"
+    width="372"
+  >
 </div>
 
-### The robust European language model benchmark.
+### The robust European language model benchmark
 
-_(formerly known as ScandEval)_
+(formerly known as ScandEval)
 
 ______________________________________________________________________
 [![Documentation](https://img.shields.io/badge/docs-passing-green)](https://euroeval.com)
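Both extras now cap vLLM below 0.11.0 alongside the existing lower bound. For anyone checking what the combined specifier admits, a small sketch using the `packaging` library (assumed installed via `pip install packaging`):

```python
from packaging.specifiers import SpecifierSet

# The tightened constraint from the METADATA above.
vllm_spec = SpecifierSet(">=0.10.1,<0.11.0")

print("0.10.2" in vllm_spec)  # True: within the pinned range
print("0.11.0" in vllm_spec)  # False: now excluded by the upper bound
```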
@@ -88,16 +95,16 @@ ______________________________________________________________________
 [![Code Coverage](https://img.shields.io/badge/Coverage-67%25-yellow.svg)](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg)](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 
-
 ## Maintainer
 
-- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), dan.smart@alexandra.dk)
-
+- Dan Saattrup Smart ([@saattrupdan](https://github.com/saattrupdan), <dan.smart@alexandra.dk>)
 
 ## Installation
+
 To install the package simply write the following command in your favorite terminal:
-```
-$ pip install euroeval[all]
+
+```bash
+pip install euroeval[all]
 ```
 
 This will install the EuroEval package with all extras. You can also install the
@@ -105,51 +112,61 @@ minimal version by leaving out the `[all]`, in which case the package will let y
 when an evaluation requires a certain extra dependency, and how you install it.
 
 ## Quickstart
+
 ### Benchmarking from the Command Line
+
 The easiest way to benchmark pretrained models is via the command line interface. After
 having installed the package, you can benchmark your favorite model like so:
-```
-$ euroeval --model <model-id>
+
+```bash
+euroeval --model <model-id>
 ```
 
 Here `model` is the HuggingFace model ID, which can be found on the [HuggingFace
 Hub](https://huggingface.co/models). By default this will benchmark the model on all
 the tasks available. If you want to benchmark on a particular task, then use the
 `--task` argument:
-```
-$ euroeval --model <model-id> --task sentiment-classification
+
+```bash
+euroeval --model <model-id> --task sentiment-classification
 ```
 
 We can also narrow down which languages we would like to benchmark on. This can be done
 by setting the `--language` argument. Here we thus benchmark the model on the Danish
 sentiment classification task:
-```
-$ euroeval --model <model-id> --task sentiment-classification --language da
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da
 ```
 
 Multiple models, datasets and/or languages can be specified by just attaching multiple
 arguments. Here is an example with two models:
-```
-$ euroeval --model <model-id1> --model <model-id2>
+
+```bash
+euroeval --model <model-id1> --model <model-id2>
 ```
 
 The specific model version/revision to use can also be added after the suffix '@':
-```
-$ euroeval --model <model-id>@<commit>
+
+```bash
+euroeval --model <model-id>@<commit>
 ```
 
 This can be a branch name, a tag name, or a commit id. It defaults to 'main' for latest.
 
 See all the arguments and options available for the `euroeval` command by typing
-```
-$ euroeval --help
+
+```bash
+euroeval --help
 ```
 
 ### Benchmarking from a Script
+
 In a script, the syntax is similar to the command line interface. You simply initialise
 an object of the `Benchmarker` class, and call this benchmark object with your favorite
 model:
-```
+
+```python
 >>> from euroeval import Benchmarker
 >>> benchmark = Benchmarker()
 >>> benchmark(model="<model-id>")
@@ -157,29 +174,34 @@ model:
 
 To benchmark on a specific task and/or language, you simply specify the `task` or
 `language` arguments, shown here with same example as above:
-```
+
+```python
 >>> benchmark(model="<model-id>", task="sentiment-classification", language="da")
 ```
 
 If you want to benchmark a subset of all the models on the Hugging Face Hub, you can
 simply leave out the `model` argument. In this example, we're benchmarking all Danish
 models on the Danish sentiment classification task:
-```
+
+```python
 >>> benchmark(task="sentiment-classification", language="da")
 ```
 
 ### Benchmarking in an Offline Environment
+
 If you need to benchmark in an offline environment, you need to download the models,
 datasets and metrics beforehand. This can be done by adding the `--download-only`
 argument, from the command line, or the `download_only` argument, if benchmarking from a
 script. For example to download the model you want and all of the Danish sentiment
 classification datasets:
-```
-$ euroeval --model <model-id> --task sentiment-classification --language da --download-only
+
+```bash
+euroeval --model <model-id> --task sentiment-classification --language da --download-only
 ```
 
 Or from a script:
-```
+
+```python
 >>> benchmark(
 ...     model="<model-id>",
 ...     task="sentiment-classification",
@@ -193,11 +215,13 @@ internet connection will be required during evaluation. If offline support is im
 to you, please consider [opening an issue](https://github.com/EuroEval/EuroEval/issues).
 
 ### Benchmarking from Docker
+
 A Dockerfile is provided in the repo, which can be downloaded and run, without needing
 to clone the repo and installing from source. This can be fetched programmatically by
 running the following:
-```
-$ wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
+
+```bash
+wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
 ```
 
 Next, to be able to build the Docker image, first ensure that the NVIDIA Container
@@ -208,56 +232,153 @@ and
 Ensure that the the CUDA version stated at the top of the Dockerfile matches the CUDA
 version installed (which you can check using `nvidia-smi`). After that, we build the
 image as follows:
-```
-$ docker build --pull -t euroeval -f Dockerfile.cuda .
+
+```bash
+docker build --pull -t euroeval -f Dockerfile.cuda .
 ```
 
 With the Docker image built, we can now evaluate any model as follows:
-```
-$ docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
+
+```bash
+docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
 ```
 
 Here `<euroeval-arguments>` consists of the arguments added to the `euroeval` CLI
 argument. This could for instance be `--model <model-id> --task
 sentiment-classification`.
 
-
 ### Reproducing the datasets
+
 All datasets used in this project are generated using the scripts located in the
 [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
 with the following command
 
-```shell
-$ uv run src/scripts/<name-of-script>.py
+```bash
+uv run src/scripts/<name-of-script>.py
 ```
 
 Replace <name-of-script> with the specific script you wish to execute, e.g.,
 
-```shell
-$ uv run src/scripts/create_allocine.py
+```bash
+uv run src/scripts/create_allocine.py
 ```
 
 ## Contributors :pray:
 
 A huge thank you to all the contributors who have helped make this project a success!
 
-<a href="https://github.com/peter-sk"><img src="https://avatars.githubusercontent.com/u/6168908" width=50 alt="Contributor avatar for peter-sk"/></a>
-<a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
-<a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
-<a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
-<a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
-<a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
-<a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
-<a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
-<a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
-<a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
-<a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
-<a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
-<a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
-<a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
-<a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
-<a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
-
+<a href="https://github.com/peter-sk">
+  <img
+    src="https://avatars.githubusercontent.com/u/6168908"
+    width=50
+    alt="Contributor avatar for peter-sk"
+  />
+</a>
+<a href="https://github.com/AJDERS">
+  <img
+    src="https://avatars.githubusercontent.com/u/38854604"
+    width=50
+    alt="Contributor avatar for AJDERS"
+  />
+</a>
+<a href="https://github.com/oliverkinch">
+  <img
+    src="https://avatars.githubusercontent.com/u/71556498"
+    width=50
+    alt="Contributor avatar for oliverkinch"
+  />
+</a>
+<a href="https://github.com/versae">
+  <img
+    src="https://avatars.githubusercontent.com/u/173537"
+    width=50
+    alt="Contributor avatar for versae"
+  />
+</a>
+<a href="https://github.com/KennethEnevoldsen">
+  <img
+    src="https://avatars.githubusercontent.com/u/23721977"
+    width=50
+    alt="Contributor avatar for KennethEnevoldsen"
+  />
+</a>
+<a href="https://github.com/viggo-gascou">
+  <img
+    src="https://avatars.githubusercontent.com/u/94069687"
+    width=50
+    alt="Contributor avatar for viggo-gascou"
+  />
+</a>
+<a href="https://github.com/mathiasesn">
+  <img
+    src="https://avatars.githubusercontent.com/u/27091759"
+    width=50
+    alt="Contributor avatar for mathiasesn"
+  />
+</a>
+<a href="https://github.com/Alkarex">
+  <img
+    src="https://avatars.githubusercontent.com/u/1008324"
+    width=50
+    alt="Contributor avatar for Alkarex"
+  />
+</a>
+<a href="https://github.com/marksverdhei">
+  <img
+    src="https://avatars.githubusercontent.com/u/46672778"
+    width=50
+    alt="Contributor avatar for marksverdhei"
+  />
+</a>
+<a href="https://github.com/Mikeriess">
+  <img
+    src="https://avatars.githubusercontent.com/u/19728563"
+    width=50
+    alt="Contributor avatar for Mikeriess"
+  />
+</a>
+<a href="https://github.com/ThomasKluiters">
+  <img
+    src="https://avatars.githubusercontent.com/u/8137941"
+    width=50
+    alt="Contributor avatar for ThomasKluiters"
+  />
+</a>
+<a href="https://github.com/BramVanroy">
+  <img
+    src="https://avatars.githubusercontent.com/u/2779410"
+    width=50
+    alt="Contributor avatar for BramVanroy"
+  />
+</a>
+<a href="https://github.com/peregilk">
+  <img
+    src="https://avatars.githubusercontent.com/u/9079808"
+    width=50
+    alt="Contributor avatar for peregilk"
+  />
+</a>
+<a href="https://github.com/Rijgersberg">
+  <img
+    src="https://avatars.githubusercontent.com/u/8604946"
+    width=50
+    alt="Contributor avatar for Rijgersberg"
+  />
+</a>
+<a href="https://github.com/duarteocarmo">
+  <img
+    src="https://avatars.githubusercontent.com/u/26342344"
+    width=50
+    alt="Contributor avatar for duarteocarmo"
+  />
+</a>
+<a href="https://github.com/slowwavesleep">
+  <img
+    src="https://avatars.githubusercontent.com/u/44175589"
+    width=50
+    alt="Contributor avatar for slowwavesleep"
+  />
+</a>
 
 ### Contribute to EuroEval
 
@@ -269,8 +390,8 @@ contributing new datasets, your help makes this project better for everyone.
 - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
   a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
 
-
 ### Special Thanks
+
 - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
   [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
 - Thanks [@Mikeriess](https://github.com/Mikeriess) for evaluating many of the larger
@@ -285,11 +406,11 @@ contributing new datasets, your help makes this project better for everyone.
 - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
   evaluate GPT-4-turbo in German.
 
-
 ## Citing EuroEval
+
 If you want to cite the framework then feel free to use this:
 
-```
+```bibtex
 @article{smart2024encoder,
   title={Encoder vs Decoder: Comparative Analysis of Encoder and Decoder Language Models on Multilingual NLU Tasks},
   author={Smart, Dan Saattrup and Enevoldsen, Kenneth and Schneider-Kamp, Peter},
euroeval-16.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,71 @@
+euroeval/__init__.py,sha256=QJo_xezfFnpKBB32nvA_juy29tAz1eVn---MQiexYjE,3901
+euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
+euroeval/benchmarker.py,sha256=Nt4k1DivG-YtsSiqEwqsHfBzEkauo1lrsG1RAS0ZWuw,48928
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
+euroeval/constants.py,sha256=e1LRJe6CspvbKlfo4-9ee1wGocNoh1c7GcyaXpiN1Jk,2744
+euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
+euroeval/data_models.py,sha256=X4zAdR1K2MPb4f4Vc7gPYfolzFxxsz5WplnsmsiMYY8,27766
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
+euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
+euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
+euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
+euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
+euroeval/tokenisation_utils.py,sha256=7lQ83rP1Ws7HHg20bFbqD4GqtdbyBADwyxPBmFzAzVA,21158
+euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
+euroeval/utils.py,sha256=qAh8TLrJPk10l9qKcvD1mq2gNOGRTLl88PvPNj5IuRU,19451
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
+euroeval/benchmark_modules/fresh.py,sha256=qqsaC6u06YeJIK-Z6w9gZefb5cg1nU7ZDrO76l2GZN0,10779
+euroeval/benchmark_modules/hf.py,sha256=Z-Z_AxJk2APFXcZdyZrnKQ4OE_uRH81Vsm9x-gfJ1-I,44926
+euroeval/benchmark_modules/litellm.py,sha256=2EUhzLcxocfFxjbgyyP5QQtLieoH-fWbLR6RRz64EN8,64176
+euroeval/benchmark_modules/vllm.py,sha256=eTwS1YDB0v0lOWvv6_UXPlqNjNaPQTKRY-g495Y6X9s,46432
+euroeval/dataset_configs/__init__.py,sha256=ylO6FwnzlWmCuifliE_b4Vs5GXapYeyvZ4j1XVFmdN8,2086
+euroeval/dataset_configs/danish.py,sha256=fAMWYQVrx3B11r5NZSL-LWSQTJvCDwSxImIkIrGdoAA,5552
+euroeval/dataset_configs/dutch.py,sha256=883caShKOOi5s1Ky0_EKFeq0y9wVuqN-GVqeOwbKFr0,5438
+euroeval/dataset_configs/english.py,sha256=rl6bBIluKXkxT8L4e071GQuPprMHTI955mgW46V3Cp0,4658
+euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=pfO_flf6HHUbZZLae62cV30__uey_Oj37aiX0eBNWcQ,4311
+euroeval/dataset_configs/french.py,sha256=OdkCfWhtImgB3Ni6o0NRvCEvjeKAqausfJ2VO04CUwY,4641
+euroeval/dataset_configs/german.py,sha256=sav75C7f33OofQzliwvb3g7B7cw0MXm0G8wdlcmI7r8,5051
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=YucxgJtCG31sQplJ6hL64sF39ZSj926_a7McpCzKxh0,4925
+euroeval/dataset_configs/latvian.py,sha256=fB3tsqZoFldTnrlpeSu9iQQ907ptOVC8ZaielkgmVlM,2677
+euroeval/dataset_configs/lithuanian.py,sha256=QTahv862C5XzjLU8WHcExBGlkRFQnj9F4-I_5x1qJSk,1833
+euroeval/dataset_configs/norwegian.py,sha256=ipDIg2wXquZvIjlc4Bs-TbMJCKOoK6TL7lP9AzLOOj8,7666
+euroeval/dataset_configs/polish.py,sha256=5MTWLUmDG0qMgb1ATSdON2A_2ZFLlXUVjS0u64srfIg,3593
+euroeval/dataset_configs/portuguese.py,sha256=wanwK9LYdBND_JPh203L_YQraiLSd2kI8P0myy6U6Dk,4010
+euroeval/dataset_configs/spanish.py,sha256=xVWWHS84aOjDcutfAh7J2roHEb2KHZ084pYysH2BdSo,4823
+euroeval/dataset_configs/swedish.py,sha256=f_H7khH0IHcZXEQyYM8bpIvYnRsSj0EhVXh4RgpOCmw,5317
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
+euroeval/metrics/huggingface.py,sha256=7_97xfdqsznoBOm3diVvZtJ6k9XUa8isiVVmOgia8kI,6522
+euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
+euroeval/metrics/pipeline.py,sha256=aLNf0vKTfov-HZbvyJj9_9Z1rR1BkVsWxAea8btCWg8,10513
+euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=n-InOATuwdjlmDjiUdGIk9bQJMUgVFdp3u-iQ0K9WjY,9189
+euroeval/prompt_templates/multiple_choice.py,sha256=W0WZdAhbOV2jdHNhjfNNhgoPTbFKA2vhs72U0hP1rW0,7323
+euroeval/prompt_templates/named_entity_recognition.py,sha256=Kl7SB7vRJ-K9oXMZcJEffELaQlbwspNKUrQLDeNobcY,17301
+euroeval/prompt_templates/reading_comprehension.py,sha256=OtV8tu6wyf7rwW3krmyk8bzdNSRS5WkWFgxok4o67_o,9243
+euroeval/prompt_templates/sentiment_classification.py,sha256=tnalqea4TjG6z4xF7tDDKQm7rWrYGg6SIWTX3RDQQ20,10012
+euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
+euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
+euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
+euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
+euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
+euroeval-16.3.0.dist-info/METADATA,sha256=iSfb2jRJO7BfidNgy0jOKUXFh_WwBojxgisOBWQmYHg,15381
+euroeval-16.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.3.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.3.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.3.0.dist-info/RECORD,,
euroeval-16.2.1.dist-info/RECORD DELETED
@@ -1,70 +0,0 @@
-euroeval/__init__.py,sha256=mXTjuGrEE-1fIS9x28oJKg-gNGt4q7y2E74l330KEmY,3787
-euroeval/benchmark_config_factory.py,sha256=eOQsd9F4cJy8I7a3_lIKDZ5b5ukipIUqk0GZ3pyytwQ,8596
-euroeval/benchmarker.py,sha256=5l4p1ncq4VJX_bDjv2f8oBq2GETPtJmduGOnLAbWjF8,55762
-euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
-euroeval/cli.py,sha256=GOAWzdtasJfOvTuVQszu-T1T9GfQ_un-blOICO-y7g4,9316
-euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
-euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
-euroeval/data_models.py,sha256=9Sgrq6Ktg1ETXRJ0v4VA_amAPowGuB7fZtL-8RlDQn0,27766
-euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
-euroeval/generation.py,sha256=Va3EOmFzOMBNfI4fh3nW5qhhrM3CBT8_4MaLwVtsF_E,12528
-euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
-euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
-euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
-euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
-euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
-euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
-euroeval/tokenisation_utils.py,sha256=nLeF2cdZSm5PZiAcDTtxY82nUJ-or8VU8YxYLa167EM,21158
-euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
-euroeval/utils.py,sha256=DRJW6wtmNpRtuHt03diWo3S5m3rdxoPEQpd-KWi7aGY,19255
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
-euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
-euroeval/benchmark_modules/hf.py,sha256=XmkoDFzaJqnd_5mmUkqCaOgAdRPFs3KZKZZ0cr83TlM,44742
-euroeval/benchmark_modules/litellm.py,sha256=F3udd6NmhQOe3go_7rAcWg7mgZrNQpWWvLe-5U4E2RQ,64771
-euroeval/benchmark_modules/vllm.py,sha256=yLy8TCTnodu4NdTiO7XSdxuHX60AJ1-7p6J3e5h7-iA,43994
-euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
-euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
-euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
-euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
-euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
-euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
-euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
-euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
-euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
-euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
-euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
-euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
-euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=HST2XeZrUQZV_vTiieePiaznEov3CIGzuVNIITtLsQc,2596
-euroeval/metrics/huggingface.py,sha256=iHKJnvOXRc_e8sxB2ff3WkfK64jXyn5KEnIxPyfD2fM,6522
-euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
-euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
-euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=m23LrckohdnToQDsexdsW_5YyBfGTf5DTjiMI643F9A,8717
-euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
-euroeval/prompt_templates/named_entity_recognition.py,sha256=HIX9EBkSIBl5JXceFtiZTdvzWr9YHM9-55D6bcjIyQ4,16436
-euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
-euroeval/prompt_templates/sentiment_classification.py,sha256=b3TvH26M77vwFfn577NlGVW881qfV7YSm-Xba_w98Fc,9504
-euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
-euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
-euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
-euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
-euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
-euroeval-16.2.1.dist-info/METADATA,sha256=brIXZ3x3MUf-ggNpKKC_4Lvrqem0MfKPrJ8DZJ5T3Iw,14590
-euroeval-16.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.2.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.2.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.2.1.dist-info/RECORD,,