PyPI - EuroEval - Versions diffs - 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl - Mend

EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of EuroEval might be problematic. Click here for more details.

Files changed (64)

euroeval/__init__.py +8 -7
euroeval/benchmark_config_factory.py +3 -7
euroeval/benchmark_modules/base.py +35 -19
euroeval/benchmark_modules/fresh.py +24 -19
euroeval/benchmark_modules/hf.py +136 -154
euroeval/benchmark_modules/litellm.py +190 -110
euroeval/benchmark_modules/vllm.py +199 -139
euroeval/benchmarker.py +49 -22
euroeval/cli.py +3 -3
euroeval/constants.py +19 -15
euroeval/data_loading.py +33 -28
euroeval/data_models.py +73 -23
euroeval/dataset_configs/__init__.py +2 -0
euroeval/dataset_configs/danish.py +35 -1
euroeval/dataset_configs/dutch.py +38 -1
euroeval/dataset_configs/english.py +38 -1
euroeval/dataset_configs/estonian.py +95 -0
euroeval/dataset_configs/faroese.py +38 -0
euroeval/dataset_configs/finnish.py +39 -1
euroeval/dataset_configs/french.py +38 -1
euroeval/dataset_configs/german.py +38 -1
euroeval/dataset_configs/icelandic.py +39 -1
euroeval/dataset_configs/italian.py +38 -1
euroeval/dataset_configs/latvian.py +81 -0
euroeval/dataset_configs/norwegian.py +38 -1
euroeval/dataset_configs/portuguese.py +38 -1
euroeval/dataset_configs/spanish.py +38 -1
euroeval/dataset_configs/swedish.py +38 -1
euroeval/enums.py +0 -6
euroeval/finetuning.py +6 -6
euroeval/generation.py +25 -14
euroeval/generation_utils.py +90 -20
euroeval/languages.py +947 -187
euroeval/metrics/__init__.py +6 -0
euroeval/metrics/base.py +76 -0
euroeval/metrics/huggingface.py +192 -0
euroeval/metrics/llm_as_a_judge.py +257 -0
euroeval/metrics/pipeline.py +276 -0
euroeval/metrics/speed.py +51 -0
euroeval/model_cache.py +13 -1
euroeval/prompt_templates/linguistic_acceptability.py +40 -2
euroeval/prompt_templates/multiple_choice.py +23 -2
euroeval/prompt_templates/named_entity_recognition.py +65 -2
euroeval/prompt_templates/reading_comprehension.py +42 -2
euroeval/prompt_templates/sentiment_classification.py +46 -2
euroeval/prompt_templates/summarization.py +24 -4
euroeval/scores.py +7 -2
euroeval/speed_benchmark.py +6 -6
euroeval/task_group_utils/multiple_choice_classification.py +19 -8
euroeval/task_group_utils/question_answering.py +35 -28
euroeval/task_group_utils/sequence_classification.py +128 -42
euroeval/task_group_utils/text_to_text.py +7 -3
euroeval/task_group_utils/token_classification.py +59 -73
euroeval/tasks.py +33 -6
euroeval/tokenization_utils.py +294 -207
euroeval/utils.py +150 -35
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA +13 -14
euroeval-16.0.1.dist-info/RECORD +69 -0
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt +0 -1
euroeval/human_evaluation.py +0 -738
euroeval/metrics.py +0 -470
euroeval-15.16.0.dist-info/RECORD +0 -63
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/WHEEL +0 -0
{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/licenses/LICENSE +0 -0

euroeval/utils.py CHANGED Viewed

@@ -8,12 +8,15 @@ import importlib.util
 import logging
 import os
 import random
+import re
 import sys
 import typing as t
 import warnings
 from functools import cache
 from pathlib import Path
+import demjson3
+import huggingface_hub as hf_hub
 import litellm
 import numpy as np
 import requests
@@ -22,10 +25,7 @@ from datasets.utils import disable_progress_bar
 from requests.exceptions import RequestException
 from transformers import logging as tf_logging
-from .exceptions import NaNValueInModelOutput
-if importlib.util.find_spec("ray") is not None:
-    import ray
+from .exceptions import InvalidBenchmark, NaNValueInModelOutput
 if t.TYPE_CHECKING:
     from types import TracebackType
@@ -94,54 +94,53 @@ def block_terminal_output() -> None:
     # Ignore miscellaneous warnings
     warnings.filterwarnings("ignore", category=UserWarning)
     warnings.filterwarnings("ignore", category=FutureWarning)
-    warnings.filterwarnings(
-        "ignore",
-        module="torch.nn.parallel*",
-        message="Was asked to gather along dimension 0, but all input tensors were "
-        "scalars; will instead unsqueeze and return a vector.",
-    )
-    warnings.filterwarnings("ignore", module="seqeval*")
-    # Up the logging level, to disable outputs
-    logging.getLogger("filelock").setLevel(logging.CRITICAL)
     logging.getLogger("absl").setLevel(logging.CRITICAL)
-    logging.getLogger("datasets").setLevel(logging.CRITICAL)
+    # Disable matplotlib logging
+    logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
+    # Disable PyTorch logging
+    logging.getLogger("torch.utils.cpp_extension").setLevel(logging.CRITICAL)
+    warnings.filterwarnings(action="ignore", module="torch*")
+    os.environ["TORCH_LOGS"] = "-all"
+    # Disable huggingface_hub logging
+    logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
+    # Disable LiteLLM logging
+    logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
+    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
     logging.getLogger("openai").setLevel(logging.CRITICAL)
-    logging.getLogger("torch.distributed.distributed_c10d").setLevel(logging.CRITICAL)
-    logging.getLogger("torch.nn.parallel.distributed").setLevel(logging.CRITICAL)
+    logging.getLogger("httpx").setLevel(logging.CRITICAL)
+    litellm.suppress_debug_info = True
+    # Disable vLLM logging
     logging.getLogger("vllm").setLevel(logging.CRITICAL)
     logging.getLogger("vllm.engine.llm_engine").setLevel(logging.CRITICAL)
     logging.getLogger("vllm.transformers_utils.tokenizer").setLevel(logging.CRITICAL)
     logging.getLogger("vllm.core.scheduler").setLevel(logging.CRITICAL)
     logging.getLogger("vllm.model_executor.weight_utils").setLevel(logging.CRITICAL)
     logging.getLogger("vllm.platforms").setLevel(logging.CRITICAL)
-    logging.getLogger("httpx").setLevel(logging.CRITICAL)
-    logging.getLogger("ray._private.worker").setLevel(logging.CRITICAL)
-    logging.getLogger("ray._private.services").setLevel(logging.CRITICAL)
-    logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
-    logging.getLogger("accelerate").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
-    logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
-    logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
-    # This suppresses vLLM logging
+    logging.getLogger("mistral_common.tokens.tokenizers.tekken").setLevel(
+        logging.CRITICAL
+    )
     os.environ["LOG_LEVEL"] = "CRITICAL"
     os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
-    if importlib.util.find_spec("ray") is not None:
-        ray._private.worker._worker_logs_enabled = False
-    # Disable the tokeniser progress bars
+    # Disable datasets logging
+    logging.getLogger("datasets").setLevel(logging.CRITICAL)
+    logging.getLogger("filelock").setLevel(logging.CRITICAL)
     disable_progress_bar()
+    # Disable evaluate logging
+    warnings.filterwarnings("ignore", module="seqeval*")
     # Disable most of the `transformers` logging
     tf_logging._default_log_level = logging.CRITICAL
     tf_logging.set_verbosity(logging.CRITICAL)
     logging.getLogger("transformers.trainer").setLevel(logging.CRITICAL)
-    # Disable logging from `litellm`
-    litellm.suppress_debug_info = True
+    logging.getLogger("accelerate").setLevel(logging.CRITICAL)
 def get_class_by_name(class_name: str | list[str], module_name: str) -> t.Type | None:
@@ -373,3 +372,119 @@ async def add_semaphore_and_catch_exception(
             return await coroutine
         except Exception as exc:
             return exc
+def extract_json_dict_from_string(s: str) -> dict | None:
+    """Extract a JSON dictionary from a string.
+    Args:
+        s:
+            The string to extract the JSON dictionary from.
+    Returns:
+        The extracted JSON dictionary, or None if no JSON dictionary could be found.
+    """
+    json_regex = r"\{[^{}]+?\}"
+    if (json_match := re.search(pattern=json_regex, string=s, flags=re.DOTALL)) is None:
+        logger.debug(
+            "The model output does not contain any JSON dictionary, so cannot parse "
+            f"it. Skipping. Here is the output: {s!r}"
+        )
+        return None
+    json_string = json_match.group()
+    try:
+        json_output = demjson3.decode(txt=json_string)
+    except demjson3.JSONDecodeError:
+        logger.debug(
+            "The model output is not valid JSON, so cannot parse it. Skipping. "
+            f"Here is the output: {json_string!r}"
+        )
+        return None
+    if not isinstance(json_output, dict):
+        logger.debug(
+            "The model output is not a JSON dictionary, so cannot parse "
+            f"it. Skipping. Here is the output: {json_string!r}"
+        )
+        return None
+    elif not all(isinstance(key, str) for key in json_output.keys()):
+        logger.debug(
+            "The model output is not a JSON dictionary with string keys, "
+            "so cannot parse it. Skipping. Here is the output: "
+            f"{json_string!r}"
+        )
+        return None
+    return json_output
+@cache
+def get_hf_token(api_key: str | None) -> str | bool:
+    """Get the Hugging Face token.
+    Args:
+        api_key:
+            The API key to use as the Hugging Face token. If None, we will try to
+            extract it in other ways.
+    Returns:
+        The Hugging Face token, or True if no token is set but the user is logged in, or
+        False if no token is set and the user is not logged in.
+    """
+    if api_key is not None:
+        log_once(
+            "Using the Hugging Face API key passed to the function.",
+            level=logging.DEBUG,
+        )
+        return api_key
+    elif (token := os.getenv("HUGGINGFACE_API_KEY")) is not None:
+        log_once(
+            "Using the Hugging Face API key from the environment variable "
+            "`HUGGINGFACE_API_KEY`.",
+            level=logging.DEBUG,
+        )
+        return token
+    try:
+        hf_hub.whoami()
+        log_once(
+            "No Hugging Face API key was set, but the user is logged in to Hugging "
+            "Face, so using the local token.",
+            level=logging.DEBUG,
+        )
+        return True
+    except hf_hub.errors.LocalTokenNotFoundError:
+        log_once(
+            "No Hugging Face API key was set and the user is not logged in to Hugging "
+            "Face, so no token will be used.",
+            level=logging.DEBUG,
+        )
+        return False
+def extract_multiple_choice_labels(
+    prompt: str, candidate_labels: list[str]
+) -> list[str]:
+    """Extract multiple choice labels from a prompt.
+    Args:
+        prompt:
+            The prompt to extract the labels from.
+        candidate_labels:
+            The candidate labels to look for in the prompt.
+    Returns:
+        The extracted labels.
+    """
+    sample_candidate_labels: list[str] = list()
+    for candidate_label in candidate_labels:
+        candidate_label_match = re.search(
+            pattern=rf"\b{candidate_label}\. ", string=prompt, flags=re.IGNORECASE
+        )
+        if candidate_label_match is not None:
+            sample_candidate_labels.append(candidate_label)
+    if not sample_candidate_labels:
+        raise InvalidBenchmark(
+            "Could not extract any candidate labels from the prompt. Please ensure "
+            "that the candidate labels are present in the prompt, each followed by a "
+            "dot and a space (e.g., 'a. '). The candidate labels are: "
+            f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
+        )
+    return sample_candidate_labels

{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.16.0
+Version: 16.0.1
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -28,18 +28,19 @@ License: MIT License
         OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         SOFTWARE.
 License-File: LICENSE
-Requires-Python: <4.0,>=3.10
+Requires-Python: <4.0,>=3.11
 Requires-Dist: accelerate>=1.9.0
 Requires-Dist: bert-score>=0.3.13
 Requires-Dist: click>=8.1.3
+Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: datasets>=3.5.0
 Requires-Dist: demjson3>=3.0.6
 Requires-Dist: evaluate>=0.4.1
 Requires-Dist: huggingface-hub>=0.30.1
 Requires-Dist: levenshtein>=0.24.0
-Requires-Dist: litellm>=1.72.2
+Requires-Dist: litellm>=1.75.6
 Requires-Dist: more-itertools>=10.5.0
-Requires-Dist: numpy<2.0.0,>=1.23.0
+Requires-Dist: numpy>=2.0.0
 Requires-Dist: ollama>=0.5.1
 Requires-Dist: pandas>=2.2.0
 Requires-Dist: peft>=0.15.0
@@ -49,27 +50,24 @@ Requires-Dist: pyinfer>=0.0.3
 Requires-Dist: python-dotenv>=1.0.1
 Requires-Dist: rouge-score>=0.1.2
 Requires-Dist: sacremoses>=0.1.1
-Requires-Dist: scikit-learn<1.6.0
+Requires-Dist: scikit-learn==1.6.1
 Requires-Dist: sentencepiece>=0.1.96
 Requires-Dist: seqeval>=1.2.2
 Requires-Dist: setuptools>=75.8.2
 Requires-Dist: tenacity>=9.0.0
 Requires-Dist: termcolor>=2.0.0
 Requires-Dist: torch>=2.6.0
-Requires-Dist: transformers>=4.55.0
+Requires-Dist: transformers[mistral-common]>=4.56.0
 Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
-Requires-Dist: gradio>=4.26.0; extra == 'all'
-Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
-Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'generative'
-Provides-Extra: human-evaluation
-Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
-Provides-Extra: test
-Requires-Dist: gradio>=4.26.0; extra == 'test'
+Requires-Dist: flashinfer-python>=0.3.1; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm>=0.10.1; (platform_system == 'Linux') and extra == 'generative'
 Description-Content-Type: text/markdown
 <div align='center'>
@@ -223,17 +221,18 @@ A huge thank you to all the contributors who have helped make this project a suc
 <a href="https://github.com/AJDERS"><img src="https://avatars.githubusercontent.com/u/38854604" width=50 alt="Contributor avatar for AJDERS"/></a>
 <a href="https://github.com/oliverkinch"><img src="https://avatars.githubusercontent.com/u/71556498" width=50 alt="Contributor avatar for oliverkinch"/></a>
 <a href="https://github.com/versae"><img src="https://avatars.githubusercontent.com/u/173537" width=50 alt="Contributor avatar for versae"/></a>
+<a href="https://github.com/KennethEnevoldsen"><img src="https://avatars.githubusercontent.com/u/23721977" width=50 alt="Contributor avatar for KennethEnevoldsen"/></a>
 <a href="https://github.com/viggo-gascou"><img src="https://avatars.githubusercontent.com/u/94069687" width=50 alt="Contributor avatar for viggo-gascou"/></a>
 <a href="https://github.com/mathiasesn"><img src="https://avatars.githubusercontent.com/u/27091759" width=50 alt="Contributor avatar for mathiasesn"/></a>
 <a href="https://github.com/Alkarex"><img src="https://avatars.githubusercontent.com/u/1008324" width=50 alt="Contributor avatar for Alkarex"/></a>
 <a href="https://github.com/marksverdhei"><img src="https://avatars.githubusercontent.com/u/46672778" width=50 alt="Contributor avatar for marksverdhei"/></a>
 <a href="https://github.com/Mikeriess"><img src="https://avatars.githubusercontent.com/u/19728563" width=50 alt="Contributor avatar for Mikeriess"/></a>
-<a href="https://github.com/pakagronglb"><img src="https://avatars.githubusercontent.com/u/178713124" width=50 alt="Contributor avatar for pakagronglb"/></a>
 <a href="https://github.com/ThomasKluiters"><img src="https://avatars.githubusercontent.com/u/8137941" width=50 alt="Contributor avatar for ThomasKluiters"/></a>
 <a href="https://github.com/BramVanroy"><img src="https://avatars.githubusercontent.com/u/2779410" width=50 alt="Contributor avatar for BramVanroy"/></a>
 <a href="https://github.com/peregilk"><img src="https://avatars.githubusercontent.com/u/9079808" width=50 alt="Contributor avatar for peregilk"/></a>
 <a href="https://github.com/Rijgersberg"><img src="https://avatars.githubusercontent.com/u/8604946" width=50 alt="Contributor avatar for Rijgersberg"/></a>
 <a href="https://github.com/duarteocarmo"><img src="https://avatars.githubusercontent.com/u/26342344" width=50 alt="Contributor avatar for duarteocarmo"/></a>
+<a href="https://github.com/slowwavesleep"><img src="https://avatars.githubusercontent.com/u/44175589" width=50 alt="Contributor avatar for slowwavesleep"/></a>
 ### Contribute to EuroEval

euroeval-16.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,69 @@
+euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
+euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
+euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
+euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
+euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
+euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
+euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
+euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
+euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
+euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
+euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
+euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
+euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
+euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
+euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
+euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
+euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
+euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
+euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
+euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
+euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
+euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
+euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
+euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
+euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
+euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
+euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
+euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
+euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
+euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
+euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
+euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
+euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
+euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
+euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
+euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
+euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
+euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
+euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
+euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
+euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
+euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
+euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
+euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
+euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
+euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
+euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
+euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
+euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
+euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
+euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
+euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
+euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
+euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
+euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
+euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
+euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
+euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
+euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.0.1.dist-info/RECORD,,

{euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,4 +1,3 @@
 [console_scripts]
 euroeval = euroeval.cli:benchmark
-human_evaluate = euroeval.human_evaluation:main
 scandeval = euroeval.cli:benchmark

EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl

Potentially problematic release.

EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl