EuroEval 15.4.1__py3-none-any.whl → 15.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval might be problematic.
- euroeval/__init__.py +2 -2
- euroeval/benchmark_modules/hf.py +79 -39
- euroeval/benchmark_modules/litellm.py +204 -74
- euroeval/benchmark_modules/vllm.py +106 -42
- euroeval/benchmarker.py +35 -6
- euroeval/constants.py +11 -1
- euroeval/data_models.py +6 -2
- euroeval/dataset_configs.py +6 -6
- euroeval/task_utils/sequence_classification.py +70 -30
- euroeval/types.py +3 -3
- euroeval/utils.py +131 -32
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/METADATA +6 -4
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/RECORD +16 -16
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/WHEEL +0 -0
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/licenses/LICENSE +0 -0
euroeval/__init__.py
CHANGED
```diff
@@ -4,6 +4,7 @@
 ### Block unwanted terminal output that happens on importing external modules ###
 
 import logging
+import os
 import sys
 import warnings
 
@@ -14,7 +15,7 @@ warnings.filterwarnings("ignore", category=UserWarning)
 logging.getLogger("httpx").setLevel(logging.CRITICAL)
 logging.getLogger("datasets").setLevel(logging.CRITICAL)
 logging.getLogger("vllm").setLevel(logging.CRITICAL)
-
+os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
 
 # Set up logging
 fmt = colored("%(asctime)s", "light_blue") + " ⋅ " + colored("%(message)s", "green")
@@ -29,7 +30,6 @@ logging.basicConfig(
 ### Set the rest up ###
 
 import importlib.metadata  # noqa: E402
-import os  # noqa: E402
 
 from dotenv import load_dotenv  # noqa: E402
 
```
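Moving `import os` above the logging setup matters here: vLLM reads `VLLM_CONFIGURE_LOGGING` at import time to decide whether to install its own logging configuration, so the variable only takes effect if it is set before vLLM is first imported, whereas logger levels can be adjusted at any point. A minimal sketch of the ordering (vLLM is treated as optional so the snippet runs without it):

```python
import logging
import os

# Must be set before vLLM is imported: the variable is read at import time.
os.environ["VLLM_CONFIGURE_LOGGING"] = "0"

# Logger levels, by contrast, can be changed at any time, because loggers
# live in a process-wide registry keyed by name.
logging.getLogger("vllm").setLevel(logging.CRITICAL)

try:
    import vllm  # noqa: F401  # picks up the environment variable set above
except ImportError:
    pass  # vLLM not installed; the import ordering is the point of the sketch
```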
euroeval/benchmark_modules/hf.py
CHANGED
```diff
@@ -20,6 +20,7 @@ from huggingface_hub.utils import (
     HFValidationError,
     LocalTokenNotFoundError,
 )
+from peft import PeftConfig
 from requests.exceptions import RequestException
 from torch import nn
 from transformers import (
@@ -34,12 +35,16 @@ from transformers import (
     Trainer,
 )
 from transformers.modelcard import TASK_MAPPING
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
+)
 from urllib3.exceptions import RequestError
 
 from ..constants import (
     DUMMY_FILL_VALUE,
     GENERATIVE_PIPELINE_TAGS,
     LOCAL_MODELS_REQUIRED_FILES,
+    MAX_CONTEXT_LENGTH,
     MERGE_TAGS,
 )
 from ..data_models import BenchmarkConfig, DatasetConfig, HFModelInfo, ModelConfig, Task
@@ -73,6 +78,7 @@ from ..utils import (
     get_class_by_name,
     get_eos_token,
     internet_connection_available,
+    log_once,
 )
 from .base import BenchmarkModule
 
```
```diff
@@ -240,6 +246,15 @@ class HuggingFaceEncoderModel(BenchmarkModule):
             max_length for max_length in all_max_lengths if max_length >= 128
         ]
 
+        # We remove the upper cap of maximum context length for the model, as it is
+        # highly unlikely that this is the model's actual maximum context length - we
+        # would rather not report a value than report an incorrect one.
+        all_max_lengths = [
+            max_length
+            for max_length in all_max_lengths
+            if max_length != MAX_CONTEXT_LENGTH
+        ]
+
         if len(list(all_max_lengths)) > 0:
             model_max_length = min(list(all_max_lengths))
         else:
```
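The new filter treats a candidate equal to `MAX_CONTEXT_LENGTH` as the cap itself rather than a genuine context size, so it is dropped instead of reported. A stripped-down sketch of the selection logic; `resolve_model_max_length` is a hypothetical helper, and the constant's value here is illustrative (the real one lives in `euroeval.constants`):

```python
MAX_CONTEXT_LENGTH = 5_000  # assumed value; defined in euroeval.constants

def resolve_model_max_length(candidates: list[int]) -> int:
    """Pick a model max length from candidate values, or -1 if unknown."""
    # Discard implausibly small values (mirrors the `>= 128` filter above).
    candidates = [c for c in candidates if c >= 128]
    # Discard the sentinel cap: a candidate equal to the cap is more likely
    # the cap itself than the model's true context length.
    candidates = [c for c in candidates if c != MAX_CONTEXT_LENGTH]
    return min(candidates) if candidates else -1

assert resolve_model_max_length([512, 5_000]) == 512
assert resolve_model_max_length([5_000]) == -1  # better no value than a wrong one
```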
```diff
@@ -727,53 +742,54 @@ def get_model_repo_info(
     # If the model does not exist locally, then we get the model info from the Hugging
     # Face Hub
     if model_info is None:
-        try:
-            model_info = hf_api.model_info(
-                repo_id=model_id, revision=revision, token=token
-            )
-        except (GatedRepoError, LocalTokenNotFoundError) as e:
+        num_attempts = 3
+        for _ in range(num_attempts):
             try:
-                hf_whoami(token=token)
-                logger.warning(
-                    f"Could not access the model {model_id} with the revision "
-                    f"{revision}. The error was {str(e)!r}."
+                model_info = hf_api.model_info(
+                    repo_id=model_id, revision=revision, token=token
                 )
+                break
+            except (GatedRepoError, LocalTokenNotFoundError) as e:
+                try:
+                    hf_whoami(token=token)
+                    logger.warning(
+                        f"Could not access the model {model_id} with the revision "
+                        f"{revision}. The error was {str(e)!r}."
+                    )
+                    return None
+                except LocalTokenNotFoundError:
+                    raise NeedsAdditionalArgument(
+                        cli_argument="--api-key",
+                        script_argument="api_key=<your-api-key>",
+                        run_with_cli=benchmark_config.run_with_cli,
+                    )
+            except (RepositoryNotFoundError, HFValidationError):
                 return None
-            except LocalTokenNotFoundError:
-                raise NeedsAdditionalArgument(
-                    cli_argument="--api-key",
-                    script_argument="api_key=<your-api-key>",
-                    run_with_cli=benchmark_config.run_with_cli,
-                )
-        except (RepositoryNotFoundError, HFValidationError):
-            return None
-        except (OSError, RequestException):
-            if internet_connection_available():
-                raise HuggingFaceHubDown()
-            else:
+            except (OSError, RequestException):
+                if internet_connection_available():
+                    continue
                 raise NoInternetConnection()
+        else:
+            raise HuggingFaceHubDown()
 
     # Get all the Hugging Face repository tags for the model. If the model is an adapter
     # model, then we also get the tags for the base model
     tags = model_info.tags or list()
-    has_base_model_tag = any(
-        tag.startswith("base_model:") and tag.count(":") == 1 for tag in tags
-    )
     base_model_id: str | None = None
-
-
-
-
+    has_adapter_config = model_info.siblings is not None and any(
+        sibling.rfilename == "adapter_config.json" for sibling in model_info.siblings
+    )
+    if has_adapter_config:
+        adapter_config = PeftConfig.from_pretrained(model_id, revision=revision)
+        base_model_id = adapter_config.base_model_name_or_path
+        log_once(
+            f"Model {model_id!r} identified as an adapter model, with base model "
+            f"{base_model_id!r}.",
+            level=logging.DEBUG,
         )
-    if has_base_model_tag:
-        base_model_id = [
-            tag.split(":")[1]
-            for tag in tags
-            if tag.startswith("base_model:") and tag.count(":") == 1
-        ][0]
+    if base_model_id is not None:
         base_model_info = hf_api.model_info(
             repo_id=base_model_id,
-            revision=revision,
             token=benchmark_config.api_key
             or os.getenv("HUGGINGFACE_API_KEY")
             or True,
```
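The one-shot `try`/`except` around `hf_api.model_info` becomes a bounded retry loop: transient network errors (`OSError`, `RequestException`) now `continue` to the next attempt, and the `for`/`else` clause raises `HuggingFaceHubDown` only after every attempt has failed while a connection was available. A stripped-down sketch of the control flow, with placeholder names:

```python
class ServiceDown(Exception):
    """Placeholder for HuggingFaceHubDown."""

def fetch_with_retries(fetch, num_attempts: int = 3):
    """Call `fetch` up to `num_attempts` times, tolerating transient errors."""
    for _ in range(num_attempts):
        try:
            result = fetch()
            break  # success: leave the loop, which skips the else-clause
        except OSError:
            continue  # transient failure: try again
    else:
        # Only reached when the loop ran to completion without a break,
        # i.e. every attempt failed.
        raise ServiceDown()
    return result
```

The `for`/`else` construct keeps the happy path free of attempt counters: `break` on success skips the `else`, while exhausting the loop falls through to it.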
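Adapter detection changes in the same hunk: rather than parsing `base_model:` tags, which relied on repository authors tagging correctly, the code now checks for an `adapter_config.json` sibling file and asks PEFT for the base model. A sketch of that lookup (gated repos may additionally need a token):

```python
from huggingface_hub import HfApi
from peft import PeftConfig

def find_adapter_base_model(model_id: str, revision: str = "main") -> str | None:
    """Return the base model id if `model_id` is a PEFT adapter, else None."""
    info = HfApi().model_info(repo_id=model_id, revision=revision)
    siblings = info.siblings or []
    # The presence of adapter_config.json is what marks a PEFT adapter repo.
    if any(s.rfilename == "adapter_config.json" for s in siblings):
        config = PeftConfig.from_pretrained(model_id, revision=revision)
        return config.base_model_name_or_path
    return None
```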
```diff
@@ -781,12 +797,18 @@ def get_model_repo_info(
         tags += base_model_info.tags or list()
         tags = list(set(tags))
 
+    # TEMP: This extends the `TASK_MAPPING` dictionary to include the missing
+    # 'image-text-to-text' pipeline tag. This will be added as part of `TASK_MAPPING`
+    # when this PR has been merged in and published:
+    # https://github.com/huggingface/transformers/pull/37107
+    TASK_MAPPING["image-text-to-text"] = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
+
     # Get the pipeline tag for the model. If it is not specified, then we determine it
     # by checking the model's architecture as written in the model's Hugging Face config
     pipeline_tag = model_info.pipeline_tag
     if pipeline_tag is None:
         hf_config = load_hf_model_config(
-            model_id=model_id,
+            model_id=base_model_id or model_id,
             num_labels=0,
             id2label=dict(),
             label2id=dict(),
```
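The `TASK_MAPPING` patch matters for the fallback path, which matches the architectures declared in the model's config against the class names registered for each pipeline tag; without the extra entry, image-text-to-text models would fall through unrecognised. A hypothetical sketch of that matching step (`infer_pipeline_tag` is not a EuroEval function):

```python
from transformers import AutoConfig
from transformers.modelcard import TASK_MAPPING

def infer_pipeline_tag(model_id: str) -> str | None:
    """Guess a pipeline tag from the architectures in the model's config."""
    config = AutoConfig.from_pretrained(model_id)
    architectures = getattr(config, "architectures", None) or []
    for tag, mapping_names in TASK_MAPPING.items():
        # Each mapping is keyed by model type and valued by class names,
        # e.g. "llama" -> "LlamaForCausalLM".
        if any(arch in mapping_names.values() for arch in architectures):
            return tag
    return None
```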
```diff
@@ -812,7 +834,6 @@ def get_model_repo_info(
         pipeline_tag = "fill-mask"
 
     if benchmark_config.only_allow_safetensors:
-        # Check if any file ends with .safetensors
         repo_files = hf_api.list_repo_files(repo_id=model_id, revision=revision)
         has_safetensors = any(f.endswith(".safetensors") for f in repo_files)
         if not has_safetensors:
```
```diff
@@ -826,6 +847,26 @@ def get_model_repo_info(
             )
             raise InvalidModel(msg)
 
+        # Also check base model if we are evaluating an adapter
+        if base_model_id is not None:
+            base_repo_files = hf_api.list_repo_files(repo_id=base_model_id)
+            base_has_safetensors = any(
+                f.endswith(".safetensors") for f in base_repo_files
+            )
+            if not base_has_safetensors:
+                msg = (
+                    f"Base model {base_model_id} does not have safetensors weights "
+                    "available."
+                )
+                if benchmark_config.run_with_cli:
+                    msg += " Skipping since the `--only-allow-safetensors` flag is set."
+                else:
+                    msg += (
+                        " Skipping since the `only_allow_safetensors` argument is set "
+                        "to `True`."
+                    )
+                raise InvalidModel(msg)
+
     return HFModelInfo(
         pipeline_tag=pipeline_tag, tags=tags, adapter_base_model_id=base_model_id
     )
```
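With the new block, the safetensors gate covers both the adapter repository and its base model. The check itself is a filename scan over the repo listing; a minimal sketch:

```python
from huggingface_hub import HfApi

def has_safetensors_weights(repo_id: str, revision: str | None = None) -> bool:
    """True if the repo contains at least one .safetensors weight file."""
    files = HfApi().list_repo_files(repo_id=repo_id, revision=revision)
    return any(f.endswith(".safetensors") for f in files)
```

In the diff above this runs twice: once for `model_id` at the pinned revision, and once for `base_model_id` at the default revision, raising `InvalidModel` when either repo lacks safetensors weights.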
```diff
@@ -1109,8 +1150,7 @@ def align_model_and_tokenizer(
     Returns:
         The fixed model and tokenizer.
     """
-
-    model_max_length = min(model_max_length, 5_000)
+    model_max_length = min(model_max_length, MAX_CONTEXT_LENGTH)
 
     if model_max_length > 0:
         tokenizer.model_max_length = model_max_length
```