EuroEval 15.4.1__py3-none-any.whl → 15.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval might be problematic.
- euroeval/__init__.py +2 -2
- euroeval/benchmark_modules/hf.py +79 -39
- euroeval/benchmark_modules/litellm.py +204 -74
- euroeval/benchmark_modules/vllm.py +106 -42
- euroeval/benchmarker.py +35 -6
- euroeval/constants.py +11 -1
- euroeval/data_models.py +6 -2
- euroeval/dataset_configs.py +6 -6
- euroeval/task_utils/sequence_classification.py +70 -30
- euroeval/types.py +3 -3
- euroeval/utils.py +131 -32
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/METADATA +6 -4
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/RECORD +16 -16
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/WHEEL +0 -0
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.4.1.dist-info → euroeval-15.5.0.dist-info}/licenses/LICENSE +0 -0
euroeval/__init__.py
CHANGED
```diff
@@ -4,6 +4,7 @@
 ### Block unwanted terminal output that happens on importing external modules ###
 
 import logging
+import os
 import sys
 import warnings
 
@@ -14,7 +15,7 @@ warnings.filterwarnings("ignore", category=UserWarning)
 logging.getLogger("httpx").setLevel(logging.CRITICAL)
 logging.getLogger("datasets").setLevel(logging.CRITICAL)
 logging.getLogger("vllm").setLevel(logging.CRITICAL)
-
+os.environ["VLLM_CONFIGURE_LOGGING"] = "0"
 
 # Set up logging
 fmt = colored("%(asctime)s", "light_blue") + " ⋅ " + colored("%(message)s", "green")
@@ -29,7 +30,6 @@ logging.basicConfig(
 ### Set the rest up ###
 
 import importlib.metadata  # noqa: E402
-import os  # noqa: E402
 
 from dotenv import load_dotenv  # noqa: E402
 
```
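Moving `import os` above the logging setup matters here: vLLM reads `VLLM_CONFIGURE_LOGGING` at import time to decide whether to install its own logging configuration, so the variable only takes effect if it is set before vLLM is first imported, whereas logger levels can be adjusted at any point. A minimal sketch of the ordering (vLLM is treated as optional so the snippet runs without it):

```python
import logging
import os

# Must be set before vLLM is imported: the variable is read at import time.
os.environ["VLLM_CONFIGURE_LOGGING"] = "0"

# Logger levels, by contrast, can be changed at any time, because loggers
# live in a process-wide registry keyed by name.
logging.getLogger("vllm").setLevel(logging.CRITICAL)

try:
    import vllm  # noqa: F401  # picks up the environment variable set above
except ImportError:
    pass  # vLLM not installed; the import ordering is the point of the sketch
```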
euroeval/benchmark_modules/hf.py
CHANGED
```diff
@@ -20,6 +20,7 @@ from huggingface_hub.utils import (
     HFValidationError,
     LocalTokenNotFoundError,
 )
+from peft import PeftConfig
 from requests.exceptions import RequestException
 from torch import nn
 from transformers import (
@@ -34,12 +35,16 @@ from transformers import (
     Trainer,
 )
 from transformers.modelcard import TASK_MAPPING
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
+)
 from urllib3.exceptions import RequestError
 
 from ..constants import (
     DUMMY_FILL_VALUE,
     GENERATIVE_PIPELINE_TAGS,
     LOCAL_MODELS_REQUIRED_FILES,
+    MAX_CONTEXT_LENGTH,
     MERGE_TAGS,
 )
 from ..data_models import BenchmarkConfig, DatasetConfig, HFModelInfo, ModelConfig, Task
@@ -73,6 +78,7 @@ from ..utils import (
     get_class_by_name,
     get_eos_token,
     internet_connection_available,
+    log_once,
 )
 from .base import BenchmarkModule
 
```
```diff
@@ -240,6 +246,15 @@ class HuggingFaceEncoderModel(BenchmarkModule):
             max_length for max_length in all_max_lengths if max_length >= 128
         ]
 
+        # We remove the upper cap of maximum context length for the model, as it is
+        # highly unlikely that this is the model's actual maximum context length - we
+        # would rather not report a value than report an incorrect one.
+        all_max_lengths = [
+            max_length
+            for max_length in all_max_lengths
+            if max_length != MAX_CONTEXT_LENGTH
+        ]
+
         if len(list(all_max_lengths)) > 0:
             model_max_length = min(list(all_max_lengths))
         else:
```
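The new filter treats a candidate equal to `MAX_CONTEXT_LENGTH` as the cap itself rather than a genuine context size, so it is dropped instead of reported. A stripped-down sketch of the selection logic; `resolve_model_max_length` is a hypothetical helper, and the constant's value here is illustrative (the real one lives in `euroeval.constants`):

```python
MAX_CONTEXT_LENGTH = 5_000  # assumed value; defined in euroeval.constants

def resolve_model_max_length(candidates: list[int]) -> int:
    """Pick a model max length from candidate values, or -1 if unknown."""
    # Discard implausibly small values (mirrors the `>= 128` filter above).
    candidates = [c for c in candidates if c >= 128]
    # Discard the sentinel cap: a candidate equal to the cap is more likely
    # the cap itself than the model's true context length.
    candidates = [c for c in candidates if c != MAX_CONTEXT_LENGTH]
    return min(candidates) if candidates else -1

assert resolve_model_max_length([512, 5_000]) == 512
assert resolve_model_max_length([5_000]) == -1  # better no value than a wrong one
```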
```diff
@@ -727,53 +742,54 @@ def get_model_repo_info(
     # If the model does not exist locally, then we get the model info from the Hugging
     # Face Hub
     if model_info is None:
-        try:
-            model_info = hf_api.model_info(
-                repo_id=model_id, revision=revision, token=token
-            )
-        except (GatedRepoError, LocalTokenNotFoundError) as e:
+        num_attempts = 3
+        for _ in range(num_attempts):
             try:
-                hf_whoami(token=token)
-                logger.warning(
-                    f"Could not access the model {model_id} with the revision "
-                    f"{revision}. The error was {str(e)!r}."
+                model_info = hf_api.model_info(
+                    repo_id=model_id, revision=revision, token=token
                 )
+                break
+            except (GatedRepoError, LocalTokenNotFoundError) as e:
+                try:
+                    hf_whoami(token=token)
+                    logger.warning(
+                        f"Could not access the model {model_id} with the revision "
+                        f"{revision}. The error was {str(e)!r}."
+                    )
+                    return None
+                except LocalTokenNotFoundError:
+                    raise NeedsAdditionalArgument(
+                        cli_argument="--api-key",
+                        script_argument="api_key=<your-api-key>",
+                        run_with_cli=benchmark_config.run_with_cli,
+                    )
+            except (RepositoryNotFoundError, HFValidationError):
                 return None
-            except LocalTokenNotFoundError:
-                raise NeedsAdditionalArgument(
-                    cli_argument="--api-key",
-                    script_argument="api_key=<your-api-key>",
-                    run_with_cli=benchmark_config.run_with_cli,
-                )
-        except (RepositoryNotFoundError, HFValidationError):
-            return None
-        except (OSError, RequestException):
-            if internet_connection_available():
-                raise HuggingFaceHubDown()
-            else:
+            except (OSError, RequestException):
+                if internet_connection_available():
+                    continue
                 raise NoInternetConnection()
+        else:
+            raise HuggingFaceHubDown()
 
     # Get all the Hugging Face repository tags for the model. If the model is an adapter
     # model, then we also get the tags for the base model
     tags = model_info.tags or list()
-    has_base_model_tag = any(
-        tag.startswith("base_model:") and tag.count(":") == 1 for tag in tags
-    )
     base_model_id: str | None = None
-
-
-
-
+    has_adapter_config = model_info.siblings is not None and any(
+        sibling.rfilename == "adapter_config.json" for sibling in model_info.siblings
+    )
+    if has_adapter_config:
+        adapter_config = PeftConfig.from_pretrained(model_id, revision=revision)
+        base_model_id = adapter_config.base_model_name_or_path
+        log_once(
+            f"Model {model_id!r} identified as an adapter model, with base model "
+            f"{base_model_id!r}.",
+            level=logging.DEBUG,
         )
-    if has_base_model_tag:
-        base_model_id = [
-            tag.split(":")[1]
-            for tag in tags
-            if tag.startswith("base_model:") and tag.count(":") == 1
-        ][0]
+    if base_model_id is not None:
         base_model_info = hf_api.model_info(
             repo_id=base_model_id,
-            revision=revision,
             token=benchmark_config.api_key
             or os.getenv("HUGGINGFACE_API_KEY")
             or True,
```
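The one-shot `try`/`except` around `hf_api.model_info` becomes a bounded retry loop: transient network errors (`OSError`, `RequestException`) now `continue` to the next attempt, and the `for`/`else` clause raises `HuggingFaceHubDown` only after every attempt has failed while a connection was available. A stripped-down sketch of the control flow, with placeholder names:

```python
class ServiceDown(Exception):
    """Placeholder for HuggingFaceHubDown."""

def fetch_with_retries(fetch, num_attempts: int = 3):
    """Call `fetch` up to `num_attempts` times, tolerating transient errors."""
    for _ in range(num_attempts):
        try:
            result = fetch()
            break  # success: leave the loop, which skips the else-clause
        except OSError:
            continue  # transient failure: try again
    else:
        # Only reached when the loop ran to completion without a break,
        # i.e. every attempt failed.
        raise ServiceDown()
    return result
```

The `for`/`else` construct keeps the happy path free of attempt counters: `break` on success skips the `else`, while exhausting the loop falls through to it.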
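Adapter detection changes in the same hunk: rather than parsing `base_model:` tags, which relied on repository authors tagging correctly, the code now checks for an `adapter_config.json` sibling file and asks PEFT for the base model. A sketch of that lookup (gated repos may additionally need a token):

```python
from huggingface_hub import HfApi
from peft import PeftConfig

def find_adapter_base_model(model_id: str, revision: str = "main") -> str | None:
    """Return the base model id if `model_id` is a PEFT adapter, else None."""
    info = HfApi().model_info(repo_id=model_id, revision=revision)
    siblings = info.siblings or []
    # The presence of adapter_config.json is what marks a PEFT adapter repo.
    if any(s.rfilename == "adapter_config.json" for s in siblings):
        config = PeftConfig.from_pretrained(model_id, revision=revision)
        return config.base_model_name_or_path
    return None
```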
```diff
@@ -781,12 +797,18 @@ def get_model_repo_info(
         tags += base_model_info.tags or list()
         tags = list(set(tags))
 
+    # TEMP: This extends the `TASK_MAPPING` dictionary to include the missing
+    # 'image-text-to-text' pipeline tag. This will be added as part of `TASK_MAPPING`
+    # when this PR has been merged in and published:
+    # https://github.com/huggingface/transformers/pull/37107
+    TASK_MAPPING["image-text-to-text"] = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
+
     # Get the pipeline tag for the model. If it is not specified, then we determine it
     # by checking the model's architecture as written in the model's Hugging Face config
     pipeline_tag = model_info.pipeline_tag
     if pipeline_tag is None:
         hf_config = load_hf_model_config(
-            model_id=model_id,
+            model_id=base_model_id or model_id,
             num_labels=0,
             id2label=dict(),
             label2id=dict(),
```
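The `TASK_MAPPING` patch matters for the fallback path, which matches the architectures declared in the model's config against the class names registered for each pipeline tag; without the extra entry, image-text-to-text models would fall through unrecognised. A hypothetical sketch of that matching step (`infer_pipeline_tag` is not a EuroEval function):

```python
from transformers import AutoConfig
from transformers.modelcard import TASK_MAPPING

def infer_pipeline_tag(model_id: str) -> str | None:
    """Guess a pipeline tag from the architectures in the model's config."""
    config = AutoConfig.from_pretrained(model_id)
    architectures = getattr(config, "architectures", None) or []
    for tag, mapping_names in TASK_MAPPING.items():
        # Each mapping is keyed by model type and valued by class names,
        # e.g. "llama" -> "LlamaForCausalLM".
        if any(arch in mapping_names.values() for arch in architectures):
            return tag
    return None
```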
```diff
@@ -812,7 +834,6 @@ def get_model_repo_info(
         pipeline_tag = "fill-mask"
 
     if benchmark_config.only_allow_safetensors:
-        # Check if any file ends with .safetensors
         repo_files = hf_api.list_repo_files(repo_id=model_id, revision=revision)
         has_safetensors = any(f.endswith(".safetensors") for f in repo_files)
         if not has_safetensors:
```
```diff
@@ -826,6 +847,26 @@ def get_model_repo_info(
             )
             raise InvalidModel(msg)
 
+        # Also check base model if we are evaluating an adapter
+        if base_model_id is not None:
+            base_repo_files = hf_api.list_repo_files(repo_id=base_model_id)
+            base_has_safetensors = any(
+                f.endswith(".safetensors") for f in base_repo_files
+            )
+            if not base_has_safetensors:
+                msg = (
+                    f"Base model {base_model_id} does not have safetensors weights "
+                    "available."
+                )
+                if benchmark_config.run_with_cli:
+                    msg += " Skipping since the `--only-allow-safetensors` flag is set."
+                else:
+                    msg += (
+                        " Skipping since the `only_allow_safetensors` argument is set "
+                        "to `True`."
+                    )
+                raise InvalidModel(msg)
+
     return HFModelInfo(
         pipeline_tag=pipeline_tag, tags=tags, adapter_base_model_id=base_model_id
     )
```
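With the new block, the safetensors gate covers both the adapter repository and its base model. The check itself is a filename scan over the repo listing; a minimal sketch:

```python
from huggingface_hub import HfApi

def has_safetensors_weights(repo_id: str, revision: str | None = None) -> bool:
    """True if the repo contains at least one .safetensors weight file."""
    files = HfApi().list_repo_files(repo_id=repo_id, revision=revision)
    return any(f.endswith(".safetensors") for f in files)
```

In the diff above this runs twice: once for `model_id` at the pinned revision, and once for `base_model_id` at the default revision, raising `InvalidModel` when either repo lacks safetensors weights.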
```diff
@@ -1109,8 +1150,7 @@ def align_model_and_tokenizer(
     Returns:
         The fixed model and tokenizer.
     """
-
-    model_max_length = min(model_max_length, 5_000)
+    model_max_length = min(model_max_length, MAX_CONTEXT_LENGTH)
 
     if model_max_length > 0:
         tokenizer.model_max_length = model_max_length
```