EuroEval 15.16.0__py3-none-any.whl → 16.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic.
- euroeval/__init__.py +8 -7
- euroeval/benchmark_config_factory.py +3 -7
- euroeval/benchmark_modules/base.py +35 -19
- euroeval/benchmark_modules/fresh.py +24 -19
- euroeval/benchmark_modules/hf.py +136 -154
- euroeval/benchmark_modules/litellm.py +190 -110
- euroeval/benchmark_modules/vllm.py +199 -139
- euroeval/benchmarker.py +49 -22
- euroeval/cli.py +3 -3
- euroeval/constants.py +19 -15
- euroeval/data_loading.py +33 -28
- euroeval/data_models.py +73 -23
- euroeval/dataset_configs/__init__.py +2 -0
- euroeval/dataset_configs/danish.py +35 -1
- euroeval/dataset_configs/dutch.py +38 -1
- euroeval/dataset_configs/english.py +38 -1
- euroeval/dataset_configs/estonian.py +95 -0
- euroeval/dataset_configs/faroese.py +38 -0
- euroeval/dataset_configs/finnish.py +39 -1
- euroeval/dataset_configs/french.py +38 -1
- euroeval/dataset_configs/german.py +38 -1
- euroeval/dataset_configs/icelandic.py +39 -1
- euroeval/dataset_configs/italian.py +38 -1
- euroeval/dataset_configs/latvian.py +81 -0
- euroeval/dataset_configs/norwegian.py +38 -1
- euroeval/dataset_configs/portuguese.py +38 -1
- euroeval/dataset_configs/spanish.py +38 -1
- euroeval/dataset_configs/swedish.py +38 -1
- euroeval/enums.py +0 -6
- euroeval/finetuning.py +6 -6
- euroeval/generation.py +25 -14
- euroeval/generation_utils.py +90 -20
- euroeval/languages.py +947 -187
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +76 -0
- euroeval/metrics/huggingface.py +192 -0
- euroeval/metrics/llm_as_a_judge.py +257 -0
- euroeval/metrics/pipeline.py +276 -0
- euroeval/metrics/speed.py +51 -0
- euroeval/model_cache.py +13 -1
- euroeval/prompt_templates/linguistic_acceptability.py +40 -2
- euroeval/prompt_templates/multiple_choice.py +23 -2
- euroeval/prompt_templates/named_entity_recognition.py +65 -2
- euroeval/prompt_templates/reading_comprehension.py +42 -2
- euroeval/prompt_templates/sentiment_classification.py +46 -2
- euroeval/prompt_templates/summarization.py +24 -4
- euroeval/scores.py +7 -2
- euroeval/speed_benchmark.py +6 -6
- euroeval/task_group_utils/multiple_choice_classification.py +19 -8
- euroeval/task_group_utils/question_answering.py +35 -28
- euroeval/task_group_utils/sequence_classification.py +128 -42
- euroeval/task_group_utils/text_to_text.py +7 -3
- euroeval/task_group_utils/token_classification.py +59 -73
- euroeval/tasks.py +33 -6
- euroeval/tokenization_utils.py +294 -207
- euroeval/utils.py +150 -35
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/METADATA +13 -14
- euroeval-16.0.1.dist-info/RECORD +69 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -738
- euroeval/metrics.py +0 -470
- euroeval-15.16.0.dist-info/RECORD +0 -63
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/WHEEL +0 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.1.dist-info}/licenses/LICENSE +0 -0
euroeval/dataset_configs/portuguese.py
CHANGED

@@ -2,7 +2,7 @@

 from ..data_models import DatasetConfig
 from ..languages import PT
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

 ### Official datasets ###

@@ -67,6 +67,17 @@ GOLDENSWAG_PT_CONFIG = DatasetConfig(
     languages=[PT],
 )

+EUROPEAN_VALUES_PT_CONFIG = DatasetConfig(
+    name="european-values-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
+

 ### Unofficial datasets ###

@@ -79,3 +90,29 @@ BOOLQ_PT_CONFIG = DatasetConfig(
     languages=[PT],
     unofficial=True,
 )
+
+EUROPEAN_VALUES_SITUATIONAL_PT_CONFIG = DatasetConfig(
+    name="european-values-situational-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset, "
+    "where the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+
+EUROPEAN_VALUES_COMPLETIONS_PT_CONFIG = DatasetConfig(
+    name="european-values-completions-pt",
+    pretty_name="the Portuguese version of the European values evaluation dataset, "
+    "where the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-pt",
+    task=EUROPEAN_VALUES,
+    languages=[PT],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/spanish.py
CHANGED

@@ -2,7 +2,7 @@

 from ..data_models import DatasetConfig
 from ..languages import ES
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

 ### Official datasets ###

@@ -66,6 +66,17 @@ HELLASWAG_ES_CONFIG = DatasetConfig(
     languages=[ES],
 )

+EUROPEAN_VALUES_ES_CONFIG = DatasetConfig(
+    name="european-values-es",
+    pretty_name="the Spanish version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
+

 ### Unofficial datasets ###

@@ -107,3 +118,29 @@ GOLDENSWAG_ES_CONFIG = DatasetConfig(
     languages=[ES],
     unofficial=True,
 )
+
+EUROPEAN_VALUES_SITUATIONAL_ES_CONFIG = DatasetConfig(
+    name="european-values-situational-es",
+    pretty_name="the Spanish version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+
+EUROPEAN_VALUES_COMPLETIONS_ES_CONFIG = DatasetConfig(
+    name="european-values-completions-es",
+    pretty_name="the Spanish version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-es",
+    task=EUROPEAN_VALUES,
+    languages=[ES],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)

euroeval/dataset_configs/swedish.py
CHANGED

@@ -2,7 +2,7 @@

 from ..data_models import DatasetConfig
 from ..languages import SV
-from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
+from ..tasks import COMMON_SENSE, EUROPEAN_VALUES, KNOW, LA, MCRC, NER, RC, SENT, SUMM

 ### Official datasets ###

@@ -67,6 +67,17 @@ HELLASWAG_SV_CONFIG = DatasetConfig(
     languages=[SV],
 )

+EUROPEAN_VALUES_SV_CONFIG = DatasetConfig(
+    name="european-values-sv",
+    pretty_name="the Swedish version of the European values evaluation dataset",
+    huggingface_id="EuroEval/european-values-sv",
+    task=EUROPEAN_VALUES,
+    languages=[SV],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+)
+

 ### Unofficial datasets ###

@@ -118,3 +129,29 @@ GOLDENSWAG_SV_CONFIG = DatasetConfig(
     languages=[SV],
     unofficial=True,
 )
+
+EUROPEAN_VALUES_SITUATIONAL_SV_CONFIG = DatasetConfig(
+    name="european-values-situational-sv",
+    pretty_name="the Swedish version of the European values evaluation dataset, where "
+    "the questions are phrased in a situational way",
+    huggingface_id="EuroEval/european-values-situational-sv",
+    task=EUROPEAN_VALUES,
+    languages=[SV],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
+
+EUROPEAN_VALUES_COMPLETIONS_SV_CONFIG = DatasetConfig(
+    name="european-values-completions-sv",
+    pretty_name="the Swedish version of the European values evaluation dataset, where "
+    "the questions are phrased as sentence completions",
+    huggingface_id="EuroEval/european-values-completions-sv",
+    task=EUROPEAN_VALUES,
+    languages=[SV],
+    splits=["test"],
+    bootstrap_samples=False,
+    _instruction_prompt="{text}",
+    unofficial=True,
+)
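
The three dataset-config hunks above all follow the same pattern: each language module gains one official and two unofficial European-values entries that use only a test split, disable bootstrapped resampling, and pass the question text through verbatim as the prompt. As a hedged illustration of that pattern in isolation, here is what a corresponding entry could look like for another language; the Danish `DA` constant and the "european-values-da" identifiers are assumptions for illustration and are not shown in this part of the diff.

    # Illustrative sketch only: mirrors the DatasetConfig pattern added above.
    # The Danish DA constant and the "european-values-da" ids are assumed here.
    from euroeval.data_models import DatasetConfig
    from euroeval.languages import DA
    from euroeval.tasks import EUROPEAN_VALUES

    EUROPEAN_VALUES_DA_CONFIG = DatasetConfig(
        name="european-values-da",
        pretty_name="the Danish version of the European values evaluation dataset",
        huggingface_id="EuroEval/european-values-da",
        task=EUROPEAN_VALUES,
        languages=[DA],
        splits=["test"],               # only the test split is used
        bootstrap_samples=False,       # no bootstrapped resampling of the test set
        _instruction_prompt="{text}",  # the question text is used verbatim as the prompt
    )

The `bootstrap_samples=False` flag is what the generation changes further down key off when deciding whether to reuse cached model outputs.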
euroeval/enums.py
CHANGED

@@ -40,14 +40,11 @@ class InferenceBackend(AutoStrEnum):
             VLLM library.
         LITELLM:
             LiteLLM library.
-        NONE:
-            No inference backend used (e.g., for human evaluation).
     """

     TRANSFORMERS = auto()
     VLLM = auto()
     LITELLM = auto()
-    NONE = auto()


 class ModelType(AutoStrEnum):
@@ -58,13 +55,10 @@ class ModelType(AutoStrEnum):
             An encoder (i.e., BERT-style) model.
         GENERATIVE:
             A generative model. Can be either decoder or encoder-decoder (aka seq2seq).
-        HUMAN:
-            Human evaluator.
     """

     ENCODER = auto()
     GENERATIVE = auto()
-    HUMAN = auto()


 class GenerativeType(AutoStrEnum):
euroeval/finetuning.py
CHANGED

@@ -119,7 +119,7 @@ def finetune(
             # NaN values can appear in the model output when using mixed precision, as
             # the hidden states get overflowed. In this case we try to disable mixed
             # precision and try again.
-            except NaNValueInModelOutput:
+            except NaNValueInModelOutput as e:
                 if dtype != DataType.FP32:
                     dtype = DataType.FP32
                     model_already_initialized = False
@@ -131,11 +131,11 @@
                     raise InvalidBenchmark(
                         "NaN value detected in model outputs, even with mixed "
                         "precision disabled."
-                    )
+                    ) from e

             except Exception as e:
                 if "CUDA" not in str(e) and "out of memory" not in str(e):
-                    raise InvalidBenchmark(str(e))
+                    raise InvalidBenchmark(str(e)) from e

                 if bs <= 1:
                     msg = "Could not benchmark the model, even with a batch size of 1!"
@@ -146,7 +146,7 @@
                         "environment variable set, as this removes the upper bound "
                         "on the memory usage."
                     )
-                    raise InvalidBenchmark(msg)
+                    raise InvalidBenchmark(msg) from e

                 model_already_initialized = False

@@ -195,7 +195,7 @@ def finetune_single_iteration(

     trainer = model.trainer_class(
         model=model.get_pytorch_module(),
-        processing_class=model.
+        processing_class=model.get_tokeniser(),
         args=training_args,
         train_dataset=dataset["train"],
         eval_dataset=dataset["val"],
@@ -245,7 +245,7 @@ def finetune_single_iteration(
             clear_memory()
             raise e
     except (RuntimeError, ValueError, IndexError) as e:
-        raise InvalidBenchmark(str(e))
+        raise InvalidBenchmark(str(e)) from e

     return test_scores

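
The recurring change in this file is exception chaining: every `raise InvalidBenchmark(...)` that happens inside an `except` block now carries `from e`. A minimal, self-contained sketch of what that buys (the exception class below is a stand-in, not EuroEval's own):

    class InvalidBenchmark(Exception):
        """Stand-in for EuroEval's InvalidBenchmark exception."""


    def run_benchmark() -> None:
        # Simulate a low-level failure that the benchmarking wrapper wants to re-raise.
        raise RuntimeError("CUDA error: device-side assert triggered")


    try:
        run_benchmark()
    except RuntimeError as e:
        # `from e` records the RuntimeError as __cause__, so the traceback reads
        # "The above exception was the direct cause of ..." instead of the implicit
        # "During handling of the above exception, another exception occurred".
        raise InvalidBenchmark(str(e)) from e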
euroeval/generation.py
CHANGED

@@ -6,6 +6,7 @@ import typing as t
 from pathlib import Path

 import more_itertools as mit
+from datasets import Dataset
 from tqdm.auto import tqdm

 from .enums import BatchingPreference, TaskGroup
@@ -15,10 +16,10 @@ from .model_cache import (
     load_cached_model_outputs,
     split_dataset_into_cached_and_non_cached,
 )
-from .utils import clear_memory
+from .utils import clear_memory, log_once

 if t.TYPE_CHECKING:
-    from datasets import
+    from datasets import DatasetDict

     from .benchmark_modules import BenchmarkModule
     from .data_models import (
@@ -78,7 +79,7 @@ def generate(

     scores: list[dict[str, float]] = list()
     for idx in tqdm(
-        iterable=range(
+        iterable=range(len(datasets)),
         desc="Benchmarking",
         disable=not benchmark_config.progress_bar,
     ):
@@ -89,7 +90,6 @@ def generate(
             dataset_config=dataset_config,
             benchmark_config=benchmark_config,
         )
-
         logger.debug(f"Test scores for iteration {idx}: {test_scores}")
         scores.append(test_scores)
         clear_memory()
@@ -126,10 +126,15 @@ def generate_single_iteration(
     """
     cache.load()

-    # Split up the dataset into a cached and non-cached part
-
-
-
+    # Split up the dataset into a cached and non-cached part, unless we are not
+    # bootstrapping the samples. In that case, we just use the dataset as is.
+    if dataset_config.bootstrap_samples:
+        cached_dataset, non_cached_dataset = split_dataset_into_cached_and_non_cached(
+            dataset=dataset, cache=cache
+        )
+    else:
+        cached_dataset = Dataset.from_dict({})
+        non_cached_dataset = dataset

     all_preds: list[str] = list()

@@ -230,9 +235,12 @@
         cached_labels = list(cached_labels)
         ground_truth = non_cached_labels + cached_labels
     else:
-
-            "
+        log_once(
+            "No labels found in the dataset. We assume that this is intentional, and "
+            "will not supply any ground truth labels for evaluation.",
+            level=logging.DEBUG,
         )
+        ground_truth = []

     itr_scores: dict[str, float] = model.compute_metrics(
         model_outputs_and_labels=(all_preds, ground_truth), dataset=dataset
@@ -293,10 +301,13 @@ def debug_log(
         case (
             TaskGroup.SEQUENCE_CLASSIFICATION | TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION
         ):
-
-
-
-
+            if "label" in batch:
+                labels = [
+                    dataset_config.prompt_label_mapping.get(label, label).lower()
+                    for label in batch["label"]
+                ]
+            else:
+                labels = ["N/A"] * len(extracted_labels)

         case TaskGroup.QUESTION_ANSWERING:
             extracted_labels = [
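
In `generate_single_iteration`, the model-output cache is now only split off when the dataset is configured with `bootstrap_samples=True`; otherwise an empty `Dataset` stands in for the cached part and the whole dataset is evaluated as-is. A rough standalone sketch of that branch, using a simplified stand-in for `split_dataset_into_cached_and_non_cached` (keyed here on a text column purely for illustration):

    from datasets import Dataset


    def split_cached(dataset: Dataset, cache: dict) -> tuple[Dataset, Dataset]:
        """Simplified stand-in for EuroEval's split_dataset_into_cached_and_non_cached."""
        cached_idx = [i for i, row in enumerate(dataset) if row["text"] in cache]
        non_cached_idx = [i for i in range(len(dataset)) if i not in set(cached_idx)]
        return dataset.select(cached_idx), dataset.select(non_cached_idx)


    def split_for_generation(
        dataset: Dataset, cache: dict, bootstrap_samples: bool
    ) -> tuple[Dataset, Dataset]:
        # Mirrors the new branch: only consult the cache when bootstrapping samples,
        # otherwise treat the full dataset as non-cached and evaluate it once.
        if bootstrap_samples:
            return split_cached(dataset=dataset, cache=cache)
        return Dataset.from_dict({}), dataset


    ds = Dataset.from_dict({"text": ["a", "b", "c"]})
    cached, non_cached = split_for_generation(ds, cache={"b": "..."}, bootstrap_samples=False)
    assert len(cached) == 0 and len(non_cached) == 3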
euroeval/generation_utils.py
CHANGED

@@ -8,19 +8,23 @@ import typing as t

 from .enums import TaskGroup
 from .exceptions import InvalidBenchmark
-from .
+from .tokenization_utils import apply_chat_template
+from .utils import extract_multiple_choice_labels, log_once

 if t.TYPE_CHECKING:
     from datasets import DatasetDict
     from transformers.tokenization_utils import PreTrainedTokenizer

-    from .data_models import DatasetConfig, ModelConfig
+    from .data_models import BenchmarkConfig, DatasetConfig, ModelConfig

 logger = logging.getLogger("euroeval")


 def extract_few_shot_examples(
-    dataset: "DatasetDict",
+    dataset: "DatasetDict",
+    dataset_config: "DatasetConfig",
+    benchmark_config: "BenchmarkConfig",
+    itr_idx: int,
 ) -> list[dict[str, t.Any]]:
     """Extract few-shot examples from a dataset.

@@ -33,12 +37,32 @@ def extract_few_shot_examples(
             The dataset to extract the few-shot examples from.
         dataset_config:
             The dataset configuration.
+        benchmark_config:
+            The benchmark configuration.
         itr_idx:
             The index of the dataset in the iterator.

     Returns:
         The few-shot examples.
+
+    Raises:
+        InvalidBenchmark:
+            If there are not enough short examples for few-shot learning.
     """
+    if dataset_config.task.requires_zero_shot and benchmark_config.few_shot:
+        msg = (
+            "This task only allows zero-shot evaluation, so even though you have "
+            "requested few-shot evaluation "
+        )
+        if benchmark_config.run_with_cli:
+            msg += "(by not setting the --zero-shot flag), "
+        else:
+            msg += "(by setting the default `few_shot=True` argument), "
+        msg += "we will run the evaluation in zero-shot mode."
+        benchmark_config.few_shot = False
+        log_once(msg, level=logging.DEBUG)
+        return []
+
     random_seed = 4242 + itr_idx
     num_few_shots = dataset_config.num_few_shot_examples
     few_shot_examples: list[dict[str, t.Any]] = list()
@@ -63,12 +87,19 @@

             shuffled_train = train_with_short_examples.shuffle(seed=random_seed)
             labels = it.cycle(dataset_config.labels)
+            labels_with_no_samples: set[str] = set()
             while len(few_shot_examples) < num_few_shots and len(shuffled_train) > 0:
+                if len(labels_with_no_samples) == len(dataset_config.labels):
+                    raise InvalidBenchmark(
+                        "Could not find enough examples for few-shot learning. "
+                        "Please check the dataset and the labels."
+                    )
                 label = next(labels)
                 possible_examples = shuffled_train.filter(
                     lambda x: x["label"].lower() == label.lower()
                 )
                 if len(possible_examples) == 0:
+                    labels_with_no_samples.add(label)
                     continue
                 example = possible_examples.select(range(1))[0]
                 few_shot_examples.append(example)
@@ -144,7 +175,7 @@ def apply_prompt(
     dataset_config: "DatasetConfig",
     instruction_model: bool,
     always_populate_text_field: bool,
-
+    tokeniser: "PreTrainedTokenizer | None",
 ) -> dict[str, t.Any]:
     """Apply prompt template to an example, potentially with few-shot examples.

@@ -160,16 +191,16 @@
         always_populate_text_field:
             Whether to always populate the 'text' field in the examples, as opposed to
             the 'messages' field.
-
-            The
+        tokeniser:
+            The tokeniser to use for the model. If None, the tokeniser is not used.

     Returns:
         The example with the few-shot examples applied.
     """
     # Sanity check
-    if instruction_model and always_populate_text_field and
+    if instruction_model and always_populate_text_field and tokeniser is None:
         raise ValueError(
-            "The `
+            "The `tokeniser` argument must be provided when the model is instruction "
             "tuned and when we are not just returning the raw messages."
         )

@@ -199,18 +230,49 @@
         return dataset_config.prompt_template.format(**kwargs), ""

     match dataset_config.task.task_group:
-        case
-
-        ):
+        case TaskGroup.SEQUENCE_CLASSIFICATION:
+            labels_str = dataset_config.get_labels_str()
             few_shot_sections = [
                 create_prompt(
                     text=example["text"].replace("\n", " ").strip(),
                     label=example["label"].replace("\n", " ").strip(),
+                    labels_str=labels_str,
                 )
                 for example in few_shot_examples
             ]
             new_sections = [
-                create_prompt(
+                create_prompt(
+                    text=text.replace("\n", " ").strip(),
+                    label="",
+                    labels_str=labels_str,
+                )
+                for text in examples["text"]
+            ]
+
+        case TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION:
+            few_shot_sections = [
+                create_prompt(
+                    text=example["text"].replace("\n", " ").strip(),
+                    label=example["label"].replace("\n", " ").strip(),
+                    labels_str=dataset_config.get_labels_str(
+                        labels=extract_multiple_choice_labels(
+                            prompt=example["text"],
+                            candidate_labels=dataset_config.labels,
+                        )
+                    ),
+                )
+                for example in few_shot_examples
+            ]
+            new_sections = [
+                create_prompt(
+                    text=text.replace("\n", " ").strip(),
+                    label="",
+                    labels_str=dataset_config.get_labels_str(
+                        labels=extract_multiple_choice_labels(
+                            prompt=text, candidate_labels=dataset_config.labels
+                        )
+                    ),
+                )
                 for text in examples["text"]
             ]

@@ -228,6 +290,7 @@
             ]

         case TaskGroup.TOKEN_CLASSIFICATION:
+            labels_str = dataset_config.get_labels_str()

             def create_label(example: dict) -> str:
                 prompt_labels = dataset_config.prompt_label_mapping.values()
@@ -249,12 +312,15 @@
                 create_prompt(
                     text=" ".join(example["tokens"]).replace("\n", " ").strip(),
                     label=create_label(example=example),
+                    labels_str=labels_str,
                 )
                 for example in few_shot_examples
             ]
             new_sections = [
                 create_prompt(
-                    text=" ".join(tokens).replace("\n", " ").strip(),
+                    text=" ".join(tokens).replace("\n", " ").strip(),
+                    label="",
+                    labels_str=labels_str,
                 )
                 for tokens in examples["tokens"]
             ]
@@ -298,30 +364,31 @@
         examples["messages"] = messages_list

     else:
-        assert
+        assert tokeniser is not None

         # Pick the chat template that matches the language of the dataset, if such a
         # template exists
         chat_template: str | None = None
-        if
+        if hasattr(tokeniser, "chat_template") and isinstance(
+            tokeniser.chat_template, dict
+        ):
            language_codes = [
                 language.code for language in dataset_config.languages
             ]
-            for name, candidate_template in
+            for name, candidate_template in tokeniser.chat_template.items():
                 if name.lower() in language_codes:
                     chat_template = candidate_template
                     log_once(
-                        f"Using the {name!r} chat template for the
+                        f"Using the {name!r} chat template for the tokeniser for "
                         f"model {model_config.model_id!r}.",
                         level=logging.DEBUG,
                     )
                     break

         texts = [
-
+            apply_chat_template(
                 conversation=messages,
-
-                add_generation_prompt=True,
+                tokeniser=tokeniser,
                 chat_template=chat_template,
             )
             for messages in messages_list
@@ -343,4 +410,7 @@
             for new_prompt, _ in new_sections
         ]

+    # Always add the final prompts without few-shot examples, too, for analysis
+    examples["prompt"] = [new_prompt for new_prompt, _ in new_sections]
+
     return examples
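
The rewritten block near the end of apply_prompt handles tokenisers whose chat_template attribute is a dict of named templates, preferring one whose name matches a language code of the dataset before falling back to the default. A small sketch of just that matching logic, using a stand-in object rather than a real Hugging Face tokenizer:

    from types import SimpleNamespace


    def pick_chat_template(tokeniser: object, language_codes: list[str]) -> str | None:
        """Return a language-specific chat template if one is available, else None."""
        # Mirrors the diff: only dict-valued chat_template attributes are inspected;
        # a plain string template (or no template at all) falls through to None.
        if hasattr(tokeniser, "chat_template") and isinstance(tokeniser.chat_template, dict):
            for name, candidate_template in tokeniser.chat_template.items():
                if name.lower() in language_codes:
                    return candidate_template
        return None


    # Stand-in tokeniser with per-language templates; not a real transformers object.
    tok = SimpleNamespace(chat_template={"default": "<default>", "SV": "<swedish>"})
    assert pick_chat_template(tok, language_codes=["sv"]) == "<swedish>"
    assert pick_chat_template(SimpleNamespace(chat_template="<single>"), ["sv"]) is None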