mteb 2.5.2__py3-none-any.whl → 2.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. mteb/_create_dataloaders.py +10 -15
  2. mteb/_evaluators/any_sts_evaluator.py +1 -4
  3. mteb/_evaluators/evaluator.py +2 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +5 -6
  5. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  6. mteb/_evaluators/retrieval_metrics.py +17 -16
  7. mteb/_evaluators/sklearn_evaluator.py +9 -8
  8. mteb/_evaluators/text/bitext_mining_evaluator.py +23 -16
  9. mteb/_evaluators/text/summarization_evaluator.py +20 -16
  10. mteb/abstasks/_data_filter/filters.py +1 -1
  11. mteb/abstasks/_data_filter/task_pipelines.py +3 -0
  12. mteb/abstasks/_statistics_calculation.py +18 -10
  13. mteb/abstasks/_stratification.py +18 -18
  14. mteb/abstasks/abstask.py +33 -27
  15. mteb/abstasks/aggregate_task_metadata.py +1 -9
  16. mteb/abstasks/aggregated_task.py +7 -26
  17. mteb/abstasks/classification.py +10 -4
  18. mteb/abstasks/clustering.py +18 -14
  19. mteb/abstasks/clustering_legacy.py +8 -8
  20. mteb/abstasks/image/image_text_pair_classification.py +5 -3
  21. mteb/abstasks/multilabel_classification.py +20 -16
  22. mteb/abstasks/pair_classification.py +18 -9
  23. mteb/abstasks/regression.py +3 -3
  24. mteb/abstasks/retrieval.py +12 -9
  25. mteb/abstasks/sts.py +6 -3
  26. mteb/abstasks/task_metadata.py +22 -19
  27. mteb/abstasks/text/bitext_mining.py +36 -25
  28. mteb/abstasks/text/reranking.py +7 -5
  29. mteb/abstasks/text/summarization.py +8 -3
  30. mteb/abstasks/zeroshot_classification.py +5 -2
  31. mteb/benchmarks/benchmark.py +2 -2
  32. mteb/cache.py +27 -22
  33. mteb/cli/_display_tasks.py +2 -2
  34. mteb/cli/build_cli.py +15 -10
  35. mteb/cli/generate_model_card.py +10 -7
  36. mteb/deprecated_evaluator.py +60 -46
  37. mteb/evaluate.py +39 -30
  38. mteb/filter_tasks.py +25 -26
  39. mteb/get_tasks.py +29 -30
  40. mteb/languages/language_scripts.py +5 -3
  41. mteb/leaderboard/app.py +1 -1
  42. mteb/load_results.py +12 -12
  43. mteb/models/abs_encoder.py +7 -5
  44. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  45. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
  46. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  47. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  48. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  49. mteb/models/get_model_meta.py +8 -1
  50. mteb/models/instruct_wrapper.py +11 -5
  51. mteb/models/model_implementations/andersborges.py +2 -2
  52. mteb/models/model_implementations/blip_models.py +8 -8
  53. mteb/models/model_implementations/bm25.py +1 -1
  54. mteb/models/model_implementations/clip_models.py +3 -3
  55. mteb/models/model_implementations/cohere_models.py +1 -1
  56. mteb/models/model_implementations/cohere_v.py +2 -2
  57. mteb/models/model_implementations/dino_models.py +23 -23
  58. mteb/models/model_implementations/emillykkejensen_models.py +3 -3
  59. mteb/models/model_implementations/gme_v_models.py +4 -3
  60. mteb/models/model_implementations/jina_clip.py +1 -1
  61. mteb/models/model_implementations/jina_models.py +1 -1
  62. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -2
  63. mteb/models/model_implementations/llm2clip_models.py +3 -3
  64. mteb/models/model_implementations/mcinext_models.py +4 -1
  65. mteb/models/model_implementations/moco_models.py +2 -2
  66. mteb/models/model_implementations/model2vec_models.py +1 -1
  67. mteb/models/model_implementations/nomic_models.py +8 -8
  68. mteb/models/model_implementations/openclip_models.py +7 -7
  69. mteb/models/model_implementations/random_baseline.py +3 -3
  70. mteb/models/model_implementations/rasgaard_models.py +1 -1
  71. mteb/models/model_implementations/repllama_models.py +2 -2
  72. mteb/models/model_implementations/rerankers_custom.py +3 -3
  73. mteb/models/model_implementations/rerankers_monot5_based.py +3 -3
  74. mteb/models/model_implementations/siglip_models.py +10 -10
  75. mteb/models/model_implementations/vlm2vec_models.py +1 -1
  76. mteb/models/model_implementations/voyage_v.py +4 -4
  77. mteb/models/model_meta.py +14 -13
  78. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
  79. mteb/models/search_wrappers.py +26 -12
  80. mteb/models/sentence_transformer_wrapper.py +19 -14
  81. mteb/py.typed +0 -0
  82. mteb/results/benchmark_results.py +28 -20
  83. mteb/results/model_result.py +52 -22
  84. mteb/results/task_result.py +55 -58
  85. mteb/similarity_functions.py +11 -7
  86. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  87. mteb/tasks/classification/est/estonian_valence.py +1 -1
  88. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  89. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  90. mteb/tasks/retrieval/code/code_rag.py +12 -12
  91. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  92. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  93. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  94. mteb/tasks/retrieval/nob/norquad.py +2 -2
  95. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  96. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  97. mteb/types/_result.py +2 -1
  98. mteb/types/statistics.py +9 -3
  99. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/METADATA +1 -1
  100. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/RECORD +104 -103
  101. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/WHEEL +0 -0
  102. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/entry_points.txt +0 -0
  103. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/licenses/LICENSE +0 -0
  104. {mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/top_level.txt +0 -0
mteb/results/task_result.py CHANGED
@@ -2,9 +2,9 @@ from __future__ import annotations
 
 import json
 import logging
-from argparse import Namespace
+import warnings
 from collections import defaultdict
-from collections.abc import Callable, Iterable
+from collections.abc import Callable, Iterable, Mapping
 from functools import cached_property
 from importlib.metadata import version
 from pathlib import Path
@@ -16,8 +16,11 @@ from packaging.version import Version
 from pydantic import BaseModel, field_validator
 from typing_extensions import Self
 
+from mteb import TaskMetadata
 from mteb._helpful_enum import HelpfulStrEnum
+from mteb.abstasks import AbsTaskClassification
 from mteb.abstasks.abstask import AbsTask
+from mteb.abstasks.task_metadata import TaskDomain
 from mteb.languages import LanguageScripts
 from mteb.models.model_meta import ScoringFunction
 from mteb.types import (
@@ -39,67 +42,59 @@ class Criteria(HelpfulStrEnum):
     DATASET_REVISION = "dataset_revision"
 
 
-class ScalaNbClassificationDummy:
+class ScalaNbClassificationDummy(AbsTaskClassification):
     """A dummy task for loading historic results from before v1.11.0"""
 
-    metadata = Namespace(  # type: ignore
+    metadata = TaskMetadata(
         name="ScalaNbClassification",
+        description="A dummy",
         main_score="accuracy",
         type="Classification",
-        hf_subsets_to_langscripts={
-            "default": ["nob-Latn"],
-        },
-        dataset={"revision": "revision_not_applicable"},
-        revision="revision_not_applicable",
+        eval_langs=["nob-Latn"],
+        dataset={"path": "not/exists", "revision": "revision_not_applicable"},
     )
 
 
-class ScalaNnClassificationDummy:
+class ScalaNnClassificationDummy(AbsTaskClassification):
     """A dummy task for loading historic results from before v1.11.0"""
 
-    metadata = Namespace(  # type: ignore
+    metadata = TaskMetadata(
         name="ScalaNnClassification",
+        description="A dummy",
         main_score="accuracy",
         type="Classification",
-        hf_subsets_to_langscripts={
-            "default": ["nno-Latn"],
-        },
-        dataset={"revision": "revision_not_applicable"},
-        revision="revision_not_applicable",
+        eval_langs=["nob-Latn"],
+        dataset={"path": "not/exists", "revision": "revision_not_applicable"},
     )
 
 
-class ScalaDaClassificationDummy:
+class ScalaDaClassificationDummy(AbsTaskClassification):
     """A dummy task for loading historic results from before v1.11.0"""
 
-    metadata = Namespace(  # type: ignore
+    metadata = TaskMetadata(
         name="ScalaDaClassification",
+        description="A dummy",
         main_score="accuracy",
         type="Classification",
-        hf_subsets_to_langscripts={
-            "default": ["dan-Latn"],
-        },
-        dataset={"revision": "revision_not_applicable"},
-        revision="revision_not_applicable",
+        eval_langs=["dan-Latn"],
+        dataset={"path": "not/exists", "revision": "revision_not_applicable"},
    )
 
 
-class ScalaSvClassificationDummy:
+class ScalaSvClassificationDummy(AbsTaskClassification):
     """A dummy task for loading historic results from before v1.11.0"""
 
-    metadata = Namespace(  # type: ignore
+    metadata = TaskMetadata(
         name="ScalaSvClassification",
+        description="A dummy",
         main_score="accuracy",
         type="Classification",
-        hf_subsets_to_langscripts={
-            "default": ["swe-Latn"],
-        },
-        dataset={"revision": "revision_not_applicable"},
-        revision="revision_not_applicable",
+        eval_langs=["swe-Latn"],
+        dataset={"path": "not/exists", "revision": "revision_not_applicable"},
     )
 
 
-outdated_tasks = {
+outdated_tasks: dict[str, type[AbsTask]] = {
     "ScalaNbClassification": ScalaNbClassificationDummy,
     "ScalaNnClassification": ScalaNnClassificationDummy,
     "ScalaDaClassification": ScalaDaClassificationDummy,
@@ -166,10 +161,10 @@ class TaskResult(BaseModel):
     def from_task_results(
         cls,
         task: AbsTask | type[AbsTask],
-        scores: dict[SplitName, dict[HFSubset, ScoresDict]],
+        scores: dict[SplitName, Mapping[HFSubset, ScoresDict]],
         evaluation_time: float,
         kg_co2_emissions: float | None = None,
-    ) -> Self:
+    ) -> TaskResult:
         """Create a TaskResult from the task and scores.
 
         Args:
@@ -246,12 +241,12 @@ class TaskResult(BaseModel):
         return get_task(self.task_name)
 
     @property
-    def domains(self) -> list[str]:
+    def domains(self) -> list[TaskDomain]:
         """Get the domains of the task."""
         doms = self.task.metadata.domains
         if doms is None:
             doms = []
-        return doms  # type: ignore
+        return doms
 
     @property
     def task_type(self) -> str:
@@ -307,7 +302,7 @@ class TaskResult(BaseModel):
                 if isinstance(v, dict):
                     self._round_scores(v, n)
                 elif isinstance(v, float):
-                    value[i] = round(v, n)
+                    value[i] = round(v, n)  # type: ignore[call-overload]
 
         elif isinstance(value, float):
             scores[key] = round(value, n)
@@ -325,7 +320,7 @@ class TaskResult(BaseModel):
             json.dump(json_obj, f, indent=2)
 
     @classmethod
-    def from_disk(cls, path: Path, load_historic_data: bool = True) -> Self:  # type: ignore
+    def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult:
         """Load TaskResult from disk.
 
         Args:
@@ -356,7 +351,7 @@ class TaskResult(BaseModel):
             )  # assume it is before 1.11.0 if the version is not present
 
         try:
-            obj = cls.model_validate(data)
+            obj: TaskResult = cls.model_validate(data)
         except Exception as e:
             if not pre_1_11_load:
                 raise e
@@ -381,6 +376,7 @@ class TaskResult(BaseModel):
         from mteb import get_task
 
         task_name = obj.task_name
+        task: AbsTask | type[AbsTask]
         if task_name in outdated_tasks:
             task = outdated_tasks[task_name]
         else:
@@ -393,11 +389,11 @@ class TaskResult(BaseModel):
             for key in list(hf_subset_scores.keys()):
                 if isinstance(hf_subset_scores[key], dict):
                     for k, v in hf_subset_scores[key].items():
-                        hf_subset_scores[f"{key}_{k}"] = v
-                    hf_subset_scores.pop(key)
+                        hf_subset_scores[f"{key}_{k}"] = v  # type: ignore[index]
+                    hf_subset_scores.pop(key)  # type: ignore[attr-defined]
 
     @classmethod
-    def _convert_from_before_v1_11_0(cls, data: dict) -> Self:
+    def _convert_from_before_v1_11_0(cls, data: dict) -> TaskResult:
         from mteb.get_tasks import _TASKS_REGISTRY
 
         # in case the task name is not found in the registry, try to find a lower case version
@@ -462,7 +458,9 @@ class TaskResult(BaseModel):
             if main_score in hf_subset_scores:
                 hf_subset_scores["main_score"] = hf_subset_scores[main_score]
             else:
-                logger.warning(f"Main score {main_score} not found in scores")
+                msg = f"Main score {main_score} not found in scores"
+                logger.warning(msg)
+                warnings.warn(msg)
                 hf_subset_scores["main_score"] = None
 
         # specific fixes:
@@ -481,7 +479,7 @@ class TaskResult(BaseModel):
             scores["test"]["fra-fra"] = scores["test"].pop("fr")
 
         result: TaskResult = TaskResult.from_task_results(
-            task,  # type: ignore
+            task,
             scores,
             evaluation_time,
             kg_co2_emissions=None,
@@ -532,7 +530,7 @@ class TaskResult(BaseModel):
     def _get_score_fast(
         self,
         splits: Iterable[str] | None = None,
-        languages: str | None = None,
+        languages: list[ISOLanguage | ISOLanguageScript] | None = None,
         subsets: Iterable[str] | None = None,
     ) -> float:
         """Sped up version of get_score that will be used if no aggregation, script or getter needs to be specified.
@@ -581,7 +579,7 @@ class TaskResult(BaseModel):
         return val_sum / n_val
 
     @classmethod
-    def from_validated(cls, **data) -> Self:
+    def from_validated(cls, **data) -> TaskResult:
         """Create a TaskResult from validated data.
 
         Returns:
@@ -592,13 +590,13 @@ class TaskResult(BaseModel):
     def __repr__(self) -> str:
         return f"TaskResult(task_name={self.task_name}, scores=...)"
 
-    def only_main_score(self) -> Self:
+    def only_main_score(self) -> TaskResult:
         """Return a new TaskResult object with only the main score.
 
         Returns:
             A new TaskResult object with only the main score.
         """
-        new_scores = {}
+        new_scores: dict[str, list[Score]] = {}
         for split in self.scores:
             new_scores[split] = []
             for subset_scores in self.scores[split]:
@@ -610,10 +608,9 @@ class TaskResult(BaseModel):
                 }
             )
         new_res = {**self.to_dict(), "scores": new_scores}
-        new_res = TaskResult.from_validated(**new_res)
-        return new_res
+        return TaskResult.from_validated(**new_res)
 
-    def validate_and_filter_scores(self, task: AbsTask | None = None) -> Self:
+    def validate_and_filter_scores(self, task: AbsTask | None = None) -> TaskResult:
         """Validate and filter the scores against the task metadata.
 
         This ensures that the scores are correct for the given task, by removing any splits besides those specified in the task metadata.
@@ -635,7 +632,7 @@ class TaskResult(BaseModel):
         splits = task.eval_splits
         hf_subsets = set(task.hf_subsets)  # Convert to set once
 
-        new_scores = {}
+        new_scores: dict[str, list[Score]] = {}
         seen_splits = set()
         for split in self.scores:
             if split not in splits:
@@ -658,14 +655,14 @@ class TaskResult(BaseModel):
             else:
                 missing_subsets_str = str(missing_subsets)
 
-            logger.warning(
-                f"{task.metadata.name}: Missing subsets {missing_subsets_str} for split {split}"
-            )
+            msg = f"{task.metadata.name}: Missing subsets {missing_subsets_str} for split {split}"
+            logger.warning(msg)
+            warnings.warn(msg)
             seen_splits.add(split)
         if seen_splits != set(splits):
-            logger.warning(
-                f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
-            )
+            msg = f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
+            logger.warning(msg)
+            warnings.warn(msg)
         data = self.model_dump()
         data["scores"] = new_scores
         return type(self).model_construct(**data)
@@ -736,7 +733,7 @@ class TaskResult(BaseModel):
             "mteb_version",
             "dataset_revision",
         ],
-    ) -> Self:
+    ) -> TaskResult:
         """Merges two TaskResult objects.
 
         Args:
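
A side effect of the task_result.py changes above is that the "missing main score / subsets / splits" messages are now emitted through warnings.warn as well as logger.warning, so the standard warnings machinery applies to them. A minimal sketch of catching such a warning; the task name and message text are illustrative, not taken from mteb:

```python
import warnings

# Because the messages are now also raised via warnings.warn(), callers can
# capture or filter them with the standard warnings machinery.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Stand-in for a call such as TaskResult.validate_and_filter_scores(task)
    # that would emit one of the new warnings:
    warnings.warn("ExampleTask: Missing subsets ['default'] for split 'test'")

for w in caught:
    print(w.category.__name__, w.message)
```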
mteb/similarity_functions.py CHANGED
@@ -186,7 +186,7 @@ def max_sim(a: Array, b: Array) -> torch.Tensor:
         b,
     )
 
-    return scores.max(axis=-1).values.sum(axis=-1)
+    return scores.max(axis=-1).values.sum(axis=-1)  # type: ignore[call-overload]
 
 
 # https://github.com/lightonai/pylate/blob/2d094a724866d6e15701781528368438081c0157/pylate/scores/scores.py#L67C1-L122C38
@@ -217,7 +217,7 @@ def pairwise_max_sim(
             document_embedding,
         )
 
-        scores.append(query_document_score.max(axis=-1).values.sum())
+        scores.append(query_document_score.max(axis=-1).values.sum())  # type: ignore[call-overload]
 
     return torch.stack(scores, dim=0)
 
@@ -317,11 +317,15 @@ def similarity(text_embeddings: Array, input_embeddings: Array) -> Array:
     Returns:
         Matrix with similarities
     """
-    text_embeddings = _convert_to_tensor(text_embeddings)
-    input_embeddings = _convert_to_tensor(input_embeddings)
+    text_embeddings_tensor = _convert_to_tensor(text_embeddings)
+    input_embeddings_tensor = _convert_to_tensor(input_embeddings)
 
-    text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
-    input_embeddings = input_embeddings / input_embeddings.norm(dim=-1, keepdim=True)
-    logits = torch.matmul(input_embeddings, text_embeddings.T)
+    text_embeddings_tensor = text_embeddings_tensor / text_embeddings_tensor.norm(
+        dim=-1, keepdim=True
+    )
+    input_embeddings_tensor = input_embeddings_tensor / input_embeddings_tensor.norm(
+        dim=-1, keepdim=True
+    )
+    logits = torch.matmul(input_embeddings_tensor, text_embeddings_tensor.T)
     probs = (logits * 100).softmax(dim=-1)
     return probs
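
For context on what max_sim and pairwise_max_sim compute, here is a minimal late-interaction (MaxSim) sketch in plain PyTorch. It assumes token-level embeddings of shape (num_tokens, dim) and is only an illustration, not the mteb implementation:

```python
import torch

def max_sim_score(query_tokens: torch.Tensor, doc_tokens: torch.Tensor) -> torch.Tensor:
    """ColBERT-style MaxSim: best-matching document token per query token, summed."""
    sims = query_tokens @ doc_tokens.T    # (q_tokens, d_tokens) similarity matrix
    return sims.max(dim=-1).values.sum()  # max over doc tokens, sum over query tokens

# Toy example with unit-normalised random token embeddings.
q = torch.nn.functional.normalize(torch.randn(4, 8), dim=-1)
d = torch.nn.functional.normalize(torch.randn(6, 8), dim=-1)
print(max_sim_score(q, d))
```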
mteb/tasks/classification/dan/dk_hate_classification.py CHANGED
@@ -62,7 +62,7 @@ Piperidis, Stelios},
 
     def dataset_transform(self):
         # convert label to a 0/1 label
-        labels = self.dataset["train"]["label"]  # type: ignore
+        labels = self.dataset["train"]["label"]
         lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
         self.dataset = self.dataset.map(
             lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
mteb/tasks/classification/est/estonian_valence.py CHANGED
@@ -45,7 +45,7 @@ class EstonianValenceClassification(AbsTaskClassification):
             "valence", "label"
         )
         # convert label to a numbers
-        labels = self.dataset["train"]["label"]  # type: ignore
+        labels = self.dataset["train"]["label"]
         lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
         self.dataset = self.dataset.map(
             lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
mteb/tasks/classification/multilingual/scala_classification.py CHANGED
@@ -57,7 +57,7 @@ Fishel, Mark},
     def dataset_transform(self):
         for lang in self.dataset.keys():
             # convert label to a 0/1 label
-            labels = self.dataset[lang]["train"]["label"]  # type: ignore
+            labels = self.dataset[lang]["train"]["label"]
             lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
             self.dataset[lang] = self.dataset[lang].map(
                 lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py CHANGED
@@ -49,7 +49,7 @@ class SugarCrepe(AbsTaskImageTextPairClassification):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset = datasets.DatasetDict({"test": self.dataset["train"]})
         self.dataset_transform()
         self.data_loaded = True
mteb/tasks/retrieval/code/code_rag.py CHANGED
@@ -48,14 +48,14 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
             "path": "code-rag-bench/programming-solutions",
             "revision": "1064f7bba54d5400d4836f5831fe4c2332a566a6",
         },
-        **common_args,  # type: ignore
+        **common_args,
     )
 
     def load_data(self) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
         self.queries = {}
 
         split = self.metadata.eval_splits[0]
-        ds: datasets.Dataset = self.dataset[split]  # type: ignore
+        ds: datasets.Dataset = self.dataset[split]
         ds = ds.shuffle(seed=42)
 
         self.queries[split] = {}
@@ -105,14 +105,14 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
             "path": "code-rag-bench/online-tutorials",
             "revision": "095bb77130082e4690d6c3a031997b03487bf6e2",
         },
-        **common_args,  # type: ignore
+        **common_args,
     )
 
     def load_data(self) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -128,7 +128,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
         self.queries = {}
 
         split = self.metadata.eval_splits[0]
-        ds: datasets.Dataset = self.dataset[split]  # type: ignore
+        ds: datasets.Dataset = self.dataset[split]
         ds = ds.shuffle(seed=42)
 
         self.queries[split] = {}
@@ -165,14 +165,14 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
             "path": "code-rag-bench/library-documentation",
             "revision": "b530d3b5a25087d2074e731b76232db85b9e9107",
         },
-        **common_args,  # type: ignore
+        **common_args,
     )
 
     def load_data(self) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -188,7 +188,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
         self.queries = {}
 
         split = self.metadata.eval_splits[0]
-        ds: datasets.Dataset = self.dataset[split]  # type: ignore
+        ds: datasets.Dataset = self.dataset[split]
         ds = ds.shuffle(seed=42)
 
         self.queries[split] = {}
@@ -222,14 +222,14 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
             "path": "code-rag-bench/stackoverflow-posts",
             "revision": "04e05d86cb0ac467b29a5d87f4c56eac99dfc0a4",
         },
-        **common_args,  # type: ignore
+        **common_args,
     )
 
     def load_data(self) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -245,7 +245,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
         self.queries = {}
 
         split = self.metadata.eval_splits[0]
-        ds: datasets.Dataset = self.dataset[split]  # type: ignore
+        ds: datasets.Dataset = self.dataset[split]
         ds = ds.shuffle(seed=42)
 
         self.queries[split] = {}
mteb/tasks/retrieval/dan/dan_fever_retrieval.py CHANGED
@@ -51,7 +51,7 @@ Derczynski, Leon},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
mteb/tasks/retrieval/dan/tv2_nordretrieval.py CHANGED
@@ -64,7 +64,7 @@ Piperidis, Stelios},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -81,7 +81,7 @@ Piperidis, Stelios},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             ds = ds.select(
                 range(2048)
mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py CHANGED
@@ -40,7 +40,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -57,7 +57,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.map(answers_to_list)
 
             self.queries[split] = {}
mteb/tasks/retrieval/nob/norquad.py CHANGED
@@ -54,7 +54,7 @@ Fishel, Mark},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ Fishel, Mark},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
mteb/tasks/retrieval/nob/snl_retrieval.py CHANGED
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
 
             self.queries[split] = {}
mteb/tasks/retrieval/tur/tur_hist_quad.py CHANGED
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.metadata.eval_splits:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
mteb/types/_result.py CHANGED
@@ -1,3 +1,4 @@
+from collections.abc import Mapping
 from typing import Any, NamedTuple
 
 HFSubset = str
@@ -8,7 +9,7 @@ SplitName = str
 Score = Any
 """A score value, could e.g. be accuracy. Normally it is a float or int, but it can take on any value. Should be json serializable."""
 
-ScoresDict = dict[str, Score]
+ScoresDict = Mapping[str, Score]
 """A dictionary of scores, typically also include metadata, e.g {'main_score': 0.5, 'accuracy': 0.5, 'f1': 0.6, 'hf_subset': 'en-de', 'languages': ['eng-Latn', 'deu-Latn']}"""
 
 
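
The ScoresDict change above widens the alias from a concrete dict to the read-only Mapping protocol, so any mapping type satisfies the annotation. A small sketch of the effect; the helper function is illustrative, not part of mteb:

```python
from collections.abc import Mapping
from types import MappingProxyType
from typing import Any

ScoresDict = Mapping[str, Any]  # mirrors the widened alias above

def get_main_score(scores: ScoresDict) -> Any:
    """Read-only access works for any mapping, not just a plain dict."""
    return scores["main_score"]

plain = {"main_score": 0.5, "accuracy": 0.5}
frozen = MappingProxyType(plain)  # an immutable view is also a Mapping
print(get_main_score(plain), get_main_score(frozen))
```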
 
mteb/types/statistics.py CHANGED
@@ -10,8 +10,14 @@ class SplitDescriptiveStatistics(TypedDict):
 
 
 class DescriptiveStatistics(TypedDict, SplitDescriptiveStatistics):
-    """Class for descriptive statistics for the full task."""
+    """Class for descriptive statistics for the full task.
 
+    Attributes:
+        num_samples: Total number of samples
+        hf_subset_descriptive_stats: HFSubset descriptive statistics (only for multilingual datasets)
+    """
+
+    num_samples: int
     hf_subset_descriptive_stats: NotRequired[dict[HFSubset, SplitDescriptiveStatistics]]
 
 
@@ -88,9 +94,9 @@ class ScoreStatistics(TypedDict):
         max_score: Maximum score
     """
 
-    min_score: int
+    min_score: int | float
     avg_score: float
-    max_score: int
+    max_score: int | float
 
 
 class TopRankedStatistics(TypedDict):
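
Since DescriptiveStatistics now declares num_samples alongside the optional hf_subset_descriptive_stats, here is a trimmed-down sketch of that TypedDict shape; the field names follow the hunks above, but the class names and values are stand-ins:

```python
from typing import TypedDict
from typing_extensions import NotRequired

class SplitStats(TypedDict):
    """Stand-in for SplitDescriptiveStatistics."""
    num_samples: int

class TaskStats(SplitStats):
    """Stand-in for DescriptiveStatistics: subset stats only appear for multilingual tasks."""
    hf_subset_descriptive_stats: NotRequired[dict[str, SplitStats]]

monolingual: TaskStats = {"num_samples": 2048}
multilingual: TaskStats = {
    "num_samples": 4096,
    "hf_subset_descriptive_stats": {"en-de": {"num_samples": 2048}},
}
print(monolingual, multilingual)
```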
{mteb-2.5.2.dist-info → mteb-2.5.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.5.2
+Version: 2.5.4
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>