mteb 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. mteb/_create_dataloaders.py +2 -0
  2. mteb/_evaluators/retrieval_metrics.py +0 -9
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/benchmarks.py +9 -13
  10. mteb/benchmarks/get_benchmark.py +1 -1
  11. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  12. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  13. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  14. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  15. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  16. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  17. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  18. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  19. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  20. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  21. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  22. mteb/languages/check_language_code.py +11 -3
  23. mteb/languages/language_scripts.py +4 -0
  24. mteb/leaderboard/app.py +1 -1
  25. mteb/leaderboard/benchmark_selector.py +1 -0
  26. mteb/leaderboard/text_segments.py +1 -1
  27. mteb/models/model_implementations/b1ade_models.py +1 -1
  28. mteb/models/model_implementations/bge_models.py +1 -3
  29. mteb/models/model_implementations/bmretriever_models.py +1 -1
  30. mteb/models/model_implementations/gme_v_models.py +2 -2
  31. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  32. mteb/models/model_implementations/inf_models.py +3 -3
  33. mteb/models/model_implementations/jina_models.py +12 -2
  34. mteb/models/model_implementations/llm2vec_models.py +1 -1
  35. mteb/models/model_implementations/misc_models.py +2 -2
  36. mteb/models/model_implementations/mxbai_models.py +1 -1
  37. mteb/models/model_implementations/reasonir_model.py +1 -1
  38. mteb/models/model_implementations/salesforce_models.py +1 -1
  39. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  40. mteb/models/model_implementations/voyage_v.py +9 -9
  41. mteb/results/task_result.py +6 -8
  42. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  43. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  44. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  45. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  46. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  47. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  48. mteb/tasks/retrieval/code/code_rag.py +8 -8
  49. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  50. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  51. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  52. mteb/tasks/retrieval/eng/__init__.py +18 -4
  53. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  54. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  55. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  56. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  57. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  58. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  59. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  60. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  61. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  62. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  63. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  64. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  65. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  66. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  67. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  68. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  69. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  70. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  71. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  72. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  73. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  74. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  75. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  76. mteb/tasks/retrieval/nob/norquad.py +2 -2
  77. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  78. mteb/tasks/retrieval/rus/__init__.py +11 -2
  79. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  80. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  81. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/METADATA +5 -5
  82. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/RECORD +86 -91
  83. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  84. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  85. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  86. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  87. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  88. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  89. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  90. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  91. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  92. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  93. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  94. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  95. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  96. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/WHEEL +0 -0
  97. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/entry_points.txt +0 -0
  98. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/licenses/LICENSE +0 -0
  99. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/top_level.txt +0 -0
mteb/_create_dataloaders.py CHANGED
@@ -277,6 +277,8 @@ def _custom_collate_fn(batch: list[dict[str, Any]]) -> dict[str, Any]:
             # Leave the images as a list to avoid stacking errors.
             collated[key] = [item[key] for item in batch]
         else:
+            if any(item[key] is None for item in batch):
+                raise ValueError(f"Found None in batch for key '{key}'")
             collated[key] = default_collate([item[key] for item in batch])
     return collated
 
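For orientation, a runnable toy version of the collate pattern this hunk hardens; the function and key names here are illustrative, not mteb's internals beyond what the diff shows:

from typing import Any

from torch.utils.data import default_collate


def collate_batch(batch: list[dict[str, Any]], image_key: str = "image") -> dict[str, Any]:
    # Toy stand-in for mteb's _custom_collate_fn, showing the new guard.
    collated: dict[str, Any] = {}
    for key in batch[0]:
        if key == image_key:
            # Leave the images as a list to avoid stacking errors.
            collated[key] = [item[key] for item in batch]
        else:
            # New in 2.1.2: fail fast on missing values instead of letting
            # default_collate raise an opaque TypeError deeper in the stack.
            if any(item[key] is None for item in batch):
                raise ValueError(f"Found None in batch for key '{key}'")
            collated[key] = default_collate([item[key] for item in batch])
    return collated


print(collate_batch([{"image": "img1", "label": 0}, {"image": "img2", "label": 1}]))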
mteb/_evaluators/retrieval_metrics.py CHANGED
@@ -5,7 +5,6 @@ from typing import Any
 import numpy as np
 import pandas as pd
 import pytrec_eval
-import torch
 from packaging.version import Version
 from sklearn.metrics import auc
 
@@ -14,14 +13,6 @@ from mteb.types import RelevantDocumentsType, RetrievalEvaluationResult
 logger = logging.getLogger(__name__)
 
 
-try:
-    # speeds up computation if available
-    torch.set_float32_matmul_precision("high")
-    logger.info("Setting torch float32 matmul precision to high for a speedup")
-except Exception:
-    pass
-
-
 def mrr(
     qrels: RelevantDocumentsType,
     results: dict[str, dict[str, float]],
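Importing this module no longer flips torch's global matmul precision as a side effect. Anyone who relied on that speedup can opt in explicitly in their own code with the standard PyTorch call:

import torch

# Equivalent of the removed import-time side effect, now opt-in:
# "high" permits TF32-class kernels for float32 matmuls on supported GPUs,
# trading a small amount of precision for throughput.
torch.set_float32_matmul_precision("high")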
mteb/abstasks/_stratification.py CHANGED
@@ -134,7 +134,7 @@ def _get_most_desired_combination(samples_with_combination: dict):
 class IterativeStratification(_BaseKFold):
     """Iteratively stratify a multi-label data set into folds
 
-    Construct an interative stratifier that splits the data set into folds trying to maintain balanced representation
+    Construct an iterative stratifier that splits the data set into folds trying to maintain balanced representation
     with respect to order-th label combinations.
     """
 
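For context, a sketch of how such a stratifier is typically driven. split() comes from the sklearn _BaseKFold interface; the constructor arguments (n_splits, order) are an assumption based on the scikit-multilearn implementation this class mirrors:

import numpy as np

from mteb.abstasks._stratification import IterativeStratification

X = np.zeros((8, 1))  # features are irrelevant to label stratification
y = np.array(
    [[1, 0], [1, 1], [0, 1], [1, 0], [0, 1], [1, 1], [1, 0], [0, 1]]
)  # multi-label targets

# n_splits/order are assumed parameters; order=1 balances per-label frequencies.
stratifier = IterativeStratification(n_splits=2, order=1)
for train_idx, test_idx in stratifier.split(X, y):
    print(sorted(test_idx))  # each fold keeps label proportions roughly even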
mteb/abstasks/abstask.py CHANGED
@@ -459,7 +459,7 @@ class AbsTask(ABC):
         """Filter the languages of the task.
 
         Args:
-            languages: list of languages to filter the task by can be either a 3-letter langauge code (e.g. "eng") or also include the script
+            languages: list of languages to filter the task by can be either a 3-letter language code (e.g. "eng") or also include the script
                 (e.g. "eng-Latn")
             script: A list of scripts to filter the task by. Will be ignored if language code specified the script. If None, all scripts are included.
                 If the language code does not specify the script the intersection of the language and script will be used.
@@ -491,6 +491,11 @@ class AbsTask(ABC):
             if lang_scripts.contains_languages(langs):
                 subsets_to_keep.append(hf_subset)
 
+        if len(subsets_to_keep) == 0:
+            raise ValueError(
+                f"No subsets were found for {self.metadata.name} with filters: language code {languages}, script {script}, hf subsets {hf_subsets}."
+            )
+
         self.hf_subsets = subsets_to_keep
         return self
 
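A sketch of the call this guards, using a task name that appears elsewhere in this diff (which subsets actually exist depends on the dataset):

import mteb

task = mteb.get_task("BelebeleRetrieval")

# Keep only subsets in English written in Latin script. As of 2.1.2 a
# filter that matches nothing raises ValueError instead of silently
# leaving the task with zero subsets.
task.filter_languages(languages=["eng"], script=["Latn"])
print(task.hf_subsets)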
mteb/abstasks/dataset_card_template.md CHANGED
@@ -85,7 +85,7 @@ desc_stats = task.metadata.descriptive_stats
 ```
 
 ```json
-{{ descritptive_stats | default("{}", true) }}
+{{ descriptive_stats | default("{}", true) }}
 ```
 
 </details>
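This template fix pairs with the matching keyword rename in task_metadata.py further below; the placeholder must agree with the name the renderer passes in. A small check of the default("{}", true) expression, assuming a plain jinja2 renderer:

from jinja2 import Template

tmpl = Template('{{ descriptive_stats | default("{}", true) }}')

# With the boolean flag set, `default` also substitutes for falsy values.
print(tmpl.render())                               # -> {}
print(tmpl.render(descriptive_stats=None))         # -> {}
print(tmpl.render(descriptive_stats='{"n": 10}'))  # -> {"n": 10}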
mteb/abstasks/retrieval.py CHANGED
@@ -653,6 +653,8 @@ class AbsTaskRetrieval(AbsTask):
             FileNotFoundError: If the specified path does not exist.
             ValueError: If the loaded top ranked results are not in the expected format.
         """
+        self._top_k = top_k
+
         top_ranked_path = Path(top_ranked_path)
         if top_ranked_path.is_dir():
             top_ranked_path = self._predictions_path(top_ranked_path)
@@ -682,7 +684,6 @@ class AbsTaskRetrieval(AbsTask):
                 top_k_sorted[query_id] = sorted_keys[: self._top_k]
 
             self.dataset[subset][split]["top_ranked"] = top_k_sorted
-        self._top_k = top_k
         return self
 
 
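These two hunks fix an ordering bug: `self._top_k` was read while truncating per-query rankings but only assigned after the loop, so the truncation could use a stale or unset value; the assignment now runs at the top of the method. The truncation step itself, as a self-contained sketch (the helper name is hypothetical):

def truncate_top_ranked(
    top_ranked: dict[str, dict[str, float]], top_k: int
) -> dict[str, list[str]]:
    # Keep the top_k highest-scoring document ids per query, best first.
    truncated: dict[str, list[str]] = {}
    for query_id, doc_scores in top_ranked.items():
        sorted_ids = sorted(doc_scores, key=doc_scores.get, reverse=True)
        truncated[query_id] = sorted_ids[:top_k]
    return truncated


print(truncate_top_ranked({"q1": {"d1": 0.2, "d2": 0.9, "d3": 0.5}}, top_k=2))
# -> {'q1': ['d2', 'd3']}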
mteb/abstasks/retrieval_dataset_loaders.py CHANGED
@@ -176,7 +176,7 @@ class RetrievalDatasetLoader:
                 {
                     "query-id": Value("string"),
                     "corpus-id": Value("string"),
-                    "score": Value("uint16"),
+                    "score": Value("int32"),
                 }
             )
         )
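Widening the score column matters because uint16 cannot represent negative relevance judgments, which appear in some qrels conventions, and caps scores at 65535. A quick check of the new schema with Hugging Face datasets:

from datasets import Dataset, Features, Value

features = Features(
    {
        "query-id": Value("string"),
        "corpus-id": Value("string"),
        "score": Value("int32"),  # was uint16, which rejects e.g. -1
    }
)

qrels = Dataset.from_dict(
    {"query-id": ["q1", "q1"], "corpus-id": ["d1", "d2"], "score": [2, -1]},
    features=features,
)
print(qrels[1])  # {'query-id': 'q1', 'corpus-id': 'd2', 'score': -1}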
mteb/abstasks/task_metadata.py CHANGED
@@ -532,7 +532,7 @@ class TaskMetadata(BaseModel):
             citation=self.bibtex_citation,
             dataset_description=self.description,
             dataset_reference=self.reference,
-            descritptive_stats=descriptive_stats,
+            descriptive_stats=descriptive_stats,
             dataset_task_name=self.name,
             category=self.category,
             domains=", ".join(self.domains) if self.domains else None,
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -641,7 +641,7 @@ MTEB_KOR = Benchmark(
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
     tasks=get_tasks(
         languages=["kor"],
-        tasks=[  # @KennethEnevoldsen: We could probably expand this to a more solid benchamrk, but for now I have left it as is.
+        tasks=[  # @KennethEnevoldsen: We could probably expand this to a more solid benchmark, but for now I have left it as is.
             # Classification
             "KLUE-TC",
             # Reranking
@@ -975,8 +975,6 @@ MTEB_INDIC = Benchmark(
             # Bitext
             "IN22ConvBitextMining",
             "IN22GenBitextMining",
-            "IndicGenBenchFloresBitextMining",
-            "LinceMTBitextMining",
             # clustering
             "SIB200ClusteringS2S",
             # classification
@@ -985,7 +983,6 @@ MTEB_INDIC = Benchmark(
             "HindiDiscourseClassification",
             "SentimentAnalysisHindi",
             "MalayalamNewsClassification",
-            "IndicLangClassification",
             "MTOPIntentClassification",
             "MultiHateClassification",
             "TweetSentimentClassification",
@@ -1008,7 +1005,7 @@ MTEB_INDIC = Benchmark(
         # STS
         (get_task("IndicCrosslingualSTS"),)
     ),
-    description="A regional geopolitical text embedding benchmark targetting embedding performance on Indic languages.",
+    description="A regional geopolitical text embedding benchmark targeting embedding performance on Indic languages.",
     reference=None,
     citation=MMTEB_CITATION,
     contacts=["KennethEnevoldsen", "isaac-chung"],
@@ -1016,7 +1013,7 @@
 
 
 eu_languages = [
-    # official EU languages (56) - we could include the whole economic area e.g. Norway - additioanlly we could include minority languages (probably a good idea?)
+    # official EU languages (56) - we could include the whole economic area e.g. Norway - additionally we could include minority languages (probably a good idea?)
     # germanic
     "dan",
     "eng",
@@ -1084,7 +1081,6 @@ MTEB_EU = Benchmark(
             "AmazonCounterfactualClassification",
             "MassiveScenarioClassification",
             "MultiHateClassification",
-            "NordicLangClassification",
             "ScalaClassification",
             "SwissJudgementClassification",
             "TweetSentimentClassification",
@@ -1142,7 +1138,7 @@ MTEB_EU = Benchmark(
         languages=eu_languages,
         exclusive_language_filter=True,
     ),
-    description="A regional geopolitical text embedding benchmark targetting embedding performance on European languages.",
+    description="A regional geopolitical text embedding benchmark targeting embedding performance on European languages.",
     reference=None,
     citation=MMTEB_CITATION,
     contacts=["KennethEnevoldsen", "isaac-chung"],
@@ -1638,7 +1634,7 @@ BEIR_NL = Benchmark(
 
 MTEB_NL = Benchmark(
     name="MTEB(nld, v1)",
-    display_name="MTEB-NL",
+    display_name="Dutch",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/nl.svg",
     tasks=MTEBTasks(
         get_tasks(
@@ -1682,7 +1678,7 @@ MTEB_NL = Benchmark(
             "SciFact-NL",
             "NFCorpus-NL",
             "BelebeleRetrieval",
-            # "WebFAQRetrieval",
+            "WebFAQRetrieval",
             "DutchNewsArticlesRetrieval",
             "bBSARDNLRetrieval",
             "LegalQANLRetrieval",
@@ -1858,7 +1854,7 @@ MIEB_ENG = MIEBBenchmark(
     ),
     description="""MIEB(eng) is a comprehensive image embeddings benchmark, spanning 8 task types, covering 125 tasks.
     In addition to image classification (zero shot and linear probing), clustering, retrieval, MIEB includes tasks in compositionality evaluation,
-    document undestanding, visual STS, and CV-centric tasks.""",
+    document understanding, visual STS, and CV-centric tasks.""",
     reference="https://arxiv.org/abs/2504.10471",
     contacts=["gowitheflow-1998", "isaac-chung"],
     citation=r"""
@@ -1892,7 +1888,7 @@ MIEB_MULTILINGUAL = MIEBBenchmark(
     ),
     description="""MIEB(Multilingual) is a comprehensive image embeddings benchmark, spanning 10 task types, covering 130 tasks and a total of 39 languages.
     In addition to image classification (zero shot and linear probing), clustering, retrieval, MIEB includes tasks in compositionality evaluation,
-    document undestanding, visual STS, and CV-centric tasks. This benchmark consists of MIEB(eng) + 3 multilingual retrieval
+    document understanding, visual STS, and CV-centric tasks. This benchmark consists of MIEB(eng) + 3 multilingual retrieval
     datasets + the multilingual parts of VisualSTS-b and VisualSTS-16.""",
     reference="https://arxiv.org/abs/2504.10471",
     contacts=["gowitheflow-1998", "isaac-chung"],
@@ -2113,7 +2109,7 @@ BUILT_MTEB = Benchmark(
             "BuiltBenchReranking",
         ],
     ),
-    description='"Built-Bench" is an ongoing effort aimed at evaluating text embedding models in the context of built asset management, spanning over various dicsiplines such as architeture, engineering, constrcution, and operations management of the built environment.',
+    description='"Built-Bench" is an ongoing effort aimed at evaluating text embedding models in the context of built asset management, spanning over various disciplines such as architecture, engineering, construction, and operations management of the built environment.',
     reference="https://arxiv.org/abs/2411.12056",
     citation=r"""
     @article{shahinmoghadam2024benchmarking,
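The benchmark edits above all flow through get_tasks; a minimal sketch of that selection call, with names taken from the MTEB(nld, v1) list in this diff:

import mteb

# WebFAQRetrieval is newly included in MTEB(nld, v1) as of this release.
tasks = mteb.get_tasks(
    languages=["nld"],
    tasks=["BelebeleRetrieval", "WebFAQRetrieval"],
)
print([t.metadata.name for t in tasks])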
mteb/benchmarks/get_benchmark.py CHANGED
@@ -14,7 +14,7 @@ def _build_registry() -> dict[str, Benchmark]:
 
     benchmark_registry = {
         inst.name: inst
-        for nam, inst in benchmark_module.__dict__.items()
+        for _, inst in benchmark_module.__dict__.items()
         if isinstance(inst, Benchmark)
     }
     return benchmark_registry
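The registry keys benchmarks by their canonical `name`, not the module-level variable, so lookups are unaffected by the rename from `nam` to `_`. A usage sketch, assuming get_benchmark is exported at the package root as in mteb's public API:

import mteb

benchmark = mteb.get_benchmark("MTEB(nld, v1)")
print(benchmark.display_name)  # "Dutch" as of 2.1.2
print(len(benchmark.tasks))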