PyPI - mteb - Versions diffs - 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl - Mend

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (173)

mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py CHANGED Viewed

@@ -111,7 +111,7 @@ class CUREv1Retrieval(AbsTaskRetrieval):
         return queries
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py CHANGED Viewed

@@ -148,7 +148,7 @@ def _load_data(
     return corpus, queries, relevant_docs
-def load_data(self, num_proc: int = 1, **kwargs) -> None:
+def load_data(self, num_proc: int | None = None, **kwargs) -> None:
     if self.data_loaded:
         return

mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py CHANGED Viewed

@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that is relevant to the user's query."},
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py CHANGED Viewed

@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py CHANGED Viewed

@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py CHANGED Viewed

@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
         },
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
         },
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py CHANGED Viewed

@@ -96,7 +96,7 @@ de Vries, Harm},
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py CHANGED Viewed

@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py CHANGED Viewed

@@ -16,7 +16,7 @@ def _load_data(
     splits: list[str],
     langs: list | None = None,
     revision: str | None = None,
-    num_proc: int = 1,
+    num_proc: int | None = None,
 ):
     if langs is None:
         corpus = {}
@@ -131,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -179,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -227,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -275,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py CHANGED Viewed

@@ -68,6 +68,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
         license="cc-by-4.0",
         annotations_creators="derived",
         dialect=[],
+        modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
 @article{loison2026vidorev3comprehensiveevaluation,

mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py CHANGED Viewed

@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py CHANGED Viewed

@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py CHANGED Viewed

@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py CHANGED Viewed

@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackAndroid"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackEnglish"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackGamingRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackGisRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackMathematicaRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackPhysicsRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackProgrammersRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackStatsRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackTexRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackUnixRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackWebmastersRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackWordpressRetrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nob/norquad.py CHANGED Viewed

@@ -50,7 +50,7 @@ Fishel, Mark},
         },
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -58,7 +58,7 @@ Fishel, Mark},
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text

mteb/tasks/retrieval/nob/snl_retrieval.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         task_subtypes=["Article retrieval"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -45,7 +45,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text

mteb/tasks/retrieval/slk/slovak_sum_retrieval.py CHANGED Viewed

@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
         self.corpus, self.queries, self.relevant_docs = {}, {}, {}

mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py CHANGED Viewed

@@ -52,7 +52,7 @@ Zong, Chengqing},
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/sts/multilingual/sem_rel24_sts.py CHANGED Viewed

@@ -66,6 +66,6 @@ Seid Muhie Yimam and Saif M. Mohammad},
     min_score = 0
     max_score = 1
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         for lang, subset in self.dataset.items():
             self.dataset[lang] = subset.rename_column("label", "score")

mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py CHANGED Viewed

@@ -56,6 +56,6 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS):
     min_score = 0
     max_score = 5
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         for lang, subset in self.dataset.items():
             self.dataset[lang] = subset.rename_column("similarity_score", "score")

mteb/tasks/sts/por/assin2_sts.py CHANGED Viewed

@@ -39,7 +39,7 @@ class Assin2STS(AbsTaskSTS):
     min_score = 1
     max_score = 5
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {
                 "premise": "sentence1",

mteb/types/_encoder_io.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, TypedDict
 import numpy as np
 import torch
 from datasets import Dataset
+from numpy.typing import NDArray
 if TYPE_CHECKING:
     from PIL import Image
@@ -26,8 +27,8 @@ class EncodeKwargs(TypedDict):
 # --- Output types ---
-Array = np.ndarray | torch.Tensor
-"""General array type, can be a numpy array or a torch tensor."""
+Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
+"""General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
 # --- Input types ---

{mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.16
+Version: 2.7.18
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl