PyPI - mteb - Versions diffs - 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl - Mend

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (287) hide show

mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py CHANGED Viewed

@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py CHANGED Viewed

@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py CHANGED Viewed

@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
         },
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
         },
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py CHANGED Viewed

@@ -96,7 +96,7 @@ de Vries, Harm},
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py CHANGED Viewed

@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py CHANGED Viewed

@@ -16,6 +16,7 @@ def _load_data(
     splits: list[str],
     langs: list | None = None,
     revision: str | None = None,
+    num_proc: int = 1,
 ):
     if langs is None:
         corpus = {}
@@ -32,6 +33,7 @@ def _load_data(
             "queries",
             split=split,
             revision=revision,
+            num_proc=num_proc,
         )
         query_ds = query_ds.map(
             lambda x: {
@@ -40,6 +42,7 @@ def _load_data(
                 "modality": "text",
             },
             remove_columns=["query-id", "query"],
+            num_proc=num_proc,
         )
         corpus_ds = load_dataset(
@@ -47,6 +50,7 @@ def _load_data(
             "corpus",
             split=split,
             revision=revision,
+            num_proc=num_proc,
         )
         corpus_ds = corpus_ds.map(
             lambda x: {
@@ -54,6 +58,7 @@ def _load_data(
                 "modality": "image",
             },
             remove_columns=["corpus-id"],
+            num_proc=num_proc,
         )
         corpus_ds = corpus_ds.select_columns(["id", "image"])
@@ -62,6 +67,7 @@ def _load_data(
             "qrels",
             split=split,
             revision=revision,
+            num_proc=num_proc,
         )
         if langs is None:
@@ -125,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -134,6 +140,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
             splits=self.metadata.eval_splits,
             langs=_LANGS.keys(),
             revision=self.metadata.dataset["revision"],
+            num_proc=num_proc,
         )
         self.data_loaded = True
@@ -172,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -181,6 +188,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
             splits=self.metadata.eval_splits,
             langs=_LANGS.keys(),
             revision=self.metadata.dataset["revision"],
+            num_proc=num_proc,
         )
         self.data_loaded = True
@@ -219,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -228,6 +236,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
             splits=self.metadata.eval_splits,
             langs=_LANGS.keys(),
             revision=self.metadata.dataset["revision"],
+            num_proc=num_proc,
         )
         self.data_loaded = True
@@ -266,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
         prompt={"query": "Find a screenshot that relevant to the user's question."},
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -274,6 +283,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
             revision=self.metadata.dataset["revision"],
+            num_proc=num_proc,
         )
         self.data_loaded = True

mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py CHANGED Viewed

@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py CHANGED Viewed

@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py CHANGED Viewed

@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py CHANGED Viewed

@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackAndroid"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackEnglish"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackGamingRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackGisRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackMathematicaRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackPhysicsRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackProgrammersRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackStatsRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackTexRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackUnixRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackWebmastersRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
         adapted_from=["CQADupstackWordpressRetrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/nob/norquad.py CHANGED Viewed

@@ -50,7 +50,7 @@ Fishel, Mark},
         },
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/retrieval/nob/snl_retrieval.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         task_subtypes=["Article retrieval"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/retrieval/slk/slovak_sum_retrieval.py CHANGED Viewed

@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
         self.corpus, self.queries, self.relevant_docs = {}, {}, {}

mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py CHANGED Viewed

@@ -52,7 +52,7 @@ Zong, Chengqing},
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/sts/fao/faroese_sts.py CHANGED Viewed

@@ -43,5 +43,5 @@ Vulić, Ivan},
     min_score = 0
     max_score = 5
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("label", "score")

mteb/tasks/sts/fra/sick_fr_sts.py CHANGED Viewed

@@ -30,7 +30,7 @@ class SickFrSTS(AbsTaskSTS):
     min_score = 0
     max_score = 5
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {
                 "sentence_A": "sentence1",

mteb/tasks/sts/kor/klue_sts.py CHANGED Viewed

@@ -40,7 +40,7 @@ class KlueSTS(AbsTaskSTS):
     min_score = 0
     max_score = 5
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # In the case of KLUE STS, score value is nested within the `labels` field.
         # We need to extract the `score` and move it outside of the `labels` field for access.
         for split in self.dataset:

mteb/tasks/sts/por/sick_br_sts.py CHANGED Viewed

@@ -52,7 +52,7 @@ and de Paiva, Valeria},
     min_score = 1
     max_score = 5
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset,
             seed=42,

mteb/tasks/sts/rus/ru_para_phraser_sts.py CHANGED Viewed

@@ -54,7 +54,7 @@ Filippskikh, Elizaveta},
     min_score = -1
     max_score = 1
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {
                 "text_1": "sentence1",

mteb/tasks/zeroshot_classification/eng/sci_mmir.py CHANGED Viewed

@@ -41,7 +41,7 @@ class SciMMIR(AbsTaskZeroShotClassification):
     label_column_name: str = "class"
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         class_code = {
             "fig_result": 0,
             "fig_illustration": 1,

{mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.4
+Version: 2.7.5
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl