PyPI - mteb - Versions diffs - 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl - Mend

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (434) hide show

mteb/tasks/classification/ssw/siswati_news_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class SiswatiNewsClassification(AbsTaskClassification):
         superseded_by="SiswatiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"title": "text"})

mteb/tasks/classification/tam/tamil_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class TamilNewsClassification(AbsTaskClassification):
         superseded_by="TamilNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"NewsInTamil": "text", "Category": "label"}
         )
@@ -75,5 +75,5 @@ class TamilNewsClassificationV2(AbsTaskClassification):
         adapted_from=["TamilNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py CHANGED Viewed

@@ -28,7 +28,7 @@ class TeluguAndhraJyotiNewsClassification(AbsTaskClassification):
         superseded_by="TeluguAndhraJyotiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"body": "text", "topic": "label"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
@@ -59,5 +59,5 @@ class TeluguAndhraJyotiNewsClassificationV2(AbsTaskClassification):
         adapted_from=["TeluguAndhraJyotiNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/tha/wongnai_reviews_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class WongnaiReviewsClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"review_body": "text", "star_rating": "label"}
         )

mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class TurkishMovieSentimentClassification(AbsTaskClassification):
         superseded_by="TurkishMovieSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -76,7 +76,7 @@ class TurkishMovieSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["TurkishMovieSentimentClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ukr/ukr_formality_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ Tetreault, Joel},
         superseded_by="UkrFormalityClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("labels", "label")
         self.dataset = self.dataset.class_encode_column("label")
         self.dataset = self.stratified_subsampling(
@@ -84,7 +84,7 @@ Tetreault, Joel},
         adapted_from=["UkrFormalityClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
         )

mteb/tasks/classification/vie/toxic_conversations_vn_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ class ToxicConversationsVNClassification(AbsTaskClassification):
         adapted_from=["ToxicConversationsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/vie/vie_student_feedback_classification.py CHANGED Viewed

@@ -79,7 +79,7 @@ class VieStudentFeedbackClassificationV2(AbsTaskClassification):
         adapted_from=["VieStudentFeedbackClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/zho/yue_openrice_review_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ class YueOpenriceReviewClassification(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -82,7 +82,7 @@ class YueOpenriceReviewClassificationV2(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/zul/isi_zulu_news_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class IsiZuluNewsClassification(AbsTaskClassification):
         superseded_by="IsiZuluNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"title": "text"})

mteb/tasks/clustering/deu/blurbs_clustering_p2p.py CHANGED Viewed

@@ -82,7 +82,7 @@ class BlurbsClusteringP2PFast(AbsTaskClustering):
         adapted_from=["BlurbsClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/deu/blurbs_clustering_s2s.py CHANGED Viewed

@@ -91,7 +91,7 @@ class BlurbsClusteringS2SFast(AbsTaskClustering):
         adapted_from=["BlurbsClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/arxiv_clustering_p2p.py CHANGED Viewed

@@ -82,7 +82,7 @@ class ArxivClusteringP2PFast(AbsTaskClusteringLegacy):
         # simply downsample each cluster.
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         rng_state = random.Random(self.seed)
         ds = {}

mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py CHANGED Viewed

@@ -38,7 +38,7 @@ class ArXivHierarchicalClusteringP2P(AbsTaskClustering):
         bibtex_citation="",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
@@ -78,7 +78,7 @@ class ArXivHierarchicalClusteringS2S(AbsTaskClustering):
         bibtex_citation="",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/big_patent_clustering.py CHANGED Viewed

@@ -104,7 +104,7 @@ Summarization},
         adapted_from=["BigPatentClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])
         self.dataset = self.stratified_subsampling(

mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BiorxivClusteringP2PFast(AbsTaskClustering):
         adapted_from=["BiorxivClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])

mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BiorxivClusteringS2SFast(AbsTaskClustering):
         adapted_from=["BiorxivClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])

mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py CHANGED Viewed

@@ -37,7 +37,7 @@ class MedrxivClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MedrxivClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py CHANGED Viewed

@@ -37,7 +37,7 @@ class MedrxivClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MedrxivClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/reddit_clustering.py CHANGED Viewed

@@ -51,7 +51,7 @@ Iryna Gurevych},
         adapted_from=["RedditClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/reddit_clustering_p2p.py CHANGED Viewed

@@ -94,7 +94,7 @@ Iryna Gurevych},
         adapted_from=["RedditClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/stack_exchange_clustering.py CHANGED Viewed

@@ -51,7 +51,7 @@ Iryna Gurevych},
         adapted_from=["StackExchangeClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py CHANGED Viewed

@@ -52,7 +52,7 @@ Iryna Gurevych},
         adapted_from=["StackExchangeClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py CHANGED Viewed

@@ -93,7 +93,7 @@ class TwentyNewsgroupsClusteringFast(AbsTaskClustering):
         adapted_from=["TwentyNewsgroupsClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/fas/fa_mteb_clustering.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BeytooteClustering(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset,
             seed=self.seed,
@@ -93,7 +93,7 @@ class HamshahriClustring(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.map(
             lambda x: {"sentences": f"{x['title']}\n: {x['summary']}"}
         )
@@ -151,7 +151,7 @@ class NLPTwitterAnalysisClustering(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "sentences")
         self.dataset = self.dataset.rename_column("label", "labels")
         self.dataset = self.stratified_subsampling(
@@ -187,7 +187,7 @@ class SIDClustring(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset,
             seed=self.seed,

mteb/tasks/clustering/fra/hal_clustering_s2s.py CHANGED Viewed

@@ -48,7 +48,7 @@ class HALClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="HALClusteringS2S.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         """Convert to standard format"""
         self.dataset = self.dataset.remove_columns("hal_id")
         titles = self.dataset["test"]["title"]
@@ -98,7 +98,7 @@ class HALClusteringS2SFast(AbsTaskClustering):
         adapted_from=["HALClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         """Convert to standard format"""
         self.dataset["test"] = self.dataset["test"].remove_columns("hal_id")
         self.dataset["test"] = self.dataset["test"].rename_columns(

mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringP2P(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringP2P.v2",
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -124,7 +124,7 @@ class MLSUMClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringP2P"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringS2S.v2",
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -119,7 +119,7 @@ class MLSUMClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringS2S"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py CHANGED Viewed

@@ -239,7 +239,7 @@ class SIB200ClusteringFast(AbsTaskClustering):
 """,  # combined train, validation, and test into test.
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for lang in self.hf_subsets:
             labels = []

mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py CHANGED Viewed

@@ -81,7 +81,7 @@ class WikiClusteringFastP2P(AbsTaskClustering):
         adapted_from=["WikiClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for lang in self.hf_subsets:
             labels = []

mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py CHANGED Viewed

@@ -33,7 +33,7 @@ class DutchNewsArticlesClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"label": "labels", "text": "sentences"}

mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py CHANGED Viewed

@@ -33,7 +33,7 @@ class DutchNewsArticlesClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"label": "labels", "title": "sentences"}

mteb/tasks/clustering/nld/iconclass_clustering_s2s.py CHANGED Viewed

@@ -43,7 +43,7 @@ class IconclassClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(
                 lambda ex: {"labels": ex["label"], "sentences": ex["text"]}

mteb/tasks/clustering/nld/open_tender_clustering_p2p.py CHANGED Viewed

@@ -43,7 +43,7 @@ class OpenTenderClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # reuse the dataset for classification
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(

mteb/tasks/clustering/nld/vabb_clustering_p2p.py CHANGED Viewed

@@ -44,7 +44,7 @@ class VABBClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(
                 lambda ex: {

mteb/tasks/clustering/nld/vabb_clustering_s2s.py CHANGED Viewed

@@ -44,7 +44,7 @@ class VABBClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"title": "sentences"}

mteb/tasks/clustering/nob/snl_clustering.py CHANGED Viewed

@@ -58,7 +58,7 @@ class SNLClustering(AbsTaskClusteringLegacy):
         superseded_by="SNLHierarchicalClusteringP2P",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         documents: list = []

mteb/tasks/clustering/nob/vg_clustering.py CHANGED Viewed

@@ -58,7 +58,7 @@ class VGClustering(AbsTaskClusteringLegacy):
         superseded_by="VGHierarchicalClusteringP2P",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         documents: list = []

mteb/tasks/clustering/pol/polish_clustering.py CHANGED Viewed

@@ -131,7 +131,7 @@ Piperidis, Stelios},
         adapted_from=["EightTagsClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(chain.from_iterable(self.dataset[split]["labels"]))
@@ -204,7 +204,7 @@ class PlscClusteringS2SFast(AbsTaskClustering):
         adapted_from=["PlscClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = self.dataset[split]["labels"]
@@ -286,7 +286,7 @@ class PlscClusteringP2PFast(AbsTaskClustering):
         adapted_from=["PlscClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = self.dataset[split]["labels"]

mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py CHANGED Viewed

@@ -32,7 +32,7 @@ class RuSciBenchGRNTIClusteringP2P(AbsTaskClustering):
         prompt="Identify the category of scientific papers based on the titles and abstracts",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"label": "labels", "text": "sentences"}
         )

mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py CHANGED Viewed

@@ -32,7 +32,7 @@ class RuSciBenchOECDClusteringP2P(AbsTaskClustering):
         prompt="Identify the category of scientific papers based on the titles and abstracts",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"label": "labels", "text": "sentences"}
         )

mteb/tasks/clustering/zho/cmteb_clustering.py CHANGED Viewed

@@ -51,7 +51,7 @@ class CLSClusteringFastS2S(AbsTaskClustering):
         adapted_from=["CLSClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
@@ -110,7 +110,7 @@ class CLSClusteringFastP2P(AbsTaskClustering):
         adapted_from=["CLSClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
@@ -239,7 +239,7 @@ class ThuNewsClusteringFastS2S(AbsTaskClustering):
         adapted_from=["ThuNewsClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
@@ -298,7 +298,7 @@ class ThuNewsClusteringFastP2P(AbsTaskClustering):
         adapted_from=["ThuNewsClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/image_text_pair_classification/eng/image_co_de.py CHANGED Viewed

@@ -53,7 +53,7 @@ class ImageCoDe(AbsTaskImageTextPairClassification):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py CHANGED Viewed

@@ -45,7 +45,7 @@ class SugarCrepe(AbsTaskImageTextPairClassification):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py CHANGED Viewed

@@ -175,7 +175,7 @@ class mFollowIRCrossLingual(AbsTaskRetrieval):  # noqa: N801
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -243,7 +243,7 @@ class mFollowIR(AbsTaskRetrieval):  # noqa: N801
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/multichoice/eng/cv_bench.py CHANGED Viewed

@@ -123,7 +123,7 @@ class CVBenchCount(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -165,7 +165,7 @@ class CVBenchRelation(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -207,7 +207,7 @@ class CVBenchDepth(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -249,7 +249,7 @@ class CVBenchDistance(AbsTaskRetrieval):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl