PyPI - mteb - Versions diffs - 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl - Mend

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (287) hide show

mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class RuSciBenchGRNTIClassification(AbsTaskClassification):
         superseded_by="RuSciBenchGRNTIClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class RuSciBenchOECDClassification(AbsTaskClassification):
         superseded_by="RuSciBenchOECDClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py CHANGED Viewed

@@ -28,7 +28,7 @@ class RuToxicOKMLCUPClassification(AbsTaskClassification):
         superseded_by="RuToxicOKMLCUPClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("toxic", "label")

mteb/tasks/classification/san/sanskrit_shlokas_classification.py CHANGED Viewed

@@ -46,5 +46,5 @@ Tan, Liling},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"Sloka": "text", "Class": "label"})

mteb/tasks/classification/sin/sinhala_news_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class SinhalaNewsClassification(AbsTaskClassification):
         superseded_by="SinhalaNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"comments": "text", "labels": "label"}
         )
@@ -91,7 +91,7 @@ class SinhalaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SinhalaNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/sin/sinhala_news_source_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class SinhalaNewsSourceClassification(AbsTaskClassification):
         superseded_by="SinhalaNewsSourceClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("comment", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -75,7 +75,7 @@ class SinhalaNewsSourceClassificationV2(AbsTaskClassification):
         adapted_from=["SinhalaNewsSourceClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CSFDSKMovieReviewSentimentClassification(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "rating_int": "label"}
         )
@@ -89,7 +89,7 @@ class CSFDSKMovieReviewSentimentClassificationV2(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"], n_samples=N_SAMPLES
         )

mteb/tasks/classification/slv/frenk_sl_classification.py CHANGED Viewed

@@ -75,7 +75,7 @@ class FrenkSlClassificationV2(AbsTaskClassification):
         adapted_from=["FrenkSlClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/spa/spanish_news_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class SpanishNewsClassification(AbsTaskClassification):
         superseded_by="SpanishNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"category": "label"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -63,7 +63,7 @@ class SpanishNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SpanishNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/ssw/siswati_news_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class SiswatiNewsClassification(AbsTaskClassification):
         superseded_by="SiswatiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"title": "text"})

mteb/tasks/classification/tam/tamil_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class TamilNewsClassification(AbsTaskClassification):
         superseded_by="TamilNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"NewsInTamil": "text", "Category": "label"}
         )
@@ -75,5 +75,5 @@ class TamilNewsClassificationV2(AbsTaskClassification):
         adapted_from=["TamilNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py CHANGED Viewed

@@ -28,7 +28,7 @@ class TeluguAndhraJyotiNewsClassification(AbsTaskClassification):
         superseded_by="TeluguAndhraJyotiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"body": "text", "topic": "label"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
@@ -59,5 +59,5 @@ class TeluguAndhraJyotiNewsClassificationV2(AbsTaskClassification):
         adapted_from=["TeluguAndhraJyotiNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/tha/wongnai_reviews_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class WongnaiReviewsClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"review_body": "text", "star_rating": "label"}
         )

mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class TurkishMovieSentimentClassification(AbsTaskClassification):
         superseded_by="TurkishMovieSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -76,7 +76,7 @@ class TurkishMovieSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["TurkishMovieSentimentClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ukr/ukr_formality_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ Tetreault, Joel},
         superseded_by="UkrFormalityClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("labels", "label")
         self.dataset = self.dataset.class_encode_column("label")
         self.dataset = self.stratified_subsampling(
@@ -84,7 +84,7 @@ Tetreault, Joel},
         adapted_from=["UkrFormalityClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
         )

mteb/tasks/classification/vie/toxic_conversations_vn_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ class ToxicConversationsVNClassification(AbsTaskClassification):
         adapted_from=["ToxicConversationsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/vie/vie_student_feedback_classification.py CHANGED Viewed

@@ -79,7 +79,7 @@ class VieStudentFeedbackClassificationV2(AbsTaskClassification):
         adapted_from=["VieStudentFeedbackClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/zho/yue_openrice_review_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ class YueOpenriceReviewClassification(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -82,7 +82,7 @@ class YueOpenriceReviewClassificationV2(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/zul/isi_zulu_news_classification.py CHANGED Viewed

@@ -38,7 +38,7 @@ class IsiZuluNewsClassification(AbsTaskClassification):
         superseded_by="IsiZuluNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"title": "text"})

mteb/tasks/clustering/deu/blurbs_clustering_p2p.py CHANGED Viewed

@@ -82,7 +82,7 @@ class BlurbsClusteringP2PFast(AbsTaskClustering):
         adapted_from=["BlurbsClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/deu/blurbs_clustering_s2s.py CHANGED Viewed

@@ -91,7 +91,7 @@ class BlurbsClusteringS2SFast(AbsTaskClustering):
         adapted_from=["BlurbsClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/arxiv_clustering_p2p.py CHANGED Viewed

@@ -82,7 +82,7 @@ class ArxivClusteringP2PFast(AbsTaskClusteringLegacy):
         # simply downsample each cluster.
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         rng_state = random.Random(self.seed)
         ds = {}

mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py CHANGED Viewed

@@ -38,7 +38,7 @@ class ArXivHierarchicalClusteringP2P(AbsTaskClustering):
         bibtex_citation="",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
@@ -78,7 +78,7 @@ class ArXivHierarchicalClusteringS2S(AbsTaskClustering):
         bibtex_citation="",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/big_patent_clustering.py CHANGED Viewed

@@ -104,7 +104,7 @@ Summarization},
         adapted_from=["BigPatentClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])
         self.dataset = self.stratified_subsampling(

mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BiorxivClusteringP2PFast(AbsTaskClustering):
         adapted_from=["BiorxivClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])

mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BiorxivClusteringS2SFast(AbsTaskClustering):
         adapted_from=["BiorxivClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.metadata.eval_splits:
             _check_label_distribution(self.dataset[split])

mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py CHANGED Viewed

@@ -37,7 +37,7 @@ class MedrxivClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MedrxivClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py CHANGED Viewed

@@ -37,7 +37,7 @@ class MedrxivClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MedrxivClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/reddit_clustering.py CHANGED Viewed

@@ -51,7 +51,7 @@ Iryna Gurevych},
         adapted_from=["RedditClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/reddit_clustering_p2p.py CHANGED Viewed

@@ -94,7 +94,7 @@ Iryna Gurevych},
         adapted_from=["RedditClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/stack_exchange_clustering.py CHANGED Viewed

@@ -51,7 +51,7 @@ Iryna Gurevych},
         adapted_from=["StackExchangeClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py CHANGED Viewed

@@ -52,7 +52,7 @@ Iryna Gurevych},
         adapted_from=["StackExchangeClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py CHANGED Viewed

@@ -93,7 +93,7 @@ class TwentyNewsgroupsClusteringFast(AbsTaskClustering):
         adapted_from=["TwentyNewsgroupsClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))

mteb/tasks/clustering/fas/fa_mteb_clustering.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BeytooteClustering(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset,
             seed=self.seed,
@@ -93,7 +93,7 @@ class HamshahriClustring(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.map(
             lambda x: {"sentences": f"{x['title']}\n: {x['summary']}"}
         )
@@ -151,7 +151,7 @@ class NLPTwitterAnalysisClustering(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "sentences")
         self.dataset = self.dataset.rename_column("label", "labels")
         self.dataset = self.stratified_subsampling(
@@ -187,7 +187,7 @@ class SIDClustring(AbsTaskClustering):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset,
             seed=self.seed,

mteb/tasks/clustering/fra/hal_clustering_s2s.py CHANGED Viewed

@@ -48,7 +48,7 @@ class HALClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="HALClusteringS2S.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         """Convert to standard format"""
         self.dataset = self.dataset.remove_columns("hal_id")
         titles = self.dataset["test"]["title"]
@@ -98,7 +98,7 @@ class HALClusteringS2SFast(AbsTaskClustering):
         adapted_from=["HALClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         """Convert to standard format"""
         self.dataset["test"] = self.dataset["test"].remove_columns("hal_id")
         self.dataset["test"] = self.dataset["test"].rename_columns(

mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringP2P(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringP2P.v2",
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -124,7 +124,7 @@ class MLSUMClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringP2P"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringS2S.v2",
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -119,7 +119,7 @@ class MLSUMClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringS2S"],
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py CHANGED Viewed

@@ -239,7 +239,7 @@ class SIB200ClusteringFast(AbsTaskClustering):
 """,  # combined train, validation, and test into test.
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for lang in self.hf_subsets:
             labels = []

mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py CHANGED Viewed

@@ -81,7 +81,7 @@ class WikiClusteringFastP2P(AbsTaskClustering):
         adapted_from=["WikiClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for lang in self.hf_subsets:
             labels = []

mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py CHANGED Viewed

@@ -33,7 +33,7 @@ class DutchNewsArticlesClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"label": "labels", "text": "sentences"}

mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py CHANGED Viewed

@@ -33,7 +33,7 @@ class DutchNewsArticlesClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"label": "labels", "title": "sentences"}

mteb/tasks/clustering/nld/iconclass_clustering_s2s.py CHANGED Viewed

@@ -43,7 +43,7 @@ class IconclassClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(
                 lambda ex: {"labels": ex["label"], "sentences": ex["text"]}

mteb/tasks/clustering/nld/open_tender_clustering_p2p.py CHANGED Viewed

@@ -43,7 +43,7 @@ class OpenTenderClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # reuse the dataset for classification
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(

mteb/tasks/clustering/nld/vabb_clustering_p2p.py CHANGED Viewed

@@ -44,7 +44,7 @@ class VABBClusteringP2P(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].map(
                 lambda ex: {

mteb/tasks/clustering/nld/vabb_clustering_s2s.py CHANGED Viewed

@@ -44,7 +44,7 @@ class VABBClusteringS2S(AbsTaskClustering):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"title": "sentences"}

mteb/tasks/clustering/nob/snl_clustering.py CHANGED Viewed

@@ -58,7 +58,7 @@ class SNLClustering(AbsTaskClusteringLegacy):
         superseded_by="SNLHierarchicalClusteringP2P",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         documents: list = []

mteb/tasks/clustering/nob/vg_clustering.py CHANGED Viewed

@@ -58,7 +58,7 @@ class VGClustering(AbsTaskClusteringLegacy):
         superseded_by="VGHierarchicalClusteringP2P",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         documents: list = []

mteb/tasks/clustering/pol/polish_clustering.py CHANGED Viewed

@@ -131,7 +131,7 @@ Piperidis, Stelios},
         adapted_from=["EightTagsClustering"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(chain.from_iterable(self.dataset[split]["labels"]))
@@ -204,7 +204,7 @@ class PlscClusteringS2SFast(AbsTaskClustering):
         adapted_from=["PlscClusteringS2S"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = self.dataset[split]["labels"]
@@ -286,7 +286,7 @@ class PlscClusteringP2PFast(AbsTaskClustering):
         adapted_from=["PlscClusteringP2P"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = self.dataset[split]["labels"]

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl