PyPI - mteb - Versions diffs - 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl - Mend

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (434) hide show

mteb/tasks/classification/fra/french_book_reviews.py CHANGED Viewed

@@ -29,7 +29,7 @@ class FrenchBookReviews(AbsTaskClassification):
         superseded_by="FrenchBookReviews.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"reader_review": "text"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -63,7 +63,7 @@ class FrenchBookReviewsV2(AbsTaskClassification):
         adapted_from=["FrenchBookReviews"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/fra/movie_review_sentiment_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class MovieReviewSentimentClassification(AbsTaskClassification):
         superseded_by="MovieReviewSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("review", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
@@ -75,7 +75,7 @@ class MovieReviewSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["MovieReviewSentimentClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

mteb/tasks/classification/guj/gujarati_news_classification.py CHANGED Viewed

@@ -28,7 +28,7 @@ class GujaratiNewsClassification(AbsTaskClassification):
         superseded_by="GujaratiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("headline", "text")

mteb/tasks/classification/hin/hindi_discourse_classification.py CHANGED Viewed

@@ -101,7 +101,7 @@ Stent, Amanda},
         adapted_from=["HindiDiscourseClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/hin/sentiment_analysis_hindi.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SentimentAnalysisHindi(AbsTaskClassification):
         superseded_by="SentimentAnalysisHindi.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py CHANGED Viewed

@@ -41,7 +41,7 @@ class IndonesianIdClickbaitClassification(AbsTaskClassification):
         superseded_by="IndonesianIdClickbaitClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.remove_columns(["label"]).rename_columns(
             {"title": "text", "label_score": "label"}
         )
@@ -89,7 +89,7 @@ class IndonesianIdClickbaitClassificationV2(AbsTaskClassification):
         adapted_from=["IndonesianIdClickbaitClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py CHANGED Viewed

@@ -55,7 +55,7 @@ Purwarianti, Ayu},
         superseded_by="IndonesianMongabayConservationClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         class_labels = ["positif", "netral", "negatif"]

mteb/tasks/classification/ita/dado_eval_coarse_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class DadoEvalCoarseClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("class", "label")
         unused_cols = [
             col

mteb/tasks/classification/ita/ita_casehold_classification.py CHANGED Viewed

@@ -44,7 +44,7 @@ class ItaCaseholdClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"summary": "text", "materia": "label"}
         )

mteb/tasks/classification/ita/sardi_stance_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class SardiStanceClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         unused_cols = [
             col
             for col in self.dataset["test"].column_names

mteb/tasks/classification/jav/javanese_imdb_classification.py CHANGED Viewed

@@ -73,7 +73,7 @@ class JavaneseIMDBClassificationV2(AbsTaskClassification):
         adapted_from=["JavaneseIMDBClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/jpn/wrime_classification.py CHANGED Viewed

@@ -108,7 +108,7 @@ Zhou, Yichao},
         adapted_from=["WRIMEClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/kan/kannada_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class KannadaNewsClassification(AbsTaskClassification):
         superseded_by="KannadaNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("headline", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -75,7 +75,7 @@ class KannadaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["KannadaNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/kor/klue_tc.py CHANGED Viewed

@@ -38,7 +38,7 @@ class KlueTC(AbsTaskClassification):
         superseded_by="KLUE-TC.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         def id2str(example):
             return {"label": label_feature.int2str(example["label_id"])}
@@ -90,7 +90,7 @@ class KlueTCV2(AbsTaskClassification):
         adapted_from=["KlueTC"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation"]
         )

mteb/tasks/classification/kor/kor_fin.py CHANGED Viewed

@@ -37,7 +37,7 @@ class KorFin(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"SRC": "text", "SENTIMENT": "label"}
         ).remove_columns(["SID", "TYPE", "ASPECT"])

mteb/tasks/classification/kor/kor_hate_classification.py CHANGED Viewed

@@ -73,7 +73,7 @@ class KorHateClassificationV2(AbsTaskClassification):
         adapted_from=["KorHateClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/kor/kor_sarcasm_classification.py CHANGED Viewed

@@ -73,7 +73,7 @@ class KorSarcasmClassificationV2(AbsTaskClassification):
         adapted_from=["KorSarcasmClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/mal/malayalam_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class MalayalamNewsClassification(AbsTaskClassification):
         superseded_by="MalayalamNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headings": "text"})

mteb/tasks/classification/mar/marathi_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class MarathiNewsClassification(AbsTaskClassification):
         superseded_by="MarathiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headline": "text"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/multilingual/afri_senti_lang_classification.py CHANGED Viewed

@@ -43,7 +43,7 @@ class AfriSentiLangClassification(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]

mteb/tasks/classification/multilingual/catalonia_tweet_classification.py CHANGED Viewed

@@ -60,7 +60,7 @@ Piperidis, Stelios},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"TWEET": "text", "LABEL": "label"}

mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py CHANGED Viewed

@@ -44,7 +44,7 @@ class CyrillicTurkicLangClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/multilingual/indic_nlp_news_classification.py CHANGED Viewed

@@ -45,7 +45,7 @@ class IndicNLPNewsClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.hf_subsets:
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"news": "text", "class": "label"}

mteb/tasks/classification/multilingual/masakha_news_classification.py CHANGED Viewed

@@ -55,7 +55,7 @@ class MasakhaNEWSClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"category": "label"}

mteb/tasks/classification/multilingual/multi_hate_classification.py CHANGED Viewed

@@ -86,7 +86,7 @@ Talat, Zeerak},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # for each language perform some transforms
         for lang in self.dataset.keys():
             _dataset = self.dataset[lang]

mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py CHANGED Viewed

@@ -89,7 +89,7 @@ Vylomova, Ekaterina},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # create a train set from the test set for Welsh language (cym)
         lang = "cym"
         if lang in self.dataset.keys():

mteb/tasks/classification/multilingual/scala_classification.py CHANGED Viewed

@@ -54,7 +54,7 @@ Fishel, Mark},
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             # convert label to a 0/1 label
             labels = self.dataset[lang]["train"]["label"]

mteb/tasks/classification/multilingual/sib200_classification.py CHANGED Viewed

@@ -234,7 +234,7 @@ class SIB200Classification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             self.dataset[lang] = self.dataset[lang].class_encode_column("category")
             self.dataset[lang] = self.dataset[lang].rename_columns(

mteb/tasks/classification/multilingual/turkic_classification.py CHANGED Viewed

@@ -49,7 +49,7 @@ class TurkicClassification(AbsTaskClassification):
         )
         return dataset_lang["train"]
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/classification/multilingual/tweet_sentiment_classification.py CHANGED Viewed

@@ -53,7 +53,7 @@ Camacho-Collados, Jose},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.hf_subsets:
             self.dataset[lang] = self.stratified_subsampling(
                 self.dataset[lang], n_samples=256, seed=self.seed, splits=["test"]

mteb/tasks/classification/nep/nepali_news_classification.py CHANGED Viewed

@@ -47,7 +47,7 @@ Tan, Liling},
         superseded_by="NepaliNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("paras", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -99,7 +99,7 @@ Tan, Liling},
         adapted_from=["NepaliNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py CHANGED Viewed

@@ -32,7 +32,7 @@ class DutchSarcasticHeadlinesClassification(AbsTaskClassification):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"headline": "text", "is_sarcastic": "label"}

mteb/tasks/classification/nld/vaccin_chat_nl_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class VaccinChatNLClassification(AbsTaskClassification):
         },
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"sentence1": "text"}

mteb/tasks/classification/ory/odia_news_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class OdiaNewsClassification(AbsTaskClassification):
         superseded_by="OdiaNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headings": "text"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
@@ -73,5 +73,5 @@ class OdiaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["OdiaNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

mteb/tasks/classification/pan/punjabi_news_classification.py CHANGED Viewed

@@ -34,7 +34,7 @@ class PunjabiNewsClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"article": "text", "is_about_politics": "label"}
         )

mteb/tasks/classification/ron/moroco.py CHANGED Viewed

@@ -77,7 +77,7 @@ class MorocoV2(AbsTaskClassification):
         adapted_from=["Moroco"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ron/romanian_reviews_sentiment.py CHANGED Viewed

@@ -69,7 +69,7 @@ class RomanianReviewsSentimentV2(AbsTaskClassification):
         adapted_from=["RomanianReviewsSentiment"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ron/romanian_sentiment_classification.py CHANGED Viewed

@@ -71,7 +71,7 @@ class RomanianSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["RomanianSentimentClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/rus/georeview_classification.py CHANGED Viewed

@@ -57,7 +57,7 @@ class GeoreviewClassificationV2(AbsTaskClassification):
         adapted_from=["GeoreviewClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/headline_classification.py CHANGED Viewed

@@ -53,7 +53,7 @@ Oda, Yusuke},
         superseded_by="HeadlineClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )
@@ -110,7 +110,7 @@ Oda, Yusuke},
         adapted_from=["HeadlineClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/inappropriateness_classification.py CHANGED Viewed

@@ -57,7 +57,7 @@ Robnik-{\v{S}}ikonja, Marko},
         superseded_by="InappropriatenessClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )
@@ -118,7 +118,7 @@ Robnik-{\v{S}}ikonja, Marko},
         adapted_from=["InappropriatenessClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_reviews_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class RuReviewsClassification(AbsTaskClassification):
         superseded_by="RuReviewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )
@@ -88,7 +88,7 @@ class RuReviewsClassificationV2(AbsTaskClassification):
         adapted_from=["RuReviewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class RuSciBenchGRNTIClassification(AbsTaskClassification):
         superseded_by="RuSciBenchGRNTIClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class RuSciBenchOECDClassification(AbsTaskClassification):
         superseded_by="RuSciBenchOECDClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py CHANGED Viewed

@@ -28,7 +28,7 @@ class RuToxicOKMLCUPClassification(AbsTaskClassification):
         superseded_by="RuToxicOKMLCUPClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("toxic", "label")

mteb/tasks/classification/san/sanskrit_shlokas_classification.py CHANGED Viewed

@@ -46,5 +46,5 @@ Tan, Liling},
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"Sloka": "text", "Class": "label"})

mteb/tasks/classification/sin/sinhala_news_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class SinhalaNewsClassification(AbsTaskClassification):
         superseded_by="SinhalaNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"comments": "text", "labels": "label"}
         )
@@ -91,7 +91,7 @@ class SinhalaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SinhalaNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/sin/sinhala_news_source_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class SinhalaNewsSourceClassification(AbsTaskClassification):
         superseded_by="SinhalaNewsSourceClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("comment", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -75,7 +75,7 @@ class SinhalaNewsSourceClassificationV2(AbsTaskClassification):
         adapted_from=["SinhalaNewsSourceClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class CSFDSKMovieReviewSentimentClassification(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "rating_int": "label"}
         )
@@ -89,7 +89,7 @@ class CSFDSKMovieReviewSentimentClassificationV2(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"], n_samples=N_SAMPLES
         )

mteb/tasks/classification/slv/frenk_sl_classification.py CHANGED Viewed

@@ -75,7 +75,7 @@ class FrenkSlClassificationV2(AbsTaskClassification):
         adapted_from=["FrenkSlClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/spa/spanish_news_classification.py CHANGED Viewed

@@ -29,7 +29,7 @@ class SpanishNewsClassification(AbsTaskClassification):
         superseded_by="SpanishNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"category": "label"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -63,7 +63,7 @@ class SpanishNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SpanishNewsClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl