PyPI - mteb - Versions diffs - 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl - Mend

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (529) hide show

mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py CHANGED Viewed

@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
     STSBenchmarkMultilingualVisualSTS,
 )
-task_list_stsb: list[AbsTask] = [
+task_list_stsb = [
     STSBenchmarkMultilingualVisualSTS().filter_languages(
         languages=["eng"], hf_subsets=["en"]
     )

mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval import (
     CQADupstackAndroidRetrievalFa,
     CQADupstackEnglishRetrievalFa,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
     CQADupstackWordpressRetrievalFa,
 )
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidRetrievalFa(),
     CQADupstackEnglishRetrievalFa(),
     CQADupstackGamingRetrievalFa(),

mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.classification import (
     SynPerChatbotConvSAAnger,
     SynPerChatbotConvSAFear,
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
     SynPerChatbotConvSASurprise,
 )
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     SynPerChatbotConvSAAnger(),
     SynPerChatbotConvSASatisfaction(),
     SynPerChatbotConvSAFriendship(),

mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py CHANGED Viewed

@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
     STS17MultilingualVisualSTS,
 )
-task_list_sts17_multi: list[AbsTask] = [
+task_list_sts17_multi = [
     STS17MultilingualVisualSTS().filter_languages(
         languages=["ara", "eng", "spa", "kor"],
         hf_subsets=[

mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py CHANGED Viewed

@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
     STSBenchmarkMultilingualVisualSTS,
 )
-task_list_multi: list[AbsTask] = [
+task_list_multi = [
     STSBenchmarkMultilingualVisualSTS().filter_languages(
         languages=[
             "deu",

mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval import (
     CQADupstackAndroidNLRetrieval,
     CQADupstackEnglishNLRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
     CQADupstackWordpressNLRetrieval,
 )
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidNLRetrieval(),
     CQADupstackEnglishNLRetrieval(),
     CQADupstackGamingNLRetrieval(),

mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
     CQADupstackAndroidRetrievalPL,
     CQADupstackEnglishRetrievalPL,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
     CQADupstackWordpressRetrievalPL,
 )
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidRetrievalPL(),
     CQADupstackEnglishRetrievalPL(),
     CQADupstackGamingRetrievalPL(),

mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py CHANGED Viewed

@@ -59,7 +59,7 @@ class PubChemSMILESBitextMining(AbsTaskBitextMining):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for subset in self.hf_subsets:
             self.dataset[subset] = self.dataset[subset].rename_columns(
                 COL_MAPPING[subset]

mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py CHANGED Viewed

@@ -27,7 +27,7 @@ class SAMSumFa(AbsTaskBitextMining):
         bibtex_citation="",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"text": "sentence1", "summary": "sentence2"}
         )
@@ -58,7 +58,7 @@ class SynPerChatbotSumSRetrieval(AbsTaskBitextMining):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"text": "sentence1", "summary": "sentence2"}
         )
@@ -89,7 +89,7 @@ class SynPerChatbotRAGSumSRetrieval(AbsTaskBitextMining):
         bibtex_citation=""" """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"text": "sentence1", "summary": "sentence2"}
         )

mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py CHANGED Viewed

@@ -60,7 +60,7 @@ Rapp, Reinhard},
         superseded_by="BUCC.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         dataset = {}
         for lang in self.dataset:
             dataset[lang] = {}

mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py CHANGED Viewed

@@ -265,7 +265,7 @@ class FloresBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py CHANGED Viewed

@@ -99,7 +99,7 @@ class IN22ConvBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py CHANGED Viewed

@@ -93,7 +93,7 @@ class IN22GenBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py CHANGED Viewed

@@ -35,7 +35,7 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining):
         prompt="Retrieve parallel sentences in Norwegian Bokmål and Nynorsk",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # Convert to standard format
         self.dataset = self.dataset.rename_column("nb", "sentence1")
         self.dataset = self.dataset.rename_column("nn", "sentence2")

mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py CHANGED Viewed

@@ -280,7 +280,7 @@ class NTREXBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py CHANGED Viewed

@@ -32,7 +32,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
         bibtex_citation="",
     )
-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -44,7 +44,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.hf_subsets:
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"romani": "sentence1", "hungarian": "sentence2"}

mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py CHANGED Viewed

@@ -230,7 +230,7 @@ class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         dataset = {}
         for langs in self.dataset:
             dataset[langs] = {}
@@ -284,7 +284,7 @@ class WebFAQBitextMiningQAs(AbsTaskBitextMining):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         dataset = {}
         for langs in self.dataset:
             dataset[langs] = {}

mteb/tasks/classification/ara/online_store_review_sentiment_classification.py CHANGED Viewed

@@ -28,7 +28,7 @@ class OnlineStoreReviewSentimentClassification(AbsTaskClassification):
         superseded_by="OnlineStoreReviewSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py CHANGED Viewed

@@ -37,7 +37,7 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification):
         superseded_by="RestaurantReviewSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # labels: 0 negative, 1 positive
         self.dataset = self.dataset.rename_column("polarity", "label")
         self.dataset = self.stratified_subsampling(

mteb/tasks/classification/ara/tweet_sarcasm_classification.py CHANGED Viewed

@@ -48,7 +48,7 @@ Mubarak, Hamdy},
         superseded_by="TweetSarcasmClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # labels: 0 non-sarcastic, 1 sarcastic
         self.dataset = self.dataset.rename_columns(
             {"tweet": "text", "sarcasm": "label"}

mteb/tasks/classification/ben/bengali_hate_speech_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
         superseded_by="BengaliHateSpeechClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/ben/bengali_sentiment_analysis.py CHANGED Viewed

@@ -36,7 +36,7 @@ class BengaliSentimentAnalysis(AbsTaskClassification):
         superseded_by="BengaliSentimentAnalysis.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py CHANGED Viewed

@@ -37,7 +37,7 @@ class BulgarianStoreReviewSentimentClassfication(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"Review": "text", "Category": "label"}
         )

mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py CHANGED Viewed

@@ -39,7 +39,7 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "rating_int": "label"}
         )
@@ -85,7 +85,7 @@ class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
     # Increase the samples_per_label in order to improve baseline performance
     samples_per_label = 20
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"], n_samples=2048
         )

mteb/tasks/classification/dan/ddisco_cohesion_classification.py CHANGED Viewed

@@ -56,7 +56,7 @@ Piperidis, Stelios},
         superseded_by="Ddisco.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"rating": "label"}).remove_columns(
             ["domain"]
         )

mteb/tasks/classification/dan/dk_hate_classification.py CHANGED Viewed

@@ -60,9 +60,9 @@ Piperidis, Stelios},
     samples_per_label = 16
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         # convert label to a 0/1 label
-        labels = self.dataset["train"]["label"]  # type: ignore
+        labels = self.dataset["train"]["label"]
         lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
         self.dataset = self.dataset.map(
             lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]

mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py CHANGED Viewed

@@ -49,7 +49,7 @@ Zesch, Torsten},
         superseded_by="GermanPoliticiansTwitterSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("majority_sentiment", "label")

mteb/tasks/classification/ell/greek_legal_code_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class GreekLegalCodeClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset["validation"] = (
             self.dataset["validation"]
             .shuffle(seed=self.seed)

mteb/tasks/classification/eng/dbpedia_classification.py CHANGED Viewed

@@ -40,7 +40,7 @@ class DBpediaClassification(AbsTaskClassification):
         superseded_by="DBpediaClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("content", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
@@ -85,7 +85,7 @@ class DBpediaClassificationV2(AbsTaskClassification):
         adapted_from=["DBpediaClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
         )

mteb/tasks/classification/eng/toxic_chat_classification.py CHANGED Viewed

@@ -40,7 +40,7 @@ class ToxicChatClassification(AbsTaskClassification):
         superseded_by="ToxicChatClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         keep_cols = ["user_input", "toxicity"]
         rename_dict = dict(zip(keep_cols, ["text", "label"]))
         remove_cols = [
@@ -93,7 +93,7 @@ class ToxicChatClassificationV2(AbsTaskClassification):
         adapted_from=["ToxicChatClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/eng/toxic_conversations_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class ToxicConversationsClassification(AbsTaskClassification):
     samples_per_label = 16
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -88,7 +88,7 @@ class ToxicConversationsClassificationV2(AbsTaskClassification):
     samples_per_label = 16
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/eng/tweet_topic_single_classification.py CHANGED Viewed

@@ -43,7 +43,7 @@ Barbieri, Francesco},
         superseded_by="TweetTopicSingleClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset["train"] = self.dataset["train_2021"]

mteb/tasks/classification/eng/yahoo_answers_topics_classification.py CHANGED Viewed

@@ -83,7 +83,7 @@ class YahooAnswersTopicsClassificationV2(AbsTaskClassification):
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
         )

mteb/tasks/classification/eng/yelp_review_full_classification.py CHANGED Viewed

@@ -42,7 +42,7 @@ class YelpReviewFullClassification(AbsTaskClassification):
     samples_per_label = 128
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
@@ -88,7 +88,7 @@ class YelpReviewFullClassificationV2(AbsTaskClassification):
     samples_per_label = 128
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/est/estonian_valence.py CHANGED Viewed

@@ -40,12 +40,12 @@ class EstonianValenceClassification(AbsTaskClassification):
         superseded_by="EstonianValenceClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("paragraph", "text").rename_column(
             "valence", "label"
         )
         # convert label to a numbers
-        labels = self.dataset["train"]["label"]  # type: ignore
+        labels = self.dataset["train"]["label"]
         lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
         self.dataset = self.dataset.map(
             lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]

mteb/tasks/classification/fas/fa_mteb_classification.py CHANGED Viewed

@@ -602,7 +602,7 @@ class DeepSentiPers(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("review", "text")
@@ -773,7 +773,7 @@ class NLPTwitterAnalysisClassification(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "text")
@@ -858,7 +858,7 @@ class FaIntentClassification(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("words", "text")
         self.dataset = self.dataset.rename_column("intent_label", "label")
@@ -889,7 +889,7 @@ class StyleClassification(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         mapping = {"formal": 1, "informal": 0}
         self.dataset = self.dataset.map(
             lambda example: {"label": mapping[example["label"]]}
@@ -927,7 +927,7 @@ class PerShopDomainClassification(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("domain", "label")
@@ -962,5 +962,5 @@ class PerShopIntentClassification(AbsTaskClassification):
     )
     samples_per_label = 32
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("Intents & Actions", "label")

mteb/tasks/classification/fas/persian_food_sentiment_classification.py CHANGED Viewed

@@ -37,7 +37,7 @@ class PersianFoodSentimentClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py CHANGED Viewed

@@ -36,7 +36,7 @@ class FilipinoShopeeReviewsClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

mteb/tasks/classification/fin/fin_toxicity_classification.py CHANGED Viewed

@@ -40,7 +40,7 @@ Laippala, Veronika},
         superseded_by="FinToxicityClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("label_toxicity", "label")
         remove_cols = [
             col

mteb/tasks/classification/fra/french_book_reviews.py CHANGED Viewed

@@ -29,7 +29,7 @@ class FrenchBookReviews(AbsTaskClassification):
         superseded_by="FrenchBookReviews.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"reader_review": "text"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
@@ -63,7 +63,7 @@ class FrenchBookReviewsV2(AbsTaskClassification):
         adapted_from=["FrenchBookReviews"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/fra/movie_review_sentiment_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class MovieReviewSentimentClassification(AbsTaskClassification):
         superseded_by="MovieReviewSentimentClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("review", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
@@ -75,7 +75,7 @@ class MovieReviewSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["MovieReviewSentimentClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

mteb/tasks/classification/guj/gujarati_news_classification.py CHANGED Viewed

@@ -28,7 +28,7 @@ class GujaratiNewsClassification(AbsTaskClassification):
         superseded_by="GujaratiNewsClassification.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("headline", "text")

mteb/tasks/classification/hin/hindi_discourse_classification.py CHANGED Viewed

@@ -101,7 +101,7 @@ Stent, Amanda},
         adapted_from=["HindiDiscourseClassification"],
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/classification/hin/sentiment_analysis_hindi.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SentimentAnalysisHindi(AbsTaskClassification):
         superseded_by="SentimentAnalysisHindi.v2",
     )
-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl