mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl
This diff shows the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- mteb/_create_dataloaders.py +47 -5
- mteb/_evaluators/any_sts_evaluator.py +2 -0
- mteb/_evaluators/clustering_evaluator.py +2 -0
- mteb/_evaluators/evaluator.py +2 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
- mteb/_evaluators/pair_classification_evaluator.py +3 -0
- mteb/_evaluators/retrieval_evaluator.py +3 -0
- mteb/_evaluators/sklearn_evaluator.py +6 -1
- mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
- mteb/_evaluators/text/summarization_evaluator.py +2 -0
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
- mteb/abstasks/abstask.py +31 -12
- mteb/abstasks/classification.py +10 -3
- mteb/abstasks/clustering.py +6 -2
- mteb/abstasks/clustering_legacy.py +8 -2
- mteb/abstasks/image/image_text_pair_classification.py +6 -2
- mteb/abstasks/multilabel_classification.py +2 -0
- mteb/abstasks/pair_classification.py +8 -2
- mteb/abstasks/retrieval.py +26 -11
- mteb/abstasks/retrieval_dataset_loaders.py +29 -19
- mteb/abstasks/sts.py +10 -3
- mteb/abstasks/text/bitext_mining.py +9 -5
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +2 -1
- mteb/abstasks/zeroshot_classification.py +8 -2
- mteb/evaluate.py +10 -2
- mteb/models/model_implementations/bm25.py +2 -0
- mteb/models/model_implementations/pylate_models.py +10 -0
- mteb/models/models_protocols.py +4 -0
- mteb/models/search_wrappers.py +12 -0
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +1 -1
- mteb/tasks/clustering/nob/vg_clustering.py +1 -1
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/RECORD +287 -287
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -83,7 +83,7 @@ class YahooAnswersTopicsClassificationV2(AbsTaskClassification):

     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train", "test"]
         )

@@ -42,7 +42,7 @@ class YelpReviewFullClassification(AbsTaskClassification):

     samples_per_label = 128

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -88,7 +88,7 @@ class YelpReviewFullClassificationV2(AbsTaskClassification):

     samples_per_label = 128

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -40,7 +40,7 @@ class EstonianValenceClassification(AbsTaskClassification):
         superseded_by="EstonianValenceClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("paragraph", "text").rename_column(
             "valence", "label"
         )

@@ -602,7 +602,7 @@ class DeepSentiPers(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("review", "text")


@@ -773,7 +773,7 @@ class NLPTwitterAnalysisClassification(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "text")


@@ -858,7 +858,7 @@ class FaIntentClassification(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("words", "text")
         self.dataset = self.dataset.rename_column("intent_label", "label")

@@ -889,7 +889,7 @@ class StyleClassification(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         mapping = {"formal": 1, "informal": 0}
         self.dataset = self.dataset.map(
             lambda example: {"label": mapping[example["label"]]}

@@ -927,7 +927,7 @@ class PerShopDomainClassification(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("domain", "label")


@@ -962,5 +962,5 @@ class PerShopIntentClassification(AbsTaskClassification):
     )
     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("Intents & Actions", "label")
@@ -37,7 +37,7 @@ class PersianFoodSentimentClassification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

@@ -36,7 +36,7 @@ class FilipinoShopeeReviewsClassification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

@@ -29,7 +29,7 @@ class FrenchBookReviews(AbsTaskClassification):
         superseded_by="FrenchBookReviews.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"reader_review": "text"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]

@@ -63,7 +63,7 @@ class FrenchBookReviewsV2(AbsTaskClassification):
         adapted_from=["FrenchBookReviews"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -35,7 +35,7 @@ class MovieReviewSentimentClassification(AbsTaskClassification):
         superseded_by="MovieReviewSentimentClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("review", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]

@@ -75,7 +75,7 @@ class MovieReviewSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["MovieReviewSentimentClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation", "test"]
         )

@@ -28,7 +28,7 @@ class GujaratiNewsClassification(AbsTaskClassification):
         superseded_by="GujaratiNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("headline", "text")


@@ -101,7 +101,7 @@ Stent, Amanda},
         adapted_from=["HindiDiscourseClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -37,7 +37,7 @@ class SentimentAnalysisHindi(AbsTaskClassification):
         superseded_by="SentimentAnalysisHindi.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -41,7 +41,7 @@ class IndonesianIdClickbaitClassification(AbsTaskClassification):
         superseded_by="IndonesianIdClickbaitClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.remove_columns(["label"]).rename_columns(
             {"title": "text", "label_score": "label"}
         )

@@ -89,7 +89,7 @@ class IndonesianIdClickbaitClassificationV2(AbsTaskClassification):
         adapted_from=["IndonesianIdClickbaitClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )
@@ -55,7 +55,7 @@ Purwarianti, Ayu},
         superseded_by="IndonesianMongabayConservationClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         splits = self.metadata.eval_splits
         class_labels = ["positif", "netral", "negatif"]


@@ -73,7 +73,7 @@ class JavaneseIMDBClassificationV2(AbsTaskClassification):
         adapted_from=["JavaneseIMDBClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -108,7 +108,7 @@ Zhou, Yichao},
         adapted_from=["WRIMEClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -35,7 +35,7 @@ class KannadaNewsClassification(AbsTaskClassification):
         superseded_by="KannadaNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("headline", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]

@@ -75,7 +75,7 @@ class KannadaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["KannadaNewsClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -38,7 +38,7 @@ class KlueTC(AbsTaskClassification):
         superseded_by="KLUE-TC.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         def id2str(example):
             return {"label": label_feature.int2str(example["label_id"])}


@@ -90,7 +90,7 @@ class KlueTCV2(AbsTaskClassification):
         adapted_from=["KlueTC"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["validation"]
         )

@@ -37,7 +37,7 @@ class KorFin(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns(
             {"SRC": "text", "SENTIMENT": "label"}
         ).remove_columns(["SID", "TYPE", "ASPECT"])

@@ -73,7 +73,7 @@ class KorHateClassificationV2(AbsTaskClassification):
         adapted_from=["KorHateClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -73,7 +73,7 @@ class KorSarcasmClassificationV2(AbsTaskClassification):
         adapted_from=["KorSarcasmClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -35,7 +35,7 @@ class MalayalamNewsClassification(AbsTaskClassification):
         superseded_by="MalayalamNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headings": "text"})


@@ -35,7 +35,7 @@ class MarathiNewsClassification(AbsTaskClassification):
         superseded_by="MarathiNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headline": "text"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

@@ -43,7 +43,7 @@ class AfriSentiLangClassification(AbsTaskClassification):

     samples_per_label = 32

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("tweet", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]

@@ -44,7 +44,7 @@ class CyrillicTurkicLangClassification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -45,7 +45,7 @@ class IndicNLPNewsClassification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.hf_subsets:
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"news": "text", "class": "label"}

@@ -55,7 +55,7 @@ class MasakhaNEWSClassification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             self.dataset[lang] = self.dataset[lang].rename_columns(
                 {"category": "label"}

@@ -234,7 +234,7 @@ class SIB200Classification(AbsTaskClassification):
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.dataset.keys():
             self.dataset[lang] = self.dataset[lang].class_encode_column("category")
             self.dataset[lang] = self.dataset[lang].rename_columns(

@@ -49,7 +49,7 @@ class TurkicClassification(AbsTaskClassification):
         )
         return dataset_lang["train"]

-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

@@ -53,7 +53,7 @@ Camacho-Collados, Jose},
 """,
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for lang in self.hf_subsets:
             self.dataset[lang] = self.stratified_subsampling(
                 self.dataset[lang], n_samples=256, seed=self.seed, splits=["test"]

@@ -47,7 +47,7 @@ Tan, Liling},
         superseded_by="NepaliNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_column("paras", "text")
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]

@@ -99,7 +99,7 @@ Tan, Liling},
         adapted_from=["NepaliNewsClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

@@ -32,7 +32,7 @@ class DutchSarcasticHeadlinesClassification(AbsTaskClassification):
         },
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"headline": "text", "is_sarcastic": "label"}

@@ -42,7 +42,7 @@ class VaccinChatNLClassification(AbsTaskClassification):
         },
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         for split in self.dataset:
             self.dataset[split] = self.dataset[split].rename_columns(
                 {"sentence1": "text"}
@@ -35,7 +35,7 @@ class OdiaNewsClassification(AbsTaskClassification):
         superseded_by="OdiaNewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.dataset.rename_columns({"headings": "text"})
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)


@@ -73,5 +73,5 @@ class OdiaNewsClassificationV2(AbsTaskClassification):
         adapted_from=["OdiaNewsClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)

@@ -69,7 +69,7 @@ class RomanianReviewsSentimentV2(AbsTaskClassification):
         adapted_from=["RomanianReviewsSentiment"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -71,7 +71,7 @@ class RomanianSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["RomanianSentimentClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

@@ -57,7 +57,7 @@ class GeoreviewClassificationV2(AbsTaskClassification):
         adapted_from=["GeoreviewClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -53,7 +53,7 @@ Oda, Yusuke},
         superseded_by="HeadlineClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -110,7 +110,7 @@ Oda, Yusuke},
         adapted_from=["HeadlineClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -57,7 +57,7 @@ Robnik-{\v{S}}ikonja, Marko},
         superseded_by="InappropriatenessClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -118,7 +118,7 @@ Robnik-{\v{S}}ikonja, Marko},
         adapted_from=["InappropriatenessClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -42,7 +42,7 @@ class RuReviewsClassification(AbsTaskClassification):
         superseded_by="RuReviewsClassification.v2",
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )

@@ -88,7 +88,7 @@ class RuReviewsClassificationV2(AbsTaskClassification):
         adapted_from=["RuReviewsClassification"],
     )

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
         )
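The change repeated throughout these hunks is a signature update: `dataset_transform` (and, in `TurkicClassification`, `load_data`) now accepts a `num_proc` keyword defaulting to 1, presumably so callers can request multi-process dataset operations. As a rough reference only, the sketch below shows the general shape of such a transform on a toy class; `ToyReviewTask`, its column names, and the forwarding of `num_proc` into `datasets.Dataset.map` are illustrative assumptions, not code taken from the mteb release.

```python
# Hypothetical sketch: only the `num_proc: int = 1` parameter is taken from the
# diff above; the class, columns, and use of `num_proc` are assumptions.
from datasets import Dataset


class ToyReviewTask:
    seed = 42

    def __init__(self, dataset: Dataset) -> None:
        self.dataset = dataset

    def dataset_transform(self, num_proc: int = 1) -> None:
        # Same shape as the hunks above: rename columns, then run any heavier
        # transform with the caller-supplied process count.
        self.dataset = self.dataset.rename_column("review", "text")
        self.dataset = self.dataset.map(
            lambda example: {"label": int(example["label"])},
            num_proc=num_proc,  # assumption: forwarded to Hugging Face datasets
        )


if __name__ == "__main__":
    ds = Dataset.from_dict({"review": ["great", "awful"], "label": ["1", "0"]})
    task = ToyReviewTask(ds)
    task.dataset_transform(num_proc=1)
    print(task.dataset.column_names)  # expected: ['text', 'label']
```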