mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287)
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +26 -11
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/evaluate.py +10 -2
  27. mteb/models/model_implementations/bm25.py +2 -0
  28. mteb/models/model_implementations/pylate_models.py +10 -0
  29. mteb/models/models_protocols.py +4 -0
  30. mteb/models/search_wrappers.py +12 -0
  31. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  33. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  36. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  37. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  38. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  39. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  40. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  41. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  42. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  43. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  44. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  45. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  46. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  47. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  48. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  49. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  50. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  51. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  52. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  53. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  54. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  55. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  56. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  57. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  58. mteb/tasks/classification/est/estonian_valence.py +1 -1
  59. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  60. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  61. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  62. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  63. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  64. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  65. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  66. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  67. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  68. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  69. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  70. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  71. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  72. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  73. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  74. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  75. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  76. mteb/tasks/classification/kor/klue_tc.py +2 -2
  77. mteb/tasks/classification/kor/kor_fin.py +1 -1
  78. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  79. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  80. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  81. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  82. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  83. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  84. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  85. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  86. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  87. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  88. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  89. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  90. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  91. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  92. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  93. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  94. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  95. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  96. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  97. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  98. mteb/tasks/classification/ron/moroco.py +1 -1
  99. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  100. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  101. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  102. mteb/tasks/classification/rus/headline_classification.py +2 -2
  103. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  104. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  105. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  106. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  107. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  108. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  109. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  110. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  111. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  112. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  113. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  114. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  115. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  116. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  117. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  118. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  119. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  120. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  121. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  122. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  123. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  124. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  125. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  126. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  127. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  128. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  129. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  130. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  131. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  132. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  133. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  134. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  135. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  136. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  137. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  138. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  139. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  140. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  141. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  142. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  143. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  144. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  145. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  146. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  147. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  148. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  149. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  150. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  151. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  152. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  153. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  154. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  155. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  156. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  157. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  158. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  159. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  160. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  161. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  162. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  163. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  164. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  165. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  166. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  167. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  168. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  169. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  170. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  171. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  172. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  173. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  174. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  175. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  176. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  177. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  178. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  179. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  180. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  181. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  182. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  183. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  184. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  185. mteb/tasks/pair_classification/rus/terra.py +2 -2
  186. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  187. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  188. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  189. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  190. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  191. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  192. mteb/tasks/retrieval/code/code_rag.py +4 -4
  193. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  194. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  195. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  196. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  197. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  198. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  199. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  200. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  201. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  202. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  203. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  204. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  205. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  206. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  207. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  208. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  209. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  210. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  211. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  212. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  213. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  214. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  215. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  216. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  217. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  218. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  219. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  220. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  221. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  222. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  223. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  224. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  225. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  226. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  227. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  228. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  229. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  230. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  231. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  232. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  233. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  234. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  235. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  236. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  237. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  238. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  239. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  240. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  241. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  242. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  243. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  244. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  245. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  246. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  247. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  248. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  249. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  250. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  251. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  252. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  253. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  254. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  255. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  256. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  257. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  258. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  259. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  260. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  261. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  262. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  263. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  264. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  265. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  266. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  267. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  268. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  269. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  270. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  271. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  272. mteb/tasks/retrieval/nob/norquad.py +1 -1
  273. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  274. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  275. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  276. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  277. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  278. mteb/tasks/sts/kor/klue_sts.py +1 -1
  279. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  280. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  281. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  282. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  283. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/RECORD +287 -287
  284. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  285. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  286. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  287. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -83,7 +83,7 @@ class YahooAnswersTopicsClassificationV2(AbsTaskClassification):
83
83
 
84
84
  samples_per_label = 32
85
85
 
86
- def dataset_transform(self):
86
+ def dataset_transform(self, num_proc: int = 1):
87
87
  self.dataset = self.stratified_subsampling(
88
88
  self.dataset, seed=self.seed, splits=["train", "test"]
89
89
  )
@@ -42,7 +42,7 @@ class YelpReviewFullClassification(AbsTaskClassification):
42
42
 
43
43
  samples_per_label = 128
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  self.dataset = self.stratified_subsampling(
47
47
  self.dataset, seed=self.seed, splits=["test"]
48
48
  )
@@ -88,7 +88,7 @@ class YelpReviewFullClassificationV2(AbsTaskClassification):
88
88
 
89
89
  samples_per_label = 128
90
90
 
91
- def dataset_transform(self):
91
+ def dataset_transform(self, num_proc: int = 1):
92
92
  self.dataset = self.stratified_subsampling(
93
93
  self.dataset, seed=self.seed, splits=["test"]
94
94
  )
@@ -40,7 +40,7 @@ class EstonianValenceClassification(AbsTaskClassification):
40
40
  superseded_by="EstonianValenceClassification.v2",
41
41
  )
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  self.dataset = self.dataset.rename_column("paragraph", "text").rename_column(
45
45
  "valence", "label"
46
46
  )
@@ -602,7 +602,7 @@ class DeepSentiPers(AbsTaskClassification):
602
602
  )
603
603
  samples_per_label = 32
604
604
 
605
- def dataset_transform(self):
605
+ def dataset_transform(self, num_proc: int = 1):
606
606
  self.dataset = self.dataset.rename_column("review", "text")
607
607
 
608
608
 
@@ -773,7 +773,7 @@ class NLPTwitterAnalysisClassification(AbsTaskClassification):
773
773
  )
774
774
  samples_per_label = 32
775
775
 
776
- def dataset_transform(self):
776
+ def dataset_transform(self, num_proc: int = 1):
777
777
  self.dataset = self.dataset.rename_column("tweet", "text")
778
778
 
779
779
 
@@ -858,7 +858,7 @@ class FaIntentClassification(AbsTaskClassification):
858
858
  )
859
859
  samples_per_label = 32
860
860
 
861
- def dataset_transform(self):
861
+ def dataset_transform(self, num_proc: int = 1):
862
862
  self.dataset = self.dataset.rename_column("words", "text")
863
863
  self.dataset = self.dataset.rename_column("intent_label", "label")
864
864
 
@@ -889,7 +889,7 @@ class StyleClassification(AbsTaskClassification):
889
889
  )
890
890
  samples_per_label = 32
891
891
 
892
- def dataset_transform(self):
892
+ def dataset_transform(self, num_proc: int = 1):
893
893
  mapping = {"formal": 1, "informal": 0}
894
894
  self.dataset = self.dataset.map(
895
895
  lambda example: {"label": mapping[example["label"]]}
@@ -927,7 +927,7 @@ class PerShopDomainClassification(AbsTaskClassification):
927
927
  )
928
928
  samples_per_label = 32
929
929
 
930
- def dataset_transform(self):
930
+ def dataset_transform(self, num_proc: int = 1):
931
931
  self.dataset = self.dataset.rename_column("domain", "label")
932
932
 
933
933
 
@@ -962,5 +962,5 @@ class PerShopIntentClassification(AbsTaskClassification):
962
962
  )
963
963
  samples_per_label = 32
964
964
 
965
- def dataset_transform(self):
965
+ def dataset_transform(self, num_proc: int = 1):
966
966
  self.dataset = self.dataset.rename_column("Intents & Actions", "label")
@@ -37,7 +37,7 @@ class PersianFoodSentimentClassification(AbsTaskClassification):
37
37
  """,
38
38
  )
39
39
 
40
- def dataset_transform(self):
40
+ def dataset_transform(self, num_proc: int = 1):
41
41
  self.dataset = self.stratified_subsampling(
42
42
  self.dataset, seed=self.seed, splits=["validation", "test"]
43
43
  )
@@ -36,7 +36,7 @@ class FilipinoShopeeReviewsClassification(AbsTaskClassification):
36
36
  """,
37
37
  )
38
38
 
39
- def dataset_transform(self):
39
+ def dataset_transform(self, num_proc: int = 1):
40
40
  self.dataset = self.stratified_subsampling(
41
41
  self.dataset, seed=self.seed, splits=["validation", "test"]
42
42
  )
@@ -40,7 +40,7 @@ Laippala, Veronika},
40
40
  superseded_by="FinToxicityClassification.v2",
41
41
  )
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  self.dataset = self.dataset.rename_column("label_toxicity", "label")
45
45
  remove_cols = [
46
46
  col
@@ -29,7 +29,7 @@ class FrenchBookReviews(AbsTaskClassification):
29
29
  superseded_by="FrenchBookReviews.v2",
30
30
  )
31
31
 
32
- def dataset_transform(self):
32
+ def dataset_transform(self, num_proc: int = 1):
33
33
  self.dataset = self.dataset.rename_columns({"reader_review": "text"})
34
34
  self.dataset = self.stratified_subsampling(
35
35
  self.dataset, seed=self.seed, splits=["train"]
@@ -63,7 +63,7 @@ class FrenchBookReviewsV2(AbsTaskClassification):
63
63
  adapted_from=["FrenchBookReviews"],
64
64
  )
65
65
 
66
- def dataset_transform(self):
66
+ def dataset_transform(self, num_proc: int = 1):
67
67
  self.dataset = self.stratified_subsampling(
68
68
  self.dataset, seed=self.seed, splits=["train"]
69
69
  )
@@ -35,7 +35,7 @@ class MovieReviewSentimentClassification(AbsTaskClassification):
35
35
  superseded_by="MovieReviewSentimentClassification.v2",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  self.dataset = self.dataset.rename_column("review", "text")
40
40
  self.dataset = self.stratified_subsampling(
41
41
  self.dataset, seed=self.seed, splits=["validation", "test"]
@@ -75,7 +75,7 @@ class MovieReviewSentimentClassificationV2(AbsTaskClassification):
75
75
  adapted_from=["MovieReviewSentimentClassification"],
76
76
  )
77
77
 
78
- def dataset_transform(self):
78
+ def dataset_transform(self, num_proc: int = 1):
79
79
  self.dataset = self.stratified_subsampling(
80
80
  self.dataset, seed=self.seed, splits=["validation", "test"]
81
81
  )
@@ -28,7 +28,7 @@ class GujaratiNewsClassification(AbsTaskClassification):
28
28
  superseded_by="GujaratiNewsClassification.v2",
29
29
  )
30
30
 
31
- def dataset_transform(self):
31
+ def dataset_transform(self, num_proc: int = 1):
32
32
  self.dataset = self.dataset.rename_column("headline", "text")
33
33
 
34
34
 
@@ -101,7 +101,7 @@ Stent, Amanda},
101
101
  adapted_from=["HindiDiscourseClassification"],
102
102
  )
103
103
 
104
- def dataset_transform(self):
104
+ def dataset_transform(self, num_proc: int = 1):
105
105
  self.dataset = self.stratified_subsampling(
106
106
  self.dataset, seed=self.seed, splits=["train"]
107
107
  )
@@ -37,7 +37,7 @@ class SentimentAnalysisHindi(AbsTaskClassification):
37
37
  superseded_by="SentimentAnalysisHindi.v2",
38
38
  )
39
39
 
40
- def dataset_transform(self):
40
+ def dataset_transform(self, num_proc: int = 1):
41
41
  self.dataset = self.stratified_subsampling(
42
42
  self.dataset, seed=self.seed, splits=["train"]
43
43
  )
@@ -41,7 +41,7 @@ class IndonesianIdClickbaitClassification(AbsTaskClassification):
41
41
  superseded_by="IndonesianIdClickbaitClassification.v2",
42
42
  )
43
43
 
44
- def dataset_transform(self):
44
+ def dataset_transform(self, num_proc: int = 1):
45
45
  self.dataset = self.dataset.remove_columns(["label"]).rename_columns(
46
46
  {"title": "text", "label_score": "label"}
47
47
  )
@@ -89,7 +89,7 @@ class IndonesianIdClickbaitClassificationV2(AbsTaskClassification):
89
89
  adapted_from=["IndonesianIdClickbaitClassification"],
90
90
  )
91
91
 
92
- def dataset_transform(self):
92
+ def dataset_transform(self, num_proc: int = 1):
93
93
  self.dataset = self.stratified_subsampling(
94
94
  self.dataset, seed=self.seed, splits=["train"]
95
95
  )
@@ -55,7 +55,7 @@ Purwarianti, Ayu},
55
55
  superseded_by="IndonesianMongabayConservationClassification.v2",
56
56
  )
57
57
 
58
- def dataset_transform(self):
58
+ def dataset_transform(self, num_proc: int = 1):
59
59
  splits = self.metadata.eval_splits
60
60
  class_labels = ["positif", "netral", "negatif"]
61
61
 
@@ -36,7 +36,7 @@ class DadoEvalCoarseClassification(AbsTaskClassification):
36
36
  """,
37
37
  )
38
38
 
39
- def dataset_transform(self):
39
+ def dataset_transform(self, num_proc: int = 1):
40
40
  self.dataset = self.dataset.rename_column("class", "label")
41
41
  unused_cols = [
42
42
  col
@@ -44,7 +44,7 @@ class ItaCaseholdClassification(AbsTaskClassification):
44
44
  """,
45
45
  )
46
46
 
47
- def dataset_transform(self):
47
+ def dataset_transform(self, num_proc: int = 1):
48
48
  self.dataset = self.dataset.rename_columns(
49
49
  {"summary": "text", "materia": "label"}
50
50
  )
@@ -36,7 +36,7 @@ class SardiStanceClassification(AbsTaskClassification):
36
36
  """,
37
37
  )
38
38
 
39
- def dataset_transform(self):
39
+ def dataset_transform(self, num_proc: int = 1):
40
40
  unused_cols = [
41
41
  col
42
42
  for col in self.dataset["test"].column_names
@@ -73,7 +73,7 @@ class JavaneseIMDBClassificationV2(AbsTaskClassification):
73
73
  adapted_from=["JavaneseIMDBClassification"],
74
74
  )
75
75
 
76
- def dataset_transform(self):
76
+ def dataset_transform(self, num_proc: int = 1):
77
77
  self.dataset = self.stratified_subsampling(
78
78
  self.dataset, seed=self.seed, splits=["test"]
79
79
  )
@@ -108,7 +108,7 @@ Zhou, Yichao},
108
108
  adapted_from=["WRIMEClassification"],
109
109
  )
110
110
 
111
- def dataset_transform(self):
111
+ def dataset_transform(self, num_proc: int = 1):
112
112
  self.dataset = self.stratified_subsampling(
113
113
  self.dataset, seed=self.seed, splits=["test"]
114
114
  )
@@ -35,7 +35,7 @@ class KannadaNewsClassification(AbsTaskClassification):
35
35
  superseded_by="KannadaNewsClassification.v2",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  self.dataset = self.dataset.rename_column("headline", "text")
40
40
  self.dataset = self.stratified_subsampling(
41
41
  self.dataset, seed=self.seed, splits=["train"]
@@ -75,7 +75,7 @@ class KannadaNewsClassificationV2(AbsTaskClassification):
75
75
  adapted_from=["KannadaNewsClassification"],
76
76
  )
77
77
 
78
- def dataset_transform(self):
78
+ def dataset_transform(self, num_proc: int = 1):
79
79
  self.dataset = self.stratified_subsampling(
80
80
  self.dataset, seed=self.seed, splits=["train"]
81
81
  )
@@ -38,7 +38,7 @@ class KlueTC(AbsTaskClassification):
38
38
  superseded_by="KLUE-TC.v2",
39
39
  )
40
40
 
41
- def dataset_transform(self):
41
+ def dataset_transform(self, num_proc: int = 1):
42
42
  def id2str(example):
43
43
  return {"label": label_feature.int2str(example["label_id"])}
44
44
 
@@ -90,7 +90,7 @@ class KlueTCV2(AbsTaskClassification):
90
90
  adapted_from=["KlueTC"],
91
91
  )
92
92
 
93
- def dataset_transform(self):
93
+ def dataset_transform(self, num_proc: int = 1):
94
94
  self.dataset = self.stratified_subsampling(
95
95
  self.dataset, seed=self.seed, splits=["validation"]
96
96
  )
@@ -37,7 +37,7 @@ class KorFin(AbsTaskClassification):
37
37
  """,
38
38
  )
39
39
 
40
- def dataset_transform(self):
40
+ def dataset_transform(self, num_proc: int = 1):
41
41
  self.dataset = self.dataset.rename_columns(
42
42
  {"SRC": "text", "SENTIMENT": "label"}
43
43
  ).remove_columns(["SID", "TYPE", "ASPECT"])
@@ -73,7 +73,7 @@ class KorHateClassificationV2(AbsTaskClassification):
73
73
  adapted_from=["KorHateClassification"],
74
74
  )
75
75
 
76
- def dataset_transform(self):
76
+ def dataset_transform(self, num_proc: int = 1):
77
77
  self.dataset = self.stratified_subsampling(
78
78
  self.dataset, seed=self.seed, splits=["train"]
79
79
  )
@@ -73,7 +73,7 @@ class KorSarcasmClassificationV2(AbsTaskClassification):
73
73
  adapted_from=["KorSarcasmClassification"],
74
74
  )
75
75
 
76
- def dataset_transform(self):
76
+ def dataset_transform(self, num_proc: int = 1):
77
77
  self.dataset = self.stratified_subsampling(
78
78
  self.dataset, seed=self.seed, splits=["train"]
79
79
  )
@@ -35,7 +35,7 @@ class MalayalamNewsClassification(AbsTaskClassification):
35
35
  superseded_by="MalayalamNewsClassification.v2",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  self.dataset = self.dataset.rename_columns({"headings": "text"})
40
40
 
41
41
 
@@ -35,7 +35,7 @@ class MarathiNewsClassification(AbsTaskClassification):
35
35
  superseded_by="MarathiNewsClassification.v2",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  self.dataset = self.dataset.rename_columns({"headline": "text"})
40
40
  self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
41
41
 
@@ -43,7 +43,7 @@ class AfriSentiLangClassification(AbsTaskClassification):
43
43
 
44
44
  samples_per_label = 32
45
45
 
46
- def dataset_transform(self):
46
+ def dataset_transform(self, num_proc: int = 1):
47
47
  self.dataset = self.dataset.rename_column("tweet", "text")
48
48
  self.dataset = self.stratified_subsampling(
49
49
  self.dataset, seed=self.seed, splits=["test"]
@@ -60,7 +60,7 @@ Piperidis, Stelios},
60
60
  """,
61
61
  )
62
62
 
63
- def dataset_transform(self):
63
+ def dataset_transform(self, num_proc: int = 1):
64
64
  for lang in self.dataset.keys():
65
65
  self.dataset[lang] = self.dataset[lang].rename_columns(
66
66
  {"TWEET": "text", "LABEL": "label"}
@@ -44,7 +44,7 @@ class CyrillicTurkicLangClassification(AbsTaskClassification):
44
44
  """,
45
45
  )
46
46
 
47
- def dataset_transform(self):
47
+ def dataset_transform(self, num_proc: int = 1):
48
48
  self.dataset = self.stratified_subsampling(
49
49
  self.dataset, seed=self.seed, splits=["test"]
50
50
  )
@@ -45,7 +45,7 @@ class IndicNLPNewsClassification(AbsTaskClassification):
45
45
  """,
46
46
  )
47
47
 
48
- def dataset_transform(self):
48
+ def dataset_transform(self, num_proc: int = 1):
49
49
  for lang in self.hf_subsets:
50
50
  self.dataset[lang] = self.dataset[lang].rename_columns(
51
51
  {"news": "text", "class": "label"}
@@ -55,7 +55,7 @@ class MasakhaNEWSClassification(AbsTaskClassification):
55
55
  """,
56
56
  )
57
57
 
58
- def dataset_transform(self):
58
+ def dataset_transform(self, num_proc: int = 1):
59
59
  for lang in self.dataset.keys():
60
60
  self.dataset[lang] = self.dataset[lang].rename_columns(
61
61
  {"category": "label"}
@@ -86,7 +86,7 @@ Talat, Zeerak},
86
86
  """,
87
87
  )
88
88
 
89
- def dataset_transform(self):
89
+ def dataset_transform(self, num_proc: int = 1):
90
90
  # for each language perform some transforms
91
91
  for lang in self.dataset.keys():
92
92
  _dataset = self.dataset[lang]
@@ -89,7 +89,7 @@ Vylomova, Ekaterina},
89
89
  """,
90
90
  )
91
91
 
92
- def dataset_transform(self):
92
+ def dataset_transform(self, num_proc: int = 1):
93
93
  # create a train set from the test set for Welsh language (cym)
94
94
  lang = "cym"
95
95
  if lang in self.dataset.keys():
@@ -54,7 +54,7 @@ Fishel, Mark},
54
54
 
55
55
  samples_per_label = 32
56
56
 
57
- def dataset_transform(self):
57
+ def dataset_transform(self, num_proc: int = 1):
58
58
  for lang in self.dataset.keys():
59
59
  # convert label to a 0/1 label
60
60
  labels = self.dataset[lang]["train"]["label"]
@@ -234,7 +234,7 @@ class SIB200Classification(AbsTaskClassification):
234
234
  """,
235
235
  )
236
236
 
237
- def dataset_transform(self):
237
+ def dataset_transform(self, num_proc: int = 1):
238
238
  for lang in self.dataset.keys():
239
239
  self.dataset[lang] = self.dataset[lang].class_encode_column("category")
240
240
  self.dataset[lang] = self.dataset[lang].rename_columns(
@@ -49,7 +49,7 @@ class TurkicClassification(AbsTaskClassification):
49
49
  )
50
50
  return dataset_lang["train"]
51
51
 
52
- def load_data(self) -> None:
52
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
53
  """Load dataset from HuggingFace hub"""
54
54
  if self.data_loaded:
55
55
  return
@@ -53,7 +53,7 @@ Camacho-Collados, Jose},
53
53
  """,
54
54
  )
55
55
 
56
- def dataset_transform(self):
56
+ def dataset_transform(self, num_proc: int = 1):
57
57
  for lang in self.hf_subsets:
58
58
  self.dataset[lang] = self.stratified_subsampling(
59
59
  self.dataset[lang], n_samples=256, seed=self.seed, splits=["test"]
@@ -47,7 +47,7 @@ Tan, Liling},
47
47
  superseded_by="NepaliNewsClassification.v2",
48
48
  )
49
49
 
50
- def dataset_transform(self):
50
+ def dataset_transform(self, num_proc: int = 1):
51
51
  self.dataset = self.dataset.rename_column("paras", "text")
52
52
  self.dataset = self.stratified_subsampling(
53
53
  self.dataset, seed=self.seed, splits=["train"]
@@ -99,7 +99,7 @@ Tan, Liling},
99
99
  adapted_from=["NepaliNewsClassification"],
100
100
  )
101
101
 
102
- def dataset_transform(self):
102
+ def dataset_transform(self, num_proc: int = 1):
103
103
  self.dataset = self.stratified_subsampling(
104
104
  self.dataset, seed=self.seed, splits=["train"]
105
105
  )
@@ -32,7 +32,7 @@ class DutchSarcasticHeadlinesClassification(AbsTaskClassification):
32
32
  },
33
33
  )
34
34
 
35
- def dataset_transform(self):
35
+ def dataset_transform(self, num_proc: int = 1):
36
36
  for split in self.dataset:
37
37
  self.dataset[split] = self.dataset[split].rename_columns(
38
38
  {"headline": "text", "is_sarcastic": "label"}
@@ -42,7 +42,7 @@ class VaccinChatNLClassification(AbsTaskClassification):
42
42
  },
43
43
  )
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  for split in self.dataset:
47
47
  self.dataset[split] = self.dataset[split].rename_columns(
48
48
  {"sentence1": "text"}
@@ -35,7 +35,7 @@ class OdiaNewsClassification(AbsTaskClassification):
35
35
  superseded_by="OdiaNewsClassification.v2",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  self.dataset = self.dataset.rename_columns({"headings": "text"})
40
40
  self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
41
41
 
@@ -73,5 +73,5 @@ class OdiaNewsClassificationV2(AbsTaskClassification):
73
73
  adapted_from=["OdiaNewsClassification"],
74
74
  )
75
75
 
76
- def dataset_transform(self):
76
+ def dataset_transform(self, num_proc: int = 1):
77
77
  self.dataset = self.stratified_subsampling(self.dataset, seed=self.seed)
@@ -34,7 +34,7 @@ class PunjabiNewsClassification(AbsTaskClassification):
34
34
  """,
35
35
  )
36
36
 
37
- def dataset_transform(self):
37
+ def dataset_transform(self, num_proc: int = 1):
38
38
  self.dataset = self.dataset.rename_columns(
39
39
  {"article": "text", "is_about_politics": "label"}
40
40
  )
@@ -77,7 +77,7 @@ class MorocoV2(AbsTaskClassification):
77
77
  adapted_from=["Moroco"],
78
78
  )
79
79
 
80
- def dataset_transform(self):
80
+ def dataset_transform(self, num_proc: int = 1):
81
81
  self.dataset = self.stratified_subsampling(
82
82
  self.dataset, seed=self.seed, splits=["test"]
83
83
  )
@@ -69,7 +69,7 @@ class RomanianReviewsSentimentV2(AbsTaskClassification):
69
69
  adapted_from=["RomanianReviewsSentiment"],
70
70
  )
71
71
 
72
- def dataset_transform(self):
72
+ def dataset_transform(self, num_proc: int = 1):
73
73
  self.dataset = self.stratified_subsampling(
74
74
  self.dataset, seed=self.seed, splits=["test"]
75
75
  )
@@ -71,7 +71,7 @@ class RomanianSentimentClassificationV2(AbsTaskClassification):
71
71
  adapted_from=["RomanianSentimentClassification"],
72
72
  )
73
73
 
74
- def dataset_transform(self):
74
+ def dataset_transform(self, num_proc: int = 1):
75
75
  self.dataset = self.stratified_subsampling(
76
76
  self.dataset, seed=self.seed, splits=["test"]
77
77
  )
@@ -57,7 +57,7 @@ class GeoreviewClassificationV2(AbsTaskClassification):
57
57
  adapted_from=["GeoreviewClassification"],
58
58
  )
59
59
 
60
- def dataset_transform(self):
60
+ def dataset_transform(self, num_proc: int = 1):
61
61
  self.dataset = self.stratified_subsampling(
62
62
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
63
63
  )
@@ -53,7 +53,7 @@ Oda, Yusuke},
53
53
  superseded_by="HeadlineClassification.v2",
54
54
  )
55
55
 
56
- def dataset_transform(self):
56
+ def dataset_transform(self, num_proc: int = 1):
57
57
  self.dataset = self.stratified_subsampling(
58
58
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
59
59
  )
@@ -110,7 +110,7 @@ Oda, Yusuke},
110
110
  adapted_from=["HeadlineClassification"],
111
111
  )
112
112
 
113
- def dataset_transform(self):
113
+ def dataset_transform(self, num_proc: int = 1):
114
114
  self.dataset = self.stratified_subsampling(
115
115
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
116
116
  )
@@ -57,7 +57,7 @@ Robnik-{\v{S}}ikonja, Marko},
57
57
  superseded_by="InappropriatenessClassification.v2",
58
58
  )
59
59
 
60
- def dataset_transform(self):
60
+ def dataset_transform(self, num_proc: int = 1):
61
61
  self.dataset = self.stratified_subsampling(
62
62
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
63
63
  )
@@ -118,7 +118,7 @@ Robnik-{\v{S}}ikonja, Marko},
118
118
  adapted_from=["InappropriatenessClassification"],
119
119
  )
120
120
 
121
- def dataset_transform(self):
121
+ def dataset_transform(self, num_proc: int = 1):
122
122
  self.dataset = self.stratified_subsampling(
123
123
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
124
124
  )
@@ -42,7 +42,7 @@ class RuReviewsClassification(AbsTaskClassification):
42
42
  superseded_by="RuReviewsClassification.v2",
43
43
  )
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  self.dataset = self.stratified_subsampling(
47
47
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
48
48
  )
@@ -88,7 +88,7 @@ class RuReviewsClassificationV2(AbsTaskClassification):
88
88
  adapted_from=["RuReviewsClassification"],
89
89
  )
90
90
 
91
- def dataset_transform(self):
91
+ def dataset_transform(self, num_proc: int = 1):
92
92
  self.dataset = self.stratified_subsampling(
93
93
  self.dataset, seed=self.seed, n_samples=2048, splits=["test"]
94
94
  )