mteb 2.7.4__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +26 -11
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/evaluate.py +10 -2
  27. mteb/models/model_implementations/bm25.py +2 -0
  28. mteb/models/model_implementations/pylate_models.py +10 -0
  29. mteb/models/models_protocols.py +4 -0
  30. mteb/models/search_wrappers.py +12 -0
  31. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  33. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  36. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  37. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  38. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  39. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  40. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  41. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  42. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  43. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  44. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  45. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  46. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  47. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  48. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  49. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  50. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  51. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  52. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  53. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  54. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  55. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  56. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  57. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  58. mteb/tasks/classification/est/estonian_valence.py +1 -1
  59. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  60. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  61. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  62. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  63. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  64. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  65. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  66. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  67. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  68. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  69. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  70. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  71. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  72. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  73. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  74. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  75. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  76. mteb/tasks/classification/kor/klue_tc.py +2 -2
  77. mteb/tasks/classification/kor/kor_fin.py +1 -1
  78. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  79. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  80. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  81. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  82. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  83. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  84. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  85. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  86. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  87. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  88. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  89. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  90. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  91. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  92. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  93. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  94. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  95. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  96. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  97. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  98. mteb/tasks/classification/ron/moroco.py +1 -1
  99. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  100. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  101. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  102. mteb/tasks/classification/rus/headline_classification.py +2 -2
  103. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  104. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  105. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  106. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  107. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  108. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  109. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  110. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  111. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  112. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  113. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  114. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  115. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  116. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  117. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  118. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  119. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  120. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  121. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  122. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  123. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  124. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  125. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  126. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  127. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  128. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  129. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  130. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  131. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  132. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  133. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  134. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  135. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  136. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  137. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  138. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  139. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  140. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  141. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  142. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  143. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  144. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  145. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  146. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  147. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  148. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  149. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  150. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  151. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  152. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  153. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  154. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  155. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  156. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  157. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  158. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  159. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  160. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  161. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  162. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  163. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  164. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  165. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  166. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  167. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  168. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  169. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  170. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  171. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  172. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  173. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  174. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  175. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  176. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  177. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  178. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  179. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  180. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  181. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  182. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  183. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  184. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  185. mteb/tasks/pair_classification/rus/terra.py +2 -2
  186. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  187. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  188. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  189. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  190. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  191. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  192. mteb/tasks/retrieval/code/code_rag.py +4 -4
  193. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  194. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  195. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  196. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  197. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  198. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  199. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  200. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  201. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  202. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  203. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  204. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  205. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  206. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  207. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  208. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  209. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  210. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  211. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  212. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  213. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  214. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  215. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  216. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  217. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  218. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  219. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  220. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  221. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  222. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  223. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  224. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  225. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  226. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  227. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  228. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  229. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  230. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  231. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  232. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  233. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  234. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  235. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  236. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  237. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  238. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  239. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  240. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  241. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  242. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  243. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  244. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  245. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  246. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  247. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  248. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  249. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  250. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  251. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  252. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  253. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  254. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  255. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  256. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  257. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  258. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  259. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  260. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  261. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  262. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  263. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  264. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  265. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  266. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  267. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  268. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  269. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  270. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  271. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  272. mteb/tasks/retrieval/nob/norquad.py +1 -1
  273. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  274. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  275. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  276. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  277. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  278. mteb/tasks/sts/kor/klue_sts.py +1 -1
  279. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  280. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  281. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  282. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  283. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/RECORD +287 -287
  284. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  285. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  286. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  287. {mteb-2.7.4.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
108
108
  """,
109
109
  )
110
110
 
111
- def load_data(self) -> None:
111
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
112
112
  if self.data_loaded:
113
113
  return
114
114
 
@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
97
97
  """,
98
98
  )
99
99
 
100
- def load_data(self) -> None:
100
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
101
101
  if self.data_loaded:
102
102
  return
103
103
 
@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
103
103
  },
104
104
  )
105
105
 
106
- def load_data(self) -> None:
106
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
107
107
  if self.data_loaded:
108
108
  return
109
109
 
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
161
161
  },
162
162
  )
163
163
 
164
- def load_data(self) -> None:
164
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
165
165
  if self.data_loaded:
166
166
  return
167
167
 
@@ -96,7 +96,7 @@ de Vries, Harm},
96
96
  """,
97
97
  )
98
98
 
99
- def load_data(self) -> None:
99
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
100
100
  if self.data_loaded:
101
101
  return
102
102
 
@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
126
126
  """,
127
127
  )
128
128
 
129
- def load_data(self) -> None:
129
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
130
130
  if self.data_loaded:
131
131
  return
132
132
 
@@ -16,6 +16,7 @@ def _load_data(
16
16
  splits: list[str],
17
17
  langs: list | None = None,
18
18
  revision: str | None = None,
19
+ num_proc: int = 1,
19
20
  ):
20
21
  if langs is None:
21
22
  corpus = {}
@@ -32,6 +33,7 @@ def _load_data(
32
33
  "queries",
33
34
  split=split,
34
35
  revision=revision,
36
+ num_proc=num_proc,
35
37
  )
36
38
  query_ds = query_ds.map(
37
39
  lambda x: {
@@ -40,6 +42,7 @@ def _load_data(
40
42
  "modality": "text",
41
43
  },
42
44
  remove_columns=["query-id", "query"],
45
+ num_proc=num_proc,
43
46
  )
44
47
 
45
48
  corpus_ds = load_dataset(
@@ -47,6 +50,7 @@ def _load_data(
47
50
  "corpus",
48
51
  split=split,
49
52
  revision=revision,
53
+ num_proc=num_proc,
50
54
  )
51
55
  corpus_ds = corpus_ds.map(
52
56
  lambda x: {
@@ -54,6 +58,7 @@ def _load_data(
54
58
  "modality": "image",
55
59
  },
56
60
  remove_columns=["corpus-id"],
61
+ num_proc=num_proc,
57
62
  )
58
63
  corpus_ds = corpus_ds.select_columns(["id", "image"])
59
64
 
@@ -62,6 +67,7 @@ def _load_data(
62
67
  "qrels",
63
68
  split=split,
64
69
  revision=revision,
70
+ num_proc=num_proc,
65
71
  )
66
72
 
67
73
  if langs is None:
@@ -125,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
125
131
  prompt={"query": "Find a screenshot that relevant to the user's question."},
126
132
  )
127
133
 
128
- def load_data(self) -> None:
134
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
129
135
  if self.data_loaded:
130
136
  return
131
137
 
@@ -134,6 +140,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
134
140
  splits=self.metadata.eval_splits,
135
141
  langs=_LANGS.keys(),
136
142
  revision=self.metadata.dataset["revision"],
143
+ num_proc=num_proc,
137
144
  )
138
145
 
139
146
  self.data_loaded = True
@@ -172,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
172
179
  prompt={"query": "Find a screenshot that relevant to the user's question."},
173
180
  )
174
181
 
175
- def load_data(self) -> None:
182
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
176
183
  if self.data_loaded:
177
184
  return
178
185
 
@@ -181,6 +188,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
181
188
  splits=self.metadata.eval_splits,
182
189
  langs=_LANGS.keys(),
183
190
  revision=self.metadata.dataset["revision"],
191
+ num_proc=num_proc,
184
192
  )
185
193
 
186
194
  self.data_loaded = True
@@ -219,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
219
227
  prompt={"query": "Find a screenshot that relevant to the user's question."},
220
228
  )
221
229
 
222
- def load_data(self) -> None:
230
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
223
231
  if self.data_loaded:
224
232
  return
225
233
 
@@ -228,6 +236,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
228
236
  splits=self.metadata.eval_splits,
229
237
  langs=_LANGS.keys(),
230
238
  revision=self.metadata.dataset["revision"],
239
+ num_proc=num_proc,
231
240
  )
232
241
 
233
242
  self.data_loaded = True
@@ -266,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
266
275
  prompt={"query": "Find a screenshot that relevant to the user's question."},
267
276
  )
268
277
 
269
- def load_data(self) -> None:
278
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
270
279
  if self.data_loaded:
271
280
  return
272
281
 
@@ -274,6 +283,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
274
283
  path=self.metadata.dataset["path"],
275
284
  splits=self.metadata.eval_splits,
276
285
  revision=self.metadata.dataset["revision"],
286
+ num_proc=num_proc,
277
287
  )
278
288
 
279
289
  self.data_loaded = True
@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
116
116
  """,
117
117
  )
118
118
 
119
- def load_data(self) -> None:
119
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
120
120
  if self.data_loaded:
121
121
  return
122
122
 
@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
104
104
  """,
105
105
  )
106
106
 
107
- def load_data(self) -> None:
107
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
108
108
  if self.data_loaded:
109
109
  return
110
110
 
@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self) -> None:
67
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
146
146
  """,
147
147
  )
148
148
 
149
- def load_data(self) -> None:
149
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
150
150
  if self.data_loaded:
151
151
  return
152
152
 
@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackAndroid"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackEnglish"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGamingRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGisRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackMathematicaRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackPhysicsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackProgrammersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackStatsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackTexRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackUnixRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWebmastersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWordpressRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self) -> None:
45
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -50,7 +50,7 @@ Fishel, Mark},
50
50
  },
51
51
  )
52
52
 
53
- def load_data(self) -> None:
53
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
54
54
  """Load dataset from HuggingFace hub"""
55
55
  if self.data_loaded:
56
56
  return
@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
37
37
  task_subtypes=["Article retrieval"],
38
38
  )
39
39
 
40
- def load_data(self) -> None:
40
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
41
41
  """Load dataset from HuggingFace hub"""
42
42
  if self.data_loaded:
43
43
  return
@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
36
36
  """,
37
37
  )
38
38
 
39
- def load_data(self) -> None:
39
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -52,7 +52,7 @@ Zong, Chengqing},
52
52
  """,
53
53
  )
54
54
 
55
- def load_data(self) -> None:
55
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
56
56
  if self.data_loaded:
57
57
  return
58
58
 
@@ -43,5 +43,5 @@ Vulić, Ivan},
43
43
  min_score = 0
44
44
  max_score = 5
45
45
 
46
- def dataset_transform(self):
46
+ def dataset_transform(self, num_proc: int = 1):
47
47
  self.dataset = self.dataset.rename_column("label", "score")
@@ -30,7 +30,7 @@ class SickFrSTS(AbsTaskSTS):
30
30
  min_score = 0
31
31
  max_score = 5
32
32
 
33
- def dataset_transform(self):
33
+ def dataset_transform(self, num_proc: int = 1):
34
34
  self.dataset = self.dataset.rename_columns(
35
35
  {
36
36
  "sentence_A": "sentence1",
@@ -40,7 +40,7 @@ class KlueSTS(AbsTaskSTS):
40
40
  min_score = 0
41
41
  max_score = 5
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  # In the case of KLUE STS, score value is nested within the `labels` field.
45
45
  # We need to extract the `score` and move it outside of the `labels` field for access.
46
46
  for split in self.dataset:
@@ -52,7 +52,7 @@ and de Paiva, Valeria},
52
52
  min_score = 1
53
53
  max_score = 5
54
54
 
55
- def dataset_transform(self):
55
+ def dataset_transform(self, num_proc: int = 1):
56
56
  self.dataset = self.stratified_subsampling(
57
57
  self.dataset,
58
58
  seed=42,
@@ -54,7 +54,7 @@ Filippskikh, Elizaveta},
54
54
  min_score = -1
55
55
  max_score = 1
56
56
 
57
- def dataset_transform(self):
57
+ def dataset_transform(self, num_proc: int = 1):
58
58
  self.dataset = self.dataset.rename_columns(
59
59
  {
60
60
  "text_1": "sentence1",
@@ -41,7 +41,7 @@ class SciMMIR(AbsTaskZeroShotClassification):
41
41
 
42
42
  label_column_name: str = "class"
43
43
 
44
- def dataset_transform(self):
44
+ def dataset_transform(self, num_proc: int = 1):
45
45
  class_code = {
46
46
  "fig_result": 0,
47
47
  "fig_illustration": 1,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.4
3
+ Version: 2.7.5
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>