mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -4,34 +4,40 @@ import json
4
4
  import logging
5
5
  import warnings
6
6
  from collections import defaultdict
7
- from collections.abc import Callable, Iterable, Mapping
8
7
  from functools import cached_property
9
8
  from importlib.metadata import version
10
- from pathlib import Path
11
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
12
10
 
13
11
  import numpy as np
14
12
  from huggingface_hub import EvalResult
15
13
  from packaging.version import Version
16
14
  from pydantic import BaseModel, field_validator
17
- from typing_extensions import Self
18
15
 
19
16
  from mteb import TaskMetadata
20
17
  from mteb._helpful_enum import HelpfulStrEnum
21
18
  from mteb.abstasks import AbsTaskClassification
22
19
  from mteb.abstasks.abstask import AbsTask
23
- from mteb.abstasks.task_metadata import TaskDomain
24
20
  from mteb.languages import LanguageScripts
25
21
  from mteb.models.model_meta import ScoringFunction
26
22
  from mteb.types import (
27
- HFSubset,
28
- ISOLanguage,
29
- ISOLanguageScript,
30
- Score,
31
23
  ScoresDict,
32
24
  SplitName,
33
25
  )
34
26
 
27
+ if TYPE_CHECKING:
28
+ from collections.abc import Callable, Iterable, Mapping
29
+ from pathlib import Path
30
+
31
+ from typing_extensions import Self
32
+
33
+ from mteb.abstasks.task_metadata import TaskDomain
34
+ from mteb.types import (
35
+ HFSubset,
36
+ ISOLanguage,
37
+ ISOLanguageScript,
38
+ Score,
39
+ )
40
+
35
41
  logger = logging.getLogger(__name__)
36
42
 
37
43
 
@@ -641,16 +647,26 @@ class TaskResult(BaseModel):
641
647
  if split not in splits:
642
648
  continue
643
649
  seen_subsets = set()
644
- # Use list comprehension for better performance
645
- new_scores[split] = [
646
- _scores
647
- for _scores in self.scores[split]
648
- if _scores["hf_subset"] in hf_subsets
649
- ]
650
+ if task.is_aggregate:
651
+ # aggregate tasks only have the default subset, but in metadata can be multiple
652
+ new_scores[split] = [
653
+ _scores
654
+ for _scores in self.scores[split]
655
+ if _scores["hf_subset"] == "default"
656
+ ]
657
+ seen_subsets = {"default"}
658
+ else:
659
+ new_scores[split] = [
660
+ _scores
661
+ for _scores in self.scores[split]
662
+ if _scores["hf_subset"] in hf_subsets
663
+ ]
650
664
  for _scores in new_scores[split]:
651
665
  seen_subsets.add(_scores["hf_subset"])
652
666
 
653
- if seen_subsets != hf_subsets:
667
+ if seen_subsets != hf_subsets and not (
668
+ task.is_aggregate and "default" in seen_subsets
669
+ ):
654
670
  missing_subsets = hf_subsets - seen_subsets
655
671
  if len(missing_subsets) > 2:
656
672
  subset1, subset2 = list(missing_subsets)[:2]
@@ -1,8 +1,14 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  import torch
2
6
 
3
- from mteb.models import EncoderProtocol
4
7
  from mteb.models.model_meta import ScoringFunction
5
- from mteb.types import Array
8
+
9
+ if TYPE_CHECKING:
10
+ from mteb.models import EncoderProtocol
11
+ from mteb.types import Array
6
12
 
7
13
 
8
14
  def _use_torch_compile():
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidRetrieval,
5
5
  CQADupstackEnglishRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressRetrieval,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrieval(),
20
20
  CQADupstackEnglishRetrieval(),
21
21
  CQADupstackGamingRetrieval(),
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
4
4
  STS17MultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_sts17: list[AbsTask] = [
7
+ task_list_sts17 = [
8
8
  STS17MultilingualVisualSTS().filter_languages(
9
9
  languages=["eng"], hf_subsets=["en-en"]
10
10
  )
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
4
4
  STSBenchmarkMultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_stsb: list[AbsTask] = [
7
+ task_list_stsb = [
8
8
  STSBenchmarkMultilingualVisualSTS().filter_languages(
9
9
  languages=["eng"], hf_subsets=["en"]
10
10
  )
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidRetrievalFa,
5
5
  CQADupstackEnglishRetrievalFa,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressRetrievalFa,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrievalFa(),
20
20
  CQADupstackEnglishRetrievalFa(),
21
21
  CQADupstackGamingRetrievalFa(),
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.classification import (
4
4
  SynPerChatbotConvSAAnger,
5
5
  SynPerChatbotConvSAFear,
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
12
12
  SynPerChatbotConvSASurprise,
13
13
  )
14
14
 
15
- task_list_cqa: list[AbsTask] = [
15
+ task_list_cqa = [
16
16
  SynPerChatbotConvSAAnger(),
17
17
  SynPerChatbotConvSASatisfaction(),
18
18
  SynPerChatbotConvSAFriendship(),
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
4
4
  STS17MultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_sts17_multi: list[AbsTask] = [
7
+ task_list_sts17_multi = [
8
8
  STS17MultilingualVisualSTS().filter_languages(
9
9
  languages=["ara", "eng", "spa", "kor"],
10
10
  hf_subsets=[
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
4
4
  STSBenchmarkMultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_multi: list[AbsTask] = [
7
+ task_list_multi = [
8
8
  STSBenchmarkMultilingualVisualSTS().filter_languages(
9
9
  languages=[
10
10
  "deu",
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidNLRetrieval,
5
5
  CQADupstackEnglishNLRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressNLRetrieval,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidNLRetrieval(),
20
20
  CQADupstackEnglishNLRetrieval(),
21
21
  CQADupstackGamingNLRetrieval(),
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
4
4
  CQADupstackAndroidRetrievalPL,
5
5
  CQADupstackEnglishRetrievalPL,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
15
15
  CQADupstackWordpressRetrievalPL,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrievalPL(),
20
20
  CQADupstackEnglishRetrievalPL(),
21
21
  CQADupstackGamingRetrievalPL(),
@@ -59,7 +59,7 @@ class PubChemSMILESBitextMining(AbsTaskBitextMining):
59
59
  """,
60
60
  )
61
61
 
62
- def dataset_transform(self):
62
+ def dataset_transform(self, num_proc: int = 1):
63
63
  for subset in self.hf_subsets:
64
64
  self.dataset[subset] = self.dataset[subset].rename_columns(
65
65
  COL_MAPPING[subset]
@@ -27,7 +27,7 @@ class SAMSumFa(AbsTaskBitextMining):
27
27
  bibtex_citation="",
28
28
  )
29
29
 
30
- def dataset_transform(self):
30
+ def dataset_transform(self, num_proc: int = 1):
31
31
  self.dataset = self.dataset.rename_columns(
32
32
  {"text": "sentence1", "summary": "sentence2"}
33
33
  )
@@ -58,7 +58,7 @@ class SynPerChatbotSumSRetrieval(AbsTaskBitextMining):
58
58
  bibtex_citation=""" """,
59
59
  )
60
60
 
61
- def dataset_transform(self):
61
+ def dataset_transform(self, num_proc: int = 1):
62
62
  self.dataset = self.dataset.rename_columns(
63
63
  {"text": "sentence1", "summary": "sentence2"}
64
64
  )
@@ -89,7 +89,7 @@ class SynPerChatbotRAGSumSRetrieval(AbsTaskBitextMining):
89
89
  bibtex_citation=""" """,
90
90
  )
91
91
 
92
- def dataset_transform(self):
92
+ def dataset_transform(self, num_proc: int = 1):
93
93
  self.dataset = self.dataset.rename_columns(
94
94
  {"text": "sentence1", "summary": "sentence2"}
95
95
  )
@@ -60,7 +60,7 @@ Rapp, Reinhard},
60
60
  superseded_by="BUCC.v2",
61
61
  )
62
62
 
63
- def dataset_transform(self):
63
+ def dataset_transform(self, num_proc: int = 1):
64
64
  dataset = {}
65
65
  for lang in self.dataset:
66
66
  dataset[lang] = {}
@@ -265,7 +265,7 @@ class FloresBitextMining(AbsTaskBitextMining):
265
265
  """,
266
266
  )
267
267
 
268
- def load_data(self) -> None:
268
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
269
269
  if self.data_loaded:
270
270
  return
271
271
 
@@ -99,7 +99,7 @@ class IN22ConvBitextMining(AbsTaskBitextMining):
99
99
  """,
100
100
  )
101
101
 
102
- def load_data(self) -> None:
102
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
103
103
  if self.data_loaded:
104
104
  return
105
105
 
@@ -93,7 +93,7 @@ class IN22GenBitextMining(AbsTaskBitextMining):
93
93
  """,
94
94
  )
95
95
 
96
- def load_data(self) -> None:
96
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
97
97
  if self.data_loaded:
98
98
  return
99
99
 
@@ -35,7 +35,7 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining):
35
35
  prompt="Retrieve parallel sentences in Norwegian Bokmål and Nynorsk",
36
36
  )
37
37
 
38
- def dataset_transform(self):
38
+ def dataset_transform(self, num_proc: int = 1):
39
39
  # Convert to standard format
40
40
  self.dataset = self.dataset.rename_column("nb", "sentence1")
41
41
  self.dataset = self.dataset.rename_column("nn", "sentence2")
@@ -280,7 +280,7 @@ class NTREXBitextMining(AbsTaskBitextMining):
280
280
  """,
281
281
  )
282
282
 
283
- def load_data(self) -> None:
283
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
284
284
  if self.data_loaded:
285
285
  return
286
286
 
@@ -32,7 +32,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
32
32
  bibtex_citation="",
33
33
  )
34
34
 
35
- def load_data(self) -> None:
35
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
36
36
  """Load dataset from HuggingFace hub and convert it to the standard format."""
37
37
  if self.data_loaded:
38
38
  return
@@ -44,7 +44,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
44
44
  self.dataset_transform()
45
45
  self.data_loaded = True
46
46
 
47
- def dataset_transform(self):
47
+ def dataset_transform(self, num_proc: int = 1):
48
48
  for lang in self.hf_subsets:
49
49
  self.dataset[lang] = self.dataset[lang].rename_columns(
50
50
  {"romani": "sentence1", "hungarian": "sentence2"}
@@ -230,7 +230,7 @@ class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
230
230
  """,
231
231
  )
232
232
 
233
- def dataset_transform(self):
233
+ def dataset_transform(self, num_proc: int = 1):
234
234
  dataset = {}
235
235
  for langs in self.dataset:
236
236
  dataset[langs] = {}
@@ -284,7 +284,7 @@ class WebFAQBitextMiningQAs(AbsTaskBitextMining):
284
284
  """,
285
285
  )
286
286
 
287
- def dataset_transform(self):
287
+ def dataset_transform(self, num_proc: int = 1):
288
288
  dataset = {}
289
289
  for langs in self.dataset:
290
290
  dataset[langs] = {}
@@ -28,7 +28,7 @@ class OnlineStoreReviewSentimentClassification(AbsTaskClassification):
28
28
  superseded_by="OnlineStoreReviewSentimentClassification.v2",
29
29
  )
30
30
 
31
- def dataset_transform(self):
31
+ def dataset_transform(self, num_proc: int = 1):
32
32
  self.dataset = self.stratified_subsampling(
33
33
  self.dataset, seed=self.seed, splits=["train"]
34
34
  )
@@ -37,7 +37,7 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification):
37
37
  superseded_by="RestaurantReviewSentimentClassification.v2",
38
38
  )
39
39
 
40
- def dataset_transform(self):
40
+ def dataset_transform(self, num_proc: int = 1):
41
41
  # labels: 0 negative, 1 positive
42
42
  self.dataset = self.dataset.rename_column("polarity", "label")
43
43
  self.dataset = self.stratified_subsampling(
@@ -48,7 +48,7 @@ Mubarak, Hamdy},
48
48
  superseded_by="TweetSarcasmClassification.v2",
49
49
  )
50
50
 
51
- def dataset_transform(self):
51
+ def dataset_transform(self, num_proc: int = 1):
52
52
  # labels: 0 non-sarcastic, 1 sarcastic
53
53
  self.dataset = self.dataset.rename_columns(
54
54
  {"tweet": "text", "sarcasm": "label"}
@@ -36,7 +36,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
36
36
  superseded_by="BengaliHateSpeechClassification.v2",
37
37
  )
38
38
 
39
- def dataset_transform(self):
39
+ def dataset_transform(self, num_proc: int = 1):
40
40
  self.dataset = self.stratified_subsampling(
41
41
  self.dataset, seed=self.seed, splits=["train"]
42
42
  )
@@ -36,7 +36,7 @@ class BengaliSentimentAnalysis(AbsTaskClassification):
36
36
  superseded_by="BengaliSentimentAnalysis.v2",
37
37
  )
38
38
 
39
- def dataset_transform(self):
39
+ def dataset_transform(self, num_proc: int = 1):
40
40
  self.dataset = self.stratified_subsampling(
41
41
  self.dataset, seed=self.seed, splits=["train"]
42
42
  )
@@ -37,7 +37,7 @@ class BulgarianStoreReviewSentimentClassfication(AbsTaskClassification):
37
37
  """,
38
38
  )
39
39
 
40
- def dataset_transform(self):
40
+ def dataset_transform(self, num_proc: int = 1):
41
41
  self.dataset = self.dataset.rename_columns(
42
42
  {"Review": "text", "Category": "label"}
43
43
  )
@@ -39,7 +39,7 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
39
39
  # Increase the samples_per_label in order to improve baseline performance
40
40
  samples_per_label = 20
41
41
 
42
- def dataset_transform(self):
42
+ def dataset_transform(self, num_proc: int = 1):
43
43
  self.dataset = self.dataset.rename_columns(
44
44
  {"comment": "text", "rating_int": "label"}
45
45
  )
@@ -85,7 +85,7 @@ class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
85
85
  # Increase the samples_per_label in order to improve baseline performance
86
86
  samples_per_label = 20
87
87
 
88
- def dataset_transform(self):
88
+ def dataset_transform(self, num_proc: int = 1):
89
89
  self.dataset = self.stratified_subsampling(
90
90
  self.dataset, seed=self.seed, splits=["test"], n_samples=2048
91
91
  )
@@ -56,7 +56,7 @@ Piperidis, Stelios},
56
56
  superseded_by="Ddisco.v2",
57
57
  )
58
58
 
59
- def dataset_transform(self):
59
+ def dataset_transform(self, num_proc: int = 1):
60
60
  self.dataset = self.dataset.rename_columns({"rating": "label"}).remove_columns(
61
61
  ["domain"]
62
62
  )
@@ -60,7 +60,7 @@ Piperidis, Stelios},
60
60
 
61
61
  samples_per_label = 16
62
62
 
63
- def dataset_transform(self):
63
+ def dataset_transform(self, num_proc: int = 1):
64
64
  # convert label to a 0/1 label
65
65
  labels = self.dataset["train"]["label"]
66
66
  lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
@@ -49,7 +49,7 @@ Zesch, Torsten},
49
49
  superseded_by="GermanPoliticiansTwitterSentimentClassification.v2",
50
50
  )
51
51
 
52
- def dataset_transform(self):
52
+ def dataset_transform(self, num_proc: int = 1):
53
53
  self.dataset = self.dataset.rename_column("majority_sentiment", "label")
54
54
 
55
55
 
@@ -42,7 +42,7 @@ class GreekLegalCodeClassification(AbsTaskClassification):
42
42
  """,
43
43
  )
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  self.dataset["validation"] = (
47
47
  self.dataset["validation"]
48
48
  .shuffle(seed=self.seed)
@@ -40,7 +40,7 @@ class DBpediaClassification(AbsTaskClassification):
40
40
  superseded_by="DBpediaClassification.v2",
41
41
  )
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  self.dataset = self.dataset.rename_column("content", "text")
45
45
  self.dataset = self.stratified_subsampling(
46
46
  self.dataset, seed=self.seed, splits=["train", "test"]
@@ -85,7 +85,7 @@ class DBpediaClassificationV2(AbsTaskClassification):
85
85
  adapted_from=["DBpediaClassification"],
86
86
  )
87
87
 
88
- def dataset_transform(self):
88
+ def dataset_transform(self, num_proc: int = 1):
89
89
  self.dataset = self.stratified_subsampling(
90
90
  self.dataset, seed=self.seed, splits=["train", "test"]
91
91
  )
@@ -40,7 +40,7 @@ class ToxicChatClassification(AbsTaskClassification):
40
40
  superseded_by="ToxicChatClassification.v2",
41
41
  )
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  keep_cols = ["user_input", "toxicity"]
45
45
  rename_dict = dict(zip(keep_cols, ["text", "label"]))
46
46
  remove_cols = [
@@ -93,7 +93,7 @@ class ToxicChatClassificationV2(AbsTaskClassification):
93
93
  adapted_from=["ToxicChatClassification"],
94
94
  )
95
95
 
96
- def dataset_transform(self):
96
+ def dataset_transform(self, num_proc: int = 1):
97
97
  self.dataset = self.stratified_subsampling(
98
98
  self.dataset, seed=self.seed, splits=["test"]
99
99
  )
@@ -42,7 +42,7 @@ class ToxicConversationsClassification(AbsTaskClassification):
42
42
 
43
43
  samples_per_label = 16
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  self.dataset = self.stratified_subsampling(
47
47
  self.dataset, seed=self.seed, splits=["test"]
48
48
  )
@@ -88,7 +88,7 @@ class ToxicConversationsClassificationV2(AbsTaskClassification):
88
88
 
89
89
  samples_per_label = 16
90
90
 
91
- def dataset_transform(self):
91
+ def dataset_transform(self, num_proc: int = 1):
92
92
  self.dataset = self.stratified_subsampling(
93
93
  self.dataset, seed=self.seed, splits=["test"]
94
94
  )
@@ -43,7 +43,7 @@ Barbieri, Francesco},
43
43
  superseded_by="TweetTopicSingleClassification.v2",
44
44
  )
45
45
 
46
- def dataset_transform(self):
46
+ def dataset_transform(self, num_proc: int = 1):
47
47
  self.dataset["train"] = self.dataset["train_2021"]
48
48
 
49
49
 
@@ -83,7 +83,7 @@ class YahooAnswersTopicsClassificationV2(AbsTaskClassification):
83
83
 
84
84
  samples_per_label = 32
85
85
 
86
- def dataset_transform(self):
86
+ def dataset_transform(self, num_proc: int = 1):
87
87
  self.dataset = self.stratified_subsampling(
88
88
  self.dataset, seed=self.seed, splits=["train", "test"]
89
89
  )
@@ -42,7 +42,7 @@ class YelpReviewFullClassification(AbsTaskClassification):
42
42
 
43
43
  samples_per_label = 128
44
44
 
45
- def dataset_transform(self):
45
+ def dataset_transform(self, num_proc: int = 1):
46
46
  self.dataset = self.stratified_subsampling(
47
47
  self.dataset, seed=self.seed, splits=["test"]
48
48
  )
@@ -88,7 +88,7 @@ class YelpReviewFullClassificationV2(AbsTaskClassification):
88
88
 
89
89
  samples_per_label = 128
90
90
 
91
- def dataset_transform(self):
91
+ def dataset_transform(self, num_proc: int = 1):
92
92
  self.dataset = self.stratified_subsampling(
93
93
  self.dataset, seed=self.seed, splits=["test"]
94
94
  )
@@ -40,7 +40,7 @@ class EstonianValenceClassification(AbsTaskClassification):
40
40
  superseded_by="EstonianValenceClassification.v2",
41
41
  )
42
42
 
43
- def dataset_transform(self):
43
+ def dataset_transform(self, num_proc: int = 1):
44
44
  self.dataset = self.dataset.rename_column("paragraph", "text").rename_column(
45
45
  "valence", "label"
46
46
  )