mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from pathlib import Path
4
- from typing import Any, ClassVar, TypedDict
5
+ from typing import TYPE_CHECKING, Any, ClassVar, TypedDict, cast
5
6
 
6
7
  from datasets import Dataset, DatasetDict
7
8
  from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -9,9 +10,15 @@ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_sc
9
10
  from mteb._evaluators import BitextMiningEvaluator
10
11
  from mteb.abstasks._statistics_calculation import calculate_text_statistics
11
12
  from mteb.abstasks.abstask import AbsTask
12
- from mteb.models import EncoderProtocol, MTEBModels
13
- from mteb.types import HFSubset, ScoresDict
14
- from mteb.types.statistics import SplitDescriptiveStatistics, TextStatistics
13
+ from mteb.models import EncoderProtocol
14
+ from mteb.types.statistics import SplitDescriptiveStatistics
15
+
16
+ if TYPE_CHECKING:
17
+ from pathlib import Path
18
+
19
+ from mteb.models import MTEBModels
20
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
21
+ from mteb.types.statistics import TextStatistics
15
22
 
16
23
  logger = logging.getLogger(__name__)
17
24
 
@@ -73,13 +80,17 @@ class AbsTaskBitextMining(AbsTask):
73
80
  split: str = "test",
74
81
  subsets_to_run: list[HFSubset] | None = None,
75
82
  *,
76
- encode_kwargs: dict[str, Any],
83
+ encode_kwargs: EncodeKwargs,
77
84
  prediction_folder: Path | None = None,
85
+ num_proc: int = 1,
78
86
  **kwargs: Any,
79
87
  ) -> dict[HFSubset, ScoresDict]:
80
88
  """Added load for "parallel" datasets"""
89
+ if not isinstance(model, EncoderProtocol):
90
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
91
+
81
92
  if not self.data_loaded:
82
- self.load_data()
93
+ self.load_data(num_proc=num_proc)
83
94
 
84
95
  hf_subsets = self.hf_subsets
85
96
 
@@ -87,16 +98,22 @@ class AbsTaskBitextMining(AbsTask):
87
98
  if subsets_to_run is not None:
88
99
  hf_subsets = [s for s in hf_subsets if s in subsets_to_run]
89
100
 
90
- scores = {}
101
+ encoder_model = cast("EncoderProtocol", model)
102
+
103
+ if self.dataset is None:
104
+ raise ValueError("Dataset is not loaded.")
105
+
106
+ scores: dict[str, BitextMiningMetrics] = {}
91
107
  if self.parallel_subsets:
92
- scores = self._evaluate_subset(
93
- model,
94
- self.dataset[split], # type: ignore
108
+ scores = self._evaluate_subset( # type: ignore[assignment]
109
+ encoder_model,
110
+ self.dataset[split],
95
111
  parallel=True,
96
112
  hf_split=split,
97
113
  hf_subset="parallel",
98
114
  encode_kwargs=encode_kwargs,
99
115
  prediction_folder=prediction_folder,
116
+ num_proc=num_proc,
100
117
  **kwargs,
101
118
  )
102
119
  else:
@@ -109,42 +126,44 @@ class AbsTaskBitextMining(AbsTask):
109
126
  data_split = self.dataset[split]
110
127
  else:
111
128
  data_split = self.dataset[hf_subset][split]
112
- scores[hf_subset] = self._evaluate_subset(
113
- model,
129
+ scores[hf_subset] = self._evaluate_subset( # type: ignore[assignment]
130
+ encoder_model,
114
131
  data_split,
115
132
  hf_split=split,
116
133
  hf_subset=hf_subset,
117
134
  encode_kwargs=encode_kwargs,
118
135
  prediction_folder=prediction_folder,
136
+ num_proc=num_proc,
119
137
  **kwargs,
120
138
  )
121
139
 
122
- return scores
140
+ return cast("dict[HFSubset, ScoresDict]", scores)
123
141
 
124
142
  def _get_pairs(self, parallel: bool) -> list[tuple[str, str]]:
125
143
  pairs = self._DEFAULT_PAIR
126
144
  if parallel:
127
- pairs = [langpair.split("-") for langpair in self.hf_subsets]
145
+ pairs = [langpair.split("-") for langpair in self.hf_subsets] # type: ignore[misc]
128
146
  return pairs
129
147
 
130
- def _evaluate_subset(
148
+ def _evaluate_subset( # type: ignore[override]
131
149
  self,
132
150
  model: EncoderProtocol,
133
151
  data_split: Dataset,
134
152
  *,
135
153
  hf_split: str,
136
154
  hf_subset: str,
137
- parallel: bool = False,
138
- encode_kwargs: dict[str, Any],
155
+ encode_kwargs: EncodeKwargs,
139
156
  prediction_folder: Path | None = None,
157
+ parallel: bool = False,
158
+ num_proc: int = 1,
140
159
  **kwargs,
141
- ) -> ScoresDict:
160
+ ) -> BitextMiningMetrics | dict[str, BitextMiningMetrics]:
142
161
  pairs = self._get_pairs(parallel)
143
162
 
144
163
  evaluator = BitextMiningEvaluator(
145
164
  data_split,
146
165
  task_metadata=self.metadata,
147
- pair_columns=pairs, # type: ignore
166
+ pair_columns=pairs,
148
167
  hf_split=hf_split,
149
168
  hf_subset=hf_subset,
150
169
  **kwargs,
@@ -156,7 +175,7 @@ class AbsTaskBitextMining(AbsTask):
156
175
  else data_split["gold"]
157
176
  )
158
177
 
159
- neighbours = evaluator(model, encode_kwargs=encode_kwargs)
178
+ neighbours = evaluator(model, encode_kwargs=encode_kwargs, num_proc=num_proc)
160
179
 
161
180
  if prediction_folder:
162
181
  self._save_task_predictions(
@@ -168,16 +187,16 @@ class AbsTaskBitextMining(AbsTask):
168
187
  )
169
188
 
170
189
  if parallel:
171
- metrics = {}
190
+ parallel_metrics = {}
172
191
  for keys, nearest_neighbors in neighbours.items():
173
- metrics[keys] = self._compute_metrics(nearest_neighbors, gold)
192
+ parallel_metrics[keys] = self._compute_metrics(nearest_neighbors, gold)
174
193
 
175
- for v in metrics.values():
194
+ for v in parallel_metrics.values():
176
195
  self._add_main_score(v)
177
- else:
178
- def_pair_str = "-".join(self._DEFAULT_PAIR[0])
179
- metrics = self._compute_metrics(neighbours[def_pair_str], gold)
180
- self._add_main_score(metrics)
196
+ return parallel_metrics
197
+ def_pair_str = "-".join(self._DEFAULT_PAIR[0])
198
+ metrics = self._compute_metrics(neighbours[def_pair_str], gold)
199
+ self._add_main_score(metrics)
181
200
  return metrics
182
201
 
183
202
  def _compute_metrics(
@@ -249,9 +268,12 @@ class AbsTaskBitextMining(AbsTask):
249
268
  sentence2_statistics=text2_statistics,
250
269
  )
251
270
 
252
- def _push_dataset_to_hub(self, repo_name: str) -> None:
271
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
272
+ if self.dataset is None:
273
+ raise ValueError("Dataset is not loaded.")
274
+
253
275
  if self.metadata.is_multilingual:
254
- dataset = defaultdict(dict)
276
+ dataset: dict[str, dict[str, list[str]]] = defaultdict(dict)
255
277
  for config in self.metadata.eval_langs:
256
278
  logger.info(f"Converting {config} of {self.metadata.name}")
257
279
 
@@ -266,10 +288,10 @@ class AbsTaskBitextMining(AbsTask):
266
288
  for split in self.dataset[config]:
267
289
  dataset[split][lang_1] = self.dataset[config][split][sent_1]
268
290
  dataset[split][lang_2] = self.dataset[config][split][sent_2]
269
- for split in dataset:
270
- dataset[split] = Dataset.from_dict(dataset[split])
271
- dataset = DatasetDict(dataset)
272
- dataset.push_to_hub(repo_name)
291
+ dataset_dict = DatasetDict(
292
+ {split: Dataset.from_dict(dataset[split]) for split in dataset}
293
+ )
294
+ dataset_dict.push_to_hub(repo_name, num_proc=num_proc)
273
295
  else:
274
296
  sentences = {}
275
297
  for split in self.dataset:
@@ -281,4 +303,4 @@ class AbsTaskBitextMining(AbsTask):
281
303
  }
282
304
  )
283
305
  sentences = DatasetDict(sentences)
284
- sentences.push_to_hub(repo_name)
306
+ sentences.push_to_hub(repo_name, num_proc=num_proc)
@@ -16,7 +16,7 @@ else:
16
16
 
17
17
  logger = logging.getLogger(__name__)
18
18
 
19
- OLD_FORMAT_RERANKING_TASKS = []
19
+ OLD_FORMAT_RERANKING_TASKS: list[str] = []
20
20
 
21
21
 
22
22
  @deprecated(
@@ -34,7 +34,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
34
34
  For dataformat and other information, see [AbsTaskRetrieval][mteb.abstasks.retrieval.AbsTaskRetrieval].
35
35
  """
36
36
 
37
- def load_data(self) -> None:
37
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
38
38
  """Load the dataset."""
39
39
  if self.data_loaded:
40
40
  return
@@ -43,7 +43,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
43
43
  self.transform_old_dataset_format()
44
44
  else:
45
45
  # use AbsTaskRetrieval default to load the data
46
- return super().load_data()
46
+ return super().load_data(num_proc=num_proc)
47
47
 
48
48
  def _process_example(self, example: dict, split: str, query_idx: int) -> dict:
49
49
  """Process a single example from the dataset.
@@ -100,12 +100,14 @@ class AbsTaskReranking(AbsTaskRetrieval):
100
100
  if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
101
101
  return
102
102
 
103
- logging.info(
103
+ logger.info(
104
104
  f"Transforming old format to standard format for {self.metadata.name}"
105
105
  )
106
106
 
107
107
  given_dataset = copy(given_dataset)
108
- self.dataset = defaultdict(lambda: defaultdict(dict))
108
+ self.dataset: dict[str, dict[str, RetrievalSplitData]] = defaultdict(
109
+ lambda: defaultdict(dict) # type: ignore[arg-type]
110
+ )
109
111
 
110
112
  hf_subsets = self.hf_subsets
111
113
 
@@ -115,19 +117,19 @@ class AbsTaskReranking(AbsTaskRetrieval):
115
117
  if hf_subset in cur_dataset:
116
118
  cur_dataset = cur_dataset[hf_subset]
117
119
  elif "name" in self.metadata.dataset:
118
- cur_dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
120
+ cur_dataset = datasets.load_dataset(**self.metadata.dataset)
119
121
  assert hf_subset == "default", (
120
122
  f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
121
123
  )
122
124
  else:
123
125
  cur_dataset = datasets.load_dataset(
124
126
  **self.metadata.dataset, name=hf_subset
125
- ) # type: ignore
127
+ )
126
128
 
127
129
  for split in cur_dataset:
128
130
  corpus = []
129
131
  queries = []
130
- relevant_docs = defaultdict(dict)
132
+ relevant_docs: dict[str, dict[str, int]] = defaultdict(dict)
131
133
  top_ranked = defaultdict(list)
132
134
 
133
135
  # Create an enumerated dataset to pass indices
@@ -1,12 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import Any
4
+ from typing import TYPE_CHECKING
4
5
 
5
6
  import numpy as np
6
- from datasets import Dataset
7
7
 
8
8
  from mteb._evaluators import SummarizationEvaluator
9
- from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
10
9
  from mteb.abstasks._statistics_calculation import (
11
10
  calculate_score_statistics,
12
11
  calculate_text_statistics,
@@ -14,11 +13,22 @@ from mteb.abstasks._statistics_calculation import (
14
13
  from mteb.abstasks.abstask import AbsTask
15
14
  from mteb.models import EncoderProtocol
16
15
  from mteb.types.statistics import (
17
- ScoreStatistics,
18
16
  SplitDescriptiveStatistics,
19
- TextStatistics,
20
17
  )
21
18
 
19
+ if TYPE_CHECKING:
20
+ from pathlib import Path
21
+
22
+ from datasets import Dataset
23
+
24
+ from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
25
+ from mteb.models import MTEBModels
26
+ from mteb.types import EncodeKwargs
27
+ from mteb.types.statistics import (
28
+ ScoreStatistics,
29
+ TextStatistics,
30
+ )
31
+
22
32
  logger = logging.getLogger(__name__)
23
33
 
24
34
 
@@ -77,17 +87,23 @@ class AbsTaskSummarization(AbsTask):
77
87
 
78
88
  def _evaluate_subset(
79
89
  self,
80
- model: EncoderProtocol,
90
+ model: MTEBModels,
81
91
  data_split: Dataset,
82
92
  *,
83
93
  hf_split: str,
84
94
  hf_subset: str,
85
- encode_kwargs: dict[str, Any],
95
+ encode_kwargs: EncodeKwargs,
86
96
  prediction_folder: Path | None = None,
97
+ num_proc: int = 1,
87
98
  **kwargs,
88
99
  ) -> SummarizationMetrics:
100
+ if not isinstance(model, EncoderProtocol):
101
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
102
+
89
103
  normalized_scores = [
90
- (np.array(x) - self.min_score) / (self.max_score - self.min_score)
104
+ (
105
+ (np.array(x) - self.min_score) / (self.max_score - self.min_score)
106
+ ).tolist()
91
107
  for x in data_split[self.relevancy_column_name]
92
108
  ]
93
109
  evaluator = self.evaluator(
@@ -100,7 +116,7 @@ class AbsTaskSummarization(AbsTask):
100
116
  hf_subset=hf_subset,
101
117
  **kwargs,
102
118
  )
103
- scores = evaluator(model, encode_kwargs=encode_kwargs)
119
+ scores = evaluator(model, encode_kwargs=encode_kwargs, num_proc=num_proc)
104
120
  if prediction_folder:
105
121
  self._save_task_predictions(
106
122
  scores,
@@ -1,6 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import Any, TypedDict
4
+ from typing import TYPE_CHECKING, TypedDict
4
5
 
5
6
  import torch
6
7
  from datasets import Dataset
@@ -9,10 +10,7 @@ from sklearn import metrics
9
10
  from mteb._evaluators import ZeroShotClassificationEvaluator
10
11
  from mteb.models import EncoderProtocol
11
12
  from mteb.types.statistics import (
12
- ImageStatistics,
13
- LabelStatistics,
14
13
  SplitDescriptiveStatistics,
15
- TextStatistics,
16
14
  )
17
15
 
18
16
  from ._statistics_calculation import (
@@ -22,6 +20,17 @@ from ._statistics_calculation import (
22
20
  )
23
21
  from .abstask import AbsTask
24
22
 
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from mteb.models import MTEBModels
27
+ from mteb.types import EncodeKwargs
28
+ from mteb.types.statistics import (
29
+ ImageStatistics,
30
+ LabelStatistics,
31
+ TextStatistics,
32
+ )
33
+
25
34
  logger = logging.getLogger(__name__)
26
35
 
27
36
 
@@ -111,15 +120,19 @@ class AbsTaskZeroShotClassification(AbsTask):
111
120
 
112
121
  def _evaluate_subset(
113
122
  self,
114
- model: EncoderProtocol,
123
+ model: MTEBModels,
115
124
  data_split: Dataset,
116
125
  *,
117
126
  hf_split: str,
118
127
  hf_subset: str,
119
- encode_kwargs: dict[str, Any],
128
+ encode_kwargs: EncodeKwargs,
120
129
  prediction_folder: Path | None = None,
130
+ num_proc: int = 1,
121
131
  **kwargs,
122
132
  ) -> ZeroShotClassificationMetrics:
133
+ if not isinstance(model, EncoderProtocol):
134
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
135
+
123
136
  candidate_labels = self.get_candidate_labels()
124
137
  data_split = data_split.select_columns(
125
138
  [self.input_column_name, self.label_column_name]
@@ -133,7 +146,11 @@ class AbsTaskZeroShotClassification(AbsTask):
133
146
  hf_subset=hf_subset,
134
147
  **kwargs,
135
148
  )
136
- probs = evaluator(model, encode_kwargs=encode_kwargs)
149
+ probs = evaluator(
150
+ model,
151
+ encode_kwargs=encode_kwargs,
152
+ num_proc=num_proc,
153
+ )
137
154
 
138
155
  if prediction_folder:
139
156
  self._save_task_predictions(
@@ -158,13 +175,14 @@ class AbsTaskZeroShotClassification(AbsTask):
158
175
  accuracy=metrics.accuracy_score(labels, predictions),
159
176
  )
160
177
 
161
- def _push_dataset_to_hub(self, repo_name: str) -> None:
178
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
162
179
  self._upload_dataset_to_hub(
163
180
  repo_name,
164
181
  [
165
182
  self.input_column_name,
166
183
  self.label_column_name,
167
184
  ],
185
+ num_proc=num_proc,
168
186
  )
169
187
  labels_dataset = Dataset.from_dict({"labels": self.get_candidate_labels()})
170
188
  labels_dataset.push_to_hub(repo_name, config_name="labels")
@@ -1,13 +1,17 @@
1
+ from __future__ import annotations
2
+
1
3
  import re
2
4
  from collections import defaultdict
3
- from typing import Literal
5
+ from typing import TYPE_CHECKING, Literal
4
6
 
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
8
10
  import mteb
9
11
  from mteb.get_tasks import get_task, get_tasks
10
- from mteb.results.benchmark_results import BenchmarkResults
12
+
13
+ if TYPE_CHECKING:
14
+ from mteb.results.benchmark_results import BenchmarkResults
11
15
 
12
16
 
13
17
  def _borda_count(scores: pd.Series) -> pd.Series:
@@ -115,7 +119,6 @@ def _create_summary_table_from_benchmark_results(
115
119
 
116
120
  # Build joint table
117
121
  joint_table = mean_per_type.copy()
118
- joint_table = joint_table.drop(models_to_remove, axis=0)
119
122
  joint_table.insert(0, "mean", overall_mean)
120
123
  joint_table.insert(1, "mean_by_task_type", typed_mean)
121
124
  joint_table["borda_rank"] = _get_borda_rank(per_task)
@@ -303,6 +306,7 @@ def _create_per_language_table_from_benchmark_results(
303
306
 
304
307
  def _create_summary_table_mean_public_private(
305
308
  benchmark_results: BenchmarkResults,
309
+ exclude_private_from_borda: bool = False,
306
310
  ) -> pd.DataFrame:
307
311
  """Create summary table from BenchmarkResults.
308
312
 
@@ -311,6 +315,7 @@ def _create_summary_table_mean_public_private(
311
315
 
312
316
  Args:
313
317
  benchmark_results: BenchmarkResults object containing model results
318
+ exclude_private_from_borda: If True, calculate Borda rank using only public tasks
314
319
 
315
320
  Returns:
316
321
  DataFrame with model summaries, ready for styling in the leaderboard
@@ -353,10 +358,13 @@ def _create_summary_table_mean_public_private(
353
358
 
354
359
  # Build joint table
355
360
  joint_table = mean_per_type.copy()
356
- joint_table = joint_table.drop(models_to_remove, axis=0)
357
361
  joint_table.insert(0, "mean(public)", public_mean)
358
362
  joint_table.insert(1, "mean(private)", private_mean)
359
- joint_table["borda_rank"] = _get_borda_rank(per_task)
363
+ if exclude_private_from_borda:
364
+ borda_per_task = per_task[public_task_name]
365
+ else:
366
+ borda_per_task = per_task
367
+ joint_table["borda_rank"] = _get_borda_rank(borda_per_task)
360
368
  joint_table = joint_table.sort_values("borda_rank", ascending=True)
361
369
  joint_table = joint_table.reset_index()
362
370
 
@@ -476,7 +484,6 @@ def _create_summary_table_mean_subset(
476
484
 
477
485
  # Build joint table
478
486
  joint_table = mean_per_type.copy()
479
- joint_table = joint_table.drop(models_to_remove, axis=0)
480
487
  joint_table.insert(0, "mean(subset)", overall_subset_mean)
481
488
  joint_table["borda_rank"] = _get_borda_rank(per_subset)
482
489
  joint_table = joint_table.sort_values("mean(subset)", ascending=False)
@@ -595,7 +602,6 @@ def _create_summary_table_mean_task_type(
595
602
 
596
603
  # Build joint table
597
604
  joint_table = mean_per_type.copy()
598
- joint_table = joint_table.drop(models_to_remove, axis=0)
599
605
  joint_table.insert(0, "mean_by_task_type", typed_mean)
600
606
  joint_table = joint_table.sort_values("mean_by_task_type", ascending=False)
601
607
  joint_table["borda_rank"] = _get_borda_rank(per_task)
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Iterable, Sequence
3
+ from collections.abc import Iterator, Sequence
4
4
  from dataclasses import dataclass, field
5
5
  from typing import TYPE_CHECKING, Literal
6
6
 
@@ -19,6 +19,7 @@ class Benchmark:
19
19
 
20
20
  Args:
21
21
  name: The name of the benchmark
22
+ aliases: Alternative names for the benchmark
22
23
  tasks: The tasks within the benchmark.
23
24
  description: A description of the benchmark, should include its intended goal and potentially a description of its construction
24
25
  reference: A link reference, to a source containing additional information typically to a paper, leaderboard or github.
@@ -38,6 +39,7 @@ class Benchmark:
38
39
 
39
40
  name: str
40
41
  tasks: Sequence[AbsTask]
42
+ aliases: Sequence[str] = field(default_factory=tuple)
41
43
  description: str | None = None
42
44
  reference: StrURL | None = None
43
45
  citation: str | None = None
@@ -47,7 +49,7 @@ class Benchmark:
47
49
  display_name: str | None = None
48
50
  language_view: list[str] | Literal["all"] = field(default_factory=list)
49
51
 
50
- def __iter__(self) -> Iterable[AbsTask]:
52
+ def __iter__(self) -> Iterator[AbsTask]:
51
53
  return iter(self.tasks)
52
54
 
53
55
  def __len__(self) -> int:
@@ -121,9 +123,19 @@ class RtebBenchmark(Benchmark):
121
123
  _create_summary_table_mean_public_private,
122
124
  )
123
125
 
124
- joint_table = _create_summary_table_mean_public_private(benchmark_results)
126
+ joint_table = _create_summary_table_mean_public_private(
127
+ benchmark_results, exclude_private_from_borda=True
128
+ )
129
+ # issue 3902: temporary remove the private column from RTEB summary table
130
+ if "Mean (Private)" in joint_table.columns:
131
+ joint_table = joint_table.drop(columns=["Mean (Private)"])
125
132
  # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
133
+ # but due to 3902, if Private column existed, Mean (Task) was the mean of Public and Private so instead we drop Mean (Task) and rename Mean (Public) to Mean (Task)
126
134
  joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
135
+ if "Mean (Task)" in joint_table.columns:
136
+ joint_table = joint_table.drop(columns=["Mean (Task)"])
137
+ joint_table = joint_table.rename(columns={"Mean (Public)": "Mean (Task)"})
138
+
127
139
  return joint_table
128
140
 
129
141
 
@@ -3,9 +3,11 @@ from mteb.benchmarks.benchmarks.benchmarks import (
3
3
  BEIR_NL,
4
4
  BRIGHT,
5
5
  BRIGHT_LONG,
6
+ BRIGHT_V1_1,
6
7
  BUILT_MTEB,
7
8
  C_MTEB,
8
9
  CHEMTEB,
10
+ CHEMTEB_V1_1,
9
11
  CODE_RAG,
10
12
  ENCODECHKA,
11
13
  FA_MTEB,
@@ -14,6 +16,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
14
16
  JINA_VDR,
15
17
  JMTEB_LITE_V1,
16
18
  JMTEB_V2,
19
+ KOVIDORE_V2,
17
20
  LONG_EMBED,
18
21
  MIEB_ENG,
19
22
  MIEB_IMG,
@@ -67,8 +70,10 @@ __all__ = [
67
70
  "BEIR_NL",
68
71
  "BRIGHT",
69
72
  "BRIGHT_LONG",
73
+ "BRIGHT_V1_1",
70
74
  "BUILT_MTEB",
71
75
  "CHEMTEB",
76
+ "CHEMTEB_V1_1",
72
77
  "CODE_RAG",
73
78
  "C_MTEB",
74
79
  "ENCODECHKA",
@@ -79,6 +84,7 @@ __all__ = [
79
84
  "JINA_VDR",
80
85
  "JMTEB_LITE_V1",
81
86
  "JMTEB_V2",
87
+ "KOVIDORE_V2",
82
88
  "LONG_EMBED",
83
89
  "MIEB_ENG",
84
90
  "MIEB_IMG",