mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/__init__.py CHANGED
@@ -3,6 +3,7 @@ from importlib.metadata import version
3
3
  from mteb import types
4
4
  from mteb.abstasks import AbsTask
5
5
  from mteb.abstasks.task_metadata import TaskMetadata
6
+ from mteb.cache import ResultCache
6
7
  from mteb.deprecated_evaluator import MTEB
7
8
  from mteb.evaluate import evaluate
8
9
  from mteb.filter_tasks import filter_tasks
@@ -33,6 +34,7 @@ __all__ = [
33
34
  "CrossEncoderProtocol",
34
35
  "EncoderProtocol",
35
36
  "IndexEncoderSearchProtocol",
37
+ "ResultCache",
36
38
  "SearchProtocol",
37
39
  "SentenceTransformerEncoderWrapper",
38
40
  "TaskMetadata",
@@ -1,20 +1,28 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any, cast
4
+ import warnings
5
+ from typing import TYPE_CHECKING, Any, cast
4
6
 
5
7
  import torch
6
8
  from datasets import Dataset, Image
7
9
  from torch.utils.data import DataLoader, default_collate
8
10
 
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
11
  from mteb.types import (
11
- BatchedInput,
12
- Conversation,
13
12
  ConversationTurn,
14
13
  PromptType,
15
- QueryDatasetType,
16
14
  )
17
- from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Callable
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import (
21
+ BatchedInput,
22
+ Conversation,
23
+ QueryDatasetType,
24
+ )
25
+ from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
18
26
 
19
27
  logger = logging.getLogger(__name__)
20
28
 
@@ -22,22 +30,25 @@ logger = logging.getLogger(__name__)
22
30
  def _create_dataloader_from_texts(
23
31
  text: list[str],
24
32
  batch_size: int = 32,
25
- **kwargs: dict[str, Any],
33
+ num_proc: int = 1,
34
+ **kwargs: Any,
26
35
  ) -> DataLoader[TextInput]:
27
36
  """Create a dataloader from a list of text.
28
37
 
29
38
  Args:
30
39
  text: A list of text to create a dataloader from.
31
40
  batch_size: Batch size for the dataloader.
41
+ num_proc: Number of processes to use.
32
42
  kwargs: Not used, present catching extra arguments.
33
43
 
34
44
  Returns:
35
45
  A dataloader with the text.
36
46
  """
37
47
  dataset = Dataset.from_dict({"text": text})
38
- return torch.utils.data.DataLoader(
48
+ return DataLoader(
39
49
  dataset,
40
50
  batch_size=batch_size,
51
+ num_workers=num_proc if num_proc > 1 else 0,
41
52
  )
42
53
 
43
54
 
@@ -63,20 +74,27 @@ def _corpus_to_dict(
63
74
  def _create_dataloader_for_retrieval_corpus(
64
75
  dataset: Dataset,
65
76
  batch_size: int = 32,
77
+ num_proc: int = 1,
66
78
  ) -> DataLoader[CorpusInput]:
67
79
  """Create a dataloader from a corpus.
68
80
 
69
81
  Args:
70
82
  dataset: Corpus
71
83
  batch_size: Batch size for the dataloader.
84
+ num_proc: Number of processes to use.
72
85
 
73
86
  Returns:
74
87
  A dataloader with the corpus.
75
88
  """
76
- new_ds = dataset.map(_corpus_to_dict, desc="Converting corpus dict")
77
- return torch.utils.data.DataLoader(
89
+ new_ds = dataset.map(
90
+ _corpus_to_dict,
91
+ desc="Converting corpus dict",
92
+ num_proc=num_proc,
93
+ )
94
+ return DataLoader(
78
95
  new_ds,
79
96
  batch_size=batch_size,
97
+ num_workers=num_proc if num_proc > 1 else 0,
80
98
  )
81
99
 
82
100
 
@@ -93,12 +111,14 @@ def _combine_queries_with_instruction_text(row: dict[str, str]) -> dict[str, str
93
111
  def _create_text_dataloader_for_queries(
94
112
  queries: QueryDatasetType,
95
113
  batch_size: int = 32,
114
+ num_proc: int = 1,
96
115
  ) -> DataLoader[QueryInput]:
97
116
  """Create a dataloader from a list of queries.
98
117
 
99
118
  Args:
100
119
  queries: A list of queries.
101
120
  batch_size: Batch size for the dataloader.
121
+ num_proc: Number of processes to use.
102
122
 
103
123
  Returns:
104
124
  A dataloader with the queries.
@@ -106,18 +126,17 @@ def _create_text_dataloader_for_queries(
106
126
  queries = queries.map(
107
127
  _combine_queries_with_instruction_text,
108
128
  desc="Processing queries for dataloading",
129
+ num_proc=num_proc,
109
130
  )
110
- return torch.utils.data.DataLoader(
131
+ return DataLoader(
111
132
  queries,
112
133
  batch_size=batch_size,
134
+ num_workers=num_proc if num_proc > 1 else 0,
113
135
  )
114
136
 
115
137
 
116
- _warned_about_user_role = False
117
-
118
-
119
138
  def _convert_conv_history_to_query(
120
- row: dict[str, list[str] | Conversation],
139
+ row: dict[str, str | list[str] | Conversation],
121
140
  ) -> dict[str, str | Conversation]:
122
141
  """Convert a conversation history to a single query string.
123
142
 
@@ -127,21 +146,18 @@ def _convert_conv_history_to_query(
127
146
  Returns:
128
147
  The updated row with the "query" and "text" fields set to the conversation string, and the "conversation" field set to the list of ConversationTurn.
129
148
  """
130
- global _warned_about_user_role
131
-
132
149
  conversation = row["text"]
133
150
  # if it's a list of strings, just join them
134
151
  if isinstance(conversation, list) and isinstance(conversation[0], str):
135
- conversation = cast(list[str], conversation)
136
- conv_str = "; ".join(conversation)
152
+ conversation_ = cast("list[str]", conversation)
153
+ conv_str = "; ".join(conversation_)
137
154
  current_conversation = [
138
- ConversationTurn(role="user", content=message) for message in conversation
155
+ ConversationTurn(role="user", content=message) for message in conversation_
139
156
  ]
140
- if not _warned_about_user_role:
141
- logger.warning(
142
- "Conversations are a list of strings. Used 'user' role for all turns."
143
- )
144
- _warned_about_user_role = True
157
+ warnings.warn(
158
+ "Conversations are a list of strings. Used 'user' role for all turns.",
159
+ category=UserWarning,
160
+ )
145
161
  # otherwise, it's a list of dictionaries, which we need to convert to strings
146
162
  elif isinstance(conversation, list) and isinstance(conversation[0], dict):
147
163
  conv = []
@@ -178,28 +194,33 @@ def _convert_conv_history_to_query(
178
194
 
179
195
  row["text"] = conv_str
180
196
  row["conversation"] = current_conversation
181
- return row
197
+ return cast("dict[str, str | list[ConversationTurn]]", row)
182
198
 
183
199
 
184
200
  def _create_dataloader_for_queries_conversation(
185
201
  queries: QueryDatasetType,
186
202
  batch_size: int = 32,
203
+ num_proc: int = 1,
187
204
  ) -> DataLoader[QueryInput]:
188
205
  """Create a dataloader from a list of queries.
189
206
 
190
207
  Args:
191
208
  queries: A list of queries.
192
209
  batch_size: Batch size for the dataloader.
210
+ num_proc: Number of processes to use.
193
211
 
194
212
  Returns:
195
213
  A dataloader with the queries.
196
214
  """
197
215
  return DataLoader(
198
216
  queries.map(
199
- _convert_conv_history_to_query, desc="Converting conversations to queries"
217
+ _convert_conv_history_to_query,
218
+ desc="Converting conversations to queries",
219
+ num_proc=num_proc,
200
220
  ),
201
221
  collate_fn=_custom_collate_fn,
202
222
  batch_size=batch_size,
223
+ num_workers=num_proc if num_proc > 1 else 0,
203
224
  )
204
225
 
205
226
 
@@ -244,6 +265,7 @@ def _prepare_image_dataset(
244
265
  dataset: Dataset,
245
266
  image_column_name: str | None = None,
246
267
  transform: Callable[[Any], Any] | None = None,
268
+ num_proc: int = 1,
247
269
  ) -> Dataset:
248
270
  """Prepare the image dataset by converting images to RGB and applying transformations."""
249
271
  if (
@@ -259,6 +281,7 @@ def _prepare_image_dataset(
259
281
  _convert_images_to_rgb,
260
282
  fn_kwargs={"image_col_name": "image", "transform": transform},
261
283
  desc="Converting images to RGB",
284
+ num_proc=num_proc,
262
285
  )
263
286
 
264
287
 
@@ -292,6 +315,7 @@ def _create_image_dataloader(
292
315
  batch_size: int = 32,
293
316
  transform: Callable[[Any], Any] | None = None,
294
317
  collate_fn: Callable[[list[dict[str, Any]]], dict[str, Any]] = _custom_collate_fn,
318
+ num_proc: int = 1,
295
319
  ) -> DataLoader[ImageInput]:
296
320
  """Creates a DataLoader with the image dataset prepared using the explicit transformation.
297
321
 
@@ -301,33 +325,41 @@ def _create_image_dataloader(
301
325
  batch_size: Batch size for the dataloader.
302
326
  transform: A transformation function to apply to each image (e.g., converting to tensor).
303
327
  collate_fn: A custom collate function to handle batching.
328
+ num_proc: Number of processes to use.
304
329
 
305
330
  Returns:
306
331
  A DataLoader with the image dataset.
307
332
  """
308
333
  dataset = _prepare_image_dataset(
309
- dataset, image_column_name, transform
334
+ dataset,
335
+ image_column_name,
336
+ transform,
337
+ num_proc=num_proc,
310
338
  ).select_columns(["image"])
311
339
  return DataLoader(
312
340
  dataset,
313
341
  batch_size=batch_size,
314
342
  collate_fn=collate_fn,
315
343
  shuffle=False,
344
+ num_workers=num_proc if num_proc > 1 else 0,
316
345
  )
317
346
 
318
347
 
319
348
  def _create_text_queries_dataloader(
320
349
  dataset: Dataset,
321
350
  batch_size: int = 32,
351
+ num_proc: int = 1,
322
352
  ) -> DataLoader[QueryInput]:
323
353
  if not isinstance(dataset["text"][0], list):
324
354
  return _create_text_dataloader_for_queries(
325
355
  dataset,
326
356
  batch_size=batch_size,
357
+ num_proc=num_proc,
327
358
  )
328
359
  return _create_dataloader_for_queries_conversation(
329
360
  dataset,
330
361
  batch_size=batch_size,
362
+ num_proc=num_proc,
331
363
  )
332
364
 
333
365
 
@@ -336,6 +368,7 @@ def _create_queries_dataloader(
336
368
  task_metadata: TaskMetadata,
337
369
  input_column: str | None = None,
338
370
  batch_size: int = 32,
371
+ num_proc: int = 1,
339
372
  ) -> DataLoader[QueryInput | ImageInput]:
340
373
  """Create a dataloader for queries."""
341
374
  queries_type = task_metadata.get_modalities(PromptType.query)
@@ -343,12 +376,14 @@ def _create_queries_dataloader(
343
376
  return _create_text_queries_dataloader(
344
377
  dataset,
345
378
  batch_size=batch_size,
379
+ num_proc=num_proc,
346
380
  )
347
381
  if "image" in queries_type: # contains image
348
382
  return _create_image_dataloader(
349
383
  dataset,
350
384
  image_column_name="image",
351
385
  batch_size=batch_size,
386
+ num_proc=num_proc,
352
387
  )
353
388
  raise ValueError(f"Can't handle queries type {queries_type}")
354
389
 
@@ -358,6 +393,7 @@ def _create_document_dataloader(
358
393
  task_metadata: TaskMetadata,
359
394
  input_column: str | None = None,
360
395
  batch_size: int = 32,
396
+ num_proc: int = 1,
361
397
  ) -> DataLoader[CorpusInput | ImageInput]:
362
398
  """Create a dataloader for documents.
363
399
 
@@ -366,18 +402,24 @@ def _create_document_dataloader(
366
402
  task_metadata: Metadata of the task to determine the document type.
367
403
  input_column: The column to use as input. If None, it will use the first column that matches the modality.
368
404
  batch_size: Batch size for the dataloader.
405
+ num_proc: Number of processes to use.
406
+
407
+ Returns:
408
+ A dataloader for the documents.
369
409
  """
370
410
  document_type = task_metadata.get_modalities(PromptType.document)
371
411
  if document_type == ["text"]: # text only
372
412
  return _create_dataloader_for_retrieval_corpus(
373
413
  dataset,
374
414
  batch_size=batch_size,
415
+ num_proc=num_proc,
375
416
  )
376
417
  if "image" in document_type: # contains image
377
418
  return _create_image_dataloader(
378
419
  dataset,
379
420
  image_column_name="image",
380
421
  batch_size=batch_size,
422
+ num_proc=num_proc,
381
423
  )
382
424
  raise ValueError(f"Can't handle queries type {document_type}")
383
425
 
@@ -388,7 +430,8 @@ def create_dataloader(
388
430
  prompt_type: PromptType | None = None,
389
431
  input_column: str | None = None,
390
432
  batch_size: int = 32,
391
- **kwargs: dict[str, Any],
433
+ num_proc: int = 1,
434
+ **kwargs: Any,
392
435
  ) -> DataLoader[BatchedInput]:
393
436
  """Create a dataloader from a dataset.
394
437
 
@@ -401,6 +444,7 @@ def create_dataloader(
401
444
  prompt_type: The type of prompt to create a dataloader for. If None, it will be inferred from the task metadata.
402
445
  input_column: The column to use as input. If None, it will use the first column that matches the modality.
403
446
  batch_size: The batch size for the dataloader.
447
+ num_proc: The number of processes to use for dataset processing.
404
448
  **kwargs: Additional arguments to pass to the dataloader creation functions.
405
449
 
406
450
  Returns:
@@ -412,6 +456,7 @@ def create_dataloader(
412
456
  task_metadata,
413
457
  batch_size=batch_size,
414
458
  input_column=input_column,
459
+ num_proc=num_proc,
415
460
  )
416
461
  if prompt_type == PromptType.document:
417
462
  return _create_document_dataloader(
@@ -419,6 +464,7 @@ def create_dataloader(
419
464
  task_metadata,
420
465
  input_column=input_column,
421
466
  batch_size=batch_size,
467
+ num_proc=num_proc,
422
468
  )
423
469
 
424
470
  if "image" in task_metadata.modalities:
@@ -426,6 +472,7 @@ def create_dataloader(
426
472
  dataset,
427
473
  image_column_name=input_column,
428
474
  batch_size=batch_size,
475
+ num_proc=num_proc,
429
476
  )
430
477
  if "text" in task_metadata.modalities and input_column is not None:
431
478
  return _create_dataloader_from_texts(
@@ -435,4 +482,5 @@ def create_dataloader(
435
482
  return DataLoader(
436
483
  dataset,
437
484
  batch_size=batch_size,
485
+ num_workers=num_proc if num_proc > 1 else 0,
438
486
  )
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any, TypedDict
4
+ from typing import TYPE_CHECKING, TypedDict
3
5
 
4
- from datasets import Dataset
5
6
  from sklearn.metrics.pairwise import (
6
7
  paired_cosine_distances,
7
8
  paired_euclidean_distances,
@@ -9,13 +10,17 @@ from sklearn.metrics.pairwise import (
9
10
  )
10
11
 
11
12
  from mteb._create_dataloaders import create_dataloader
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
- from mteb.models import EncoderProtocol
14
13
  from mteb.similarity_functions import compute_pairwise_similarity
15
- from mteb.types import PromptType
16
14
 
17
15
  from .evaluator import Evaluator
18
16
 
17
+ if TYPE_CHECKING:
18
+ from datasets import Dataset
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.models import EncoderProtocol
22
+ from mteb.types import EncodeKwargs, PromptType
23
+
19
24
  logger = logging.getLogger(__name__)
20
25
 
21
26
 
@@ -60,7 +65,8 @@ class AnySTSEvaluator(Evaluator):
60
65
  self,
61
66
  model: EncoderProtocol,
62
67
  *,
63
- encode_kwargs: dict[str, Any],
68
+ encode_kwargs: EncodeKwargs,
69
+ num_proc: int = 1,
64
70
  ) -> STSEvaluatorScores:
65
71
  logger.info("Running semantic similarity - Encoding samples (1/2)")
66
72
  embeddings1 = model.encode(
@@ -68,6 +74,7 @@ class AnySTSEvaluator(Evaluator):
68
74
  self.dataset,
69
75
  self.task_metadata,
70
76
  input_column=self.input_columns[0],
77
+ num_proc=num_proc,
71
78
  **encode_kwargs,
72
79
  ),
73
80
  task_metadata=self.task_metadata,
@@ -1,15 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING
3
5
 
4
- from datasets import Dataset
5
6
  from sklearn import cluster
6
7
 
7
8
  from mteb._create_dataloaders import create_dataloader
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
- from mteb.models import EncoderProtocol
10
9
 
11
10
  from .evaluator import Evaluator
12
11
 
12
+ if TYPE_CHECKING:
13
+ from datasets import Dataset
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.models import EncoderProtocol
17
+ from mteb.types import EncodeKwargs
18
+
13
19
  logger = logging.getLogger(__name__)
14
20
 
15
21
 
@@ -38,12 +44,14 @@ class ClusteringEvaluator(Evaluator):
38
44
  self,
39
45
  model: EncoderProtocol,
40
46
  *,
41
- encode_kwargs: dict[str, Any],
47
+ encode_kwargs: EncodeKwargs,
48
+ num_proc: int = 1,
42
49
  ) -> list[int]:
43
50
  data_loader = create_dataloader(
44
51
  self.dataset,
45
52
  self.task_metadata,
46
53
  input_column=self.input_column_name,
54
+ num_proc=num_proc,
47
55
  **encode_kwargs,
48
56
  )
49
57
 
@@ -1,8 +1,15 @@
1
+ from __future__ import annotations
2
+
1
3
  from abc import ABC, abstractmethod
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  from mteb.abstasks.abstask import _set_seed
5
- from mteb.models import EncoderProtocol
7
+
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Iterable, Mapping
10
+
11
+ from mteb.models import EncoderProtocol
12
+ from mteb.types import EncodeKwargs
6
13
 
7
14
 
8
15
  class Evaluator(ABC):
@@ -17,8 +24,8 @@ class Evaluator(ABC):
17
24
 
18
25
  @abstractmethod
19
26
  def __call__(
20
- self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
21
- ) -> dict[str, float]:
27
+ self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs, num_proc: int = 1
28
+ ) -> Mapping[str, float] | Iterable[Any]:
22
29
  """This is called during training to evaluate the model.
23
30
 
24
31
  It returns scores.
@@ -26,5 +33,6 @@ class Evaluator(ABC):
26
33
  Args:
27
34
  model: the model to evaluate
28
35
  encode_kwargs: kwargs to pass to the model's encode method
36
+ num_proc: number of processes to use for data loading
29
37
  """
30
38
  pass
@@ -5,20 +5,24 @@ from typing import TYPE_CHECKING, Any
5
5
 
6
6
  import torch
7
7
  import torch.nn.functional as F
8
- from datasets import Dataset
9
8
  from torch.utils.data import DataLoader
10
9
 
11
10
  from mteb._create_dataloaders import (
11
+ _create_dataloader_from_texts,
12
12
  _transform_image_to_rgb,
13
13
  )
14
14
  from mteb._evaluators.evaluator import Evaluator
15
15
  from mteb._requires_package import requires_image_dependencies
16
- from mteb.abstasks.task_metadata import TaskMetadata
17
- from mteb.models.models_protocols import EncoderProtocol
18
16
 
19
17
  if TYPE_CHECKING:
18
+ from collections.abc import Sequence
19
+
20
20
  from PIL.Image import Image
21
21
 
22
+ from mteb.abstasks.task_metadata import TaskMetadata
23
+ from mteb.models.models_protocols import EncoderProtocol
24
+ from mteb.types import EncodeKwargs
25
+
22
26
 
23
27
  logger = logging.getLogger(__name__)
24
28
 
@@ -61,8 +65,8 @@ class ImageTextPairClassificationEvaluator(Evaluator):
61
65
  def __init__(
62
66
  self,
63
67
  dataset,
64
- images_column_names: str | list[str],
65
- texts_column_names: str | list[str],
68
+ images_column_names: str | Sequence[str],
69
+ texts_column_names: str | Sequence[str],
66
70
  num_images_per_sample: int,
67
71
  num_texts_per_sample: int,
68
72
  task_metadata: TaskMetadata,
@@ -82,10 +86,12 @@ class ImageTextPairClassificationEvaluator(Evaluator):
82
86
  self.hf_split = hf_split
83
87
  self.hf_subset = hf_subset
84
88
 
85
- def __call__(
89
+ def __call__( # type: ignore[override]
86
90
  self,
87
91
  model: EncoderProtocol,
88
- encode_kwargs: dict[str, Any],
92
+ *,
93
+ encode_kwargs: EncodeKwargs,
94
+ num_proc: int = 1,
89
95
  ) -> list[torch.Tensor]:
90
96
  images = []
91
97
  if isinstance(self.images_column_names, str):
@@ -106,8 +112,9 @@ class ImageTextPairClassificationEvaluator(Evaluator):
106
112
  texts.append(row[col])
107
113
 
108
114
  text_embeddings = model.encode(
109
- DataLoader(
110
- Dataset.from_dict({"text": texts}),
115
+ _create_dataloader_from_texts(
116
+ texts,
117
+ num_proc=num_proc,
111
118
  **encode_kwargs,
112
119
  ),
113
120
  task_metadata=self.task_metadata,
@@ -124,11 +131,15 @@ class ImageTextPairClassificationEvaluator(Evaluator):
124
131
  dim=-1,
125
132
  ).view(len(self.dataset), self.num_texts_per_sample, -1)
126
133
 
134
+ def _image_collate_fn(batch):
135
+ """Collate function for image batches."""
136
+ return {"image": [item["image"] for item in batch]}
137
+
127
138
  image_embeddings = model.encode(
128
139
  DataLoader(
129
140
  CustomImageDataset(images),
130
- collate_fn=lambda x: {"image": [item["image"] for item in x]},
131
- **encode_kwargs,
141
+ collate_fn=_image_collate_fn,
142
+ num_workers=num_proc if num_proc > 1 else 0,
132
143
  ),
133
144
  task_metadata=self.task_metadata,
134
145
  hf_subset=self.hf_subset,
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any, TypedDict
4
+ from typing import TYPE_CHECKING, Any, TypedDict
3
5
 
4
6
  import numpy as np
5
- from datasets import Dataset
6
7
  from sklearn.metrics.pairwise import (
7
8
  paired_cosine_distances,
8
9
  paired_euclidean_distances,
@@ -11,10 +12,14 @@ from sklearn.metrics.pairwise import (
11
12
 
12
13
  from mteb._create_dataloaders import _create_dataloader_from_texts, create_dataloader
13
14
  from mteb._evaluators.evaluator import Evaluator
14
- from mteb.abstasks.task_metadata import TaskMetadata
15
- from mteb.models import EncoderProtocol
16
15
  from mteb.similarity_functions import compute_pairwise_similarity
17
- from mteb.types import PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from datasets import Dataset
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.models import EncoderProtocol
22
+ from mteb.types import EncodeKwargs, PromptType
18
23
 
19
24
  logger = logging.getLogger(__name__)
20
25
 
@@ -85,7 +90,8 @@ class PairClassificationEvaluator(Evaluator):
85
90
  def __call__(
86
91
  self,
87
92
  model: EncoderProtocol,
88
- encode_kwargs: dict[str, Any],
93
+ encode_kwargs: EncodeKwargs,
94
+ num_proc: int = 1,
89
95
  ) -> PairClassificationDistances:
90
96
  logger.info("Running pair classification - Encoding samples (1/2)")
91
97
  embeddings1 = model.encode(
@@ -93,6 +99,7 @@ class PairClassificationEvaluator(Evaluator):
93
99
  self.dataset,
94
100
  task_metadata=self.task_metadata,
95
101
  input_column=self.input1_column_name,
102
+ num_proc=num_proc,
96
103
  **encode_kwargs,
97
104
  ),
98
105
  task_metadata=self.task_metadata,
@@ -107,6 +114,7 @@ class PairClassificationEvaluator(Evaluator):
107
114
  self.dataset,
108
115
  task_metadata=self.task_metadata,
109
116
  input_column=self.input2_column_name,
117
+ num_proc=num_proc,
110
118
  **encode_kwargs,
111
119
  ),
112
120
  task_metadata=self.task_metadata,
@@ -148,7 +156,9 @@ class PairClassificationEvaluator(Evaluator):
148
156
  hf_subset: str,
149
157
  **encode_kwargs: Any,
150
158
  ) -> np.ndarray:
151
- index_map, all_unique_texts, all_texts_indexes = {}, [], []
159
+ index_map = {}
160
+ all_unique_texts: list[str] = []
161
+ all_texts_indexes = []
152
162
  for text in all_texts:
153
163
  text_hash = hash(text)
154
164
  if text_hash not in index_map: