mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,329 @@
1
+ from __future__ import annotations
2
+
3
+ import atexit
4
+ import gc
5
+ import logging
6
+ import os
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ import numpy as np
10
+ import torch
11
+
12
+ from mteb._requires_package import requires_package
13
+ from mteb.models import ModelMeta
14
+ from mteb.models.abs_encoder import AbsEncoder
15
+ from mteb.types import PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Callable
19
+
20
+ from torch.utils.data import DataLoader
21
+ from vllm.config import PoolerConfig # type: ignore[import-not-found]
22
+
23
+ from mteb.abstasks.task_metadata import TaskMetadata
24
+ from mteb.types import Array, BatchedInput
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ Dtype = Literal["half", "float16", "float", "float32", "bfloat16", "auto"]
30
+
31
+
32
class VllmWrapperBase:
    """Base wrapper that builds a vLLM ``LLM`` engine with ``runner="pooling"``.

    Subclasses choose the vLLM conversion mode by overriding ``convert``.
    """

    # Forwarded to ``EngineArgs(convert=...)``; subclasses override it.
    convert = "auto"
    # Metadata of the wrapped model; assigned at the end of ``__init__``.
    mteb_model_meta: ModelMeta | None = None
    # Engine handle. Declared on the class so ``cleanup``/``__del__`` stay safe
    # on instances whose ``__init__`` raised before the engine was created.
    llm: Any = None

    def __init__(
        self,
        model: str | ModelMeta,
        revision: str | None = None,
        *,
        trust_remote_code: bool = True,
        dtype: Dtype = "auto",
        head_dtype: Literal["model"] | Dtype | None = None,
        max_model_len: int | None = None,
        max_num_batched_tokens: int | None = None,
        max_num_seqs: int = 128,
        tensor_parallel_size: int = 1,
        enable_prefix_caching: bool | None = None,
        gpu_memory_utilization: float = 0.9,
        hf_overrides: dict[str, Any] | None = None,
        pooler_config: PoolerConfig | None = None,
        enforce_eager: bool = False,
        **kwargs: Any,
    ):
        """Wrapper for vllm serving engine.

        Args:
            model: Model name string, or a ModelMeta whose ``name`` and ``revision``
                are used.
            revision: The revision of the model to use. Ignored when ``model`` is a
                ModelMeta (the revision stored in the meta wins).
            trust_remote_code: Whether to trust remote code execution when loading the model.
                Should be True for models with custom code.
            dtype: Data type for model weights. "auto" will automatically select appropriate
                dtype based on hardware and model capabilities. vllm uses flash attention by
                default, which does not support fp32. Therefore, it defaults to using fp16 for
                inference on fp32 models. Testing has shown a relatively small drop in accuracy.
                You can manually opt for fp32, but inference speed will be very slow.
            head_dtype: "head" refers to the last Linear layer(s) of an LLM, such as the score
                or classifier in a classification model. When given, it is forwarded to vllm
                as the ``head_dtype`` entry of ``hf_overrides``.
            max_model_len: Maximum sequence length (context window) supported by the model.
                If None, uses the model's default maximum length.
            max_num_batched_tokens: Maximum number of tokens to process in a single batch.
                If None, automatically determined.
            max_num_seqs: Maximum number of sequences to process concurrently.
            tensor_parallel_size: Number of GPUs for tensor parallelism.
            enable_prefix_caching: Whether to enable KV cache sharing for common prompt prefixes.
                If None, uses the model's default setting.
            gpu_memory_utilization: Target GPU memory utilization ratio (0.0 to 1.0).
            hf_overrides: Dictionary mapping Hugging Face configuration keys to override values.
                The passed dictionary is never modified.
            pooler_config: Controls the behavior of output pooling in pooling models.
            enforce_eager: Whether to disable CUDA graph optimization and use eager execution.
            **kwargs: Additional arguments to pass to the vllm serving engine model.

        Raises:
            ValueError: If ``model`` is a ModelMeta whose ``name`` is None.
        """
        requires_package(
            self,
            "vllm",
            "Wrapper for vllm serving engine",
            install_instruction="pip install mteb[vllm]",
        )

        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

        # Imported lazily so this module can be imported without vllm installed.
        from vllm import LLM, EngineArgs

        if head_dtype is not None:
            # Build a new dict instead of writing into the caller's object.
            hf_overrides = {**(hf_overrides or {}), "head_dtype": head_dtype}
        elif hf_overrides is None:
            hf_overrides = {}

        if isinstance(model, str):
            model_name = model
        else:
            model_name = model.name
            if model_name is None:
                raise ValueError("name in ModelMeta is None. It must be a string.")

        if isinstance(model, ModelMeta):
            # Only mention the override when a conflicting revision was passed.
            if revision is not None and revision != model.revision:
                logger.info(
                    "Using revision from model meta. Passed revision will be ignored"
                )
            revision = model.revision

        args = EngineArgs(
            model=model_name,
            revision=revision,
            runner="pooling",
            convert=self.convert,  # type: ignore[arg-type]
            max_model_len=max_model_len,
            max_num_batched_tokens=max_num_batched_tokens,
            max_num_seqs=max_num_seqs,
            tensor_parallel_size=tensor_parallel_size,
            enable_prefix_caching=enable_prefix_caching,
            gpu_memory_utilization=gpu_memory_utilization,
            hf_overrides=hf_overrides,
            pooler_config=pooler_config,
            enforce_eager=enforce_eager,
            trust_remote_code=trust_remote_code,
            dtype=dtype,
            **kwargs,
        )
        self.llm = LLM(**vars(args))

        if isinstance(model, str):
            self.mteb_model_meta = ModelMeta.from_hub(model=model, revision=revision)
        else:
            self.mteb_model_meta = model

        # NOTE: the registered bound method references this instance, so the
        # wrapper is kept alive until interpreter exit unless the handler is
        # removed; call ``cleanup()`` explicitly to drop the engine earlier.
        atexit.register(self.cleanup)

    def cleanup(self):
        """Clean up the VLLM distributed runtime environment and release GPU resources.

        Does nothing when no engine is attached, so it is safe to call repeatedly
        and on instances whose construction did not complete.
        """
        if self.llm is None:
            return

        from vllm.distributed import (  # type: ignore[import-not-found]
            cleanup_dist_env_and_memory,
        )

        # Drop our reference first so the collection below can reclaim the engine.
        self.llm = None
        gc.collect()
        cleanup_dist_env_and_memory()

    def __del__(self):
        # Deliberately best-effort: a finalizer must not propagate errors
        # (it may run while the interpreter is shutting down).
        try:
            self.cleanup()
        except Exception:
            pass
155
+
156
+
157
class VllmEncoderWrapper(AbsEncoder, VllmWrapperBase):
    """vLLM wrapper for Encoder models.

    Args:
        model: model name string or ModelMeta.
        revision: The revision of the model to use.
        prompt_dict: A dictionary mapping task names to prompt strings.
        use_instructions: Whether to use instructions from the prompt_dict.
            When False, values from prompt_dict are used as static prompts (prefixes).
            When True, values from prompt_dict are used as instructions to be formatted
            using the instruction_template.
        instruction_template: A template or callable to format instructions.
            Can be a string with '{instruction}' placeholder or a callable that takes
            the instruction and prompt type and returns a formatted string.
        apply_instruction_to_documents: Whether to apply instructions to documents prompts.
        **kwargs: Additional arguments to pass to the vllm serving engine model.

    Raises:
        ValueError: If ``use_instructions`` is True without an ``instruction_template``,
            or if a string ``instruction_template`` lacks the '{instruction}' placeholder.
    """

    convert = "embed"

    def __init__(
        self,
        model: str | ModelMeta,
        revision: str | None = None,
        prompt_dict: dict[str, str] | None = None,
        use_instructions: bool = False,
        instruction_template: (
            str | Callable[[str, PromptType | None], str] | None
        ) = None,
        apply_instruction_to_documents: bool = True,
        **kwargs: Any,
    ):
        if use_instructions and instruction_template is None:
            raise ValueError(
                "To use instructions, an instruction_template must be provided. "
                "For example, `Instruction: {instruction}`"
            )

        if (
            isinstance(instruction_template, str)
            and "{instruction}" not in instruction_template
        ):
            raise ValueError(
                "Instruction template must contain the string '{instruction}'."
            )

        # Prompt configuration is stored before the engine is built.
        self.prompts_dict = prompt_dict
        self.use_instructions = use_instructions
        self.instruction_template = instruction_template
        self.apply_instruction_to_passages = apply_instruction_to_documents
        super().__init__(
            model,
            revision,
            **kwargs,
        )

    def encode(
        self,
        inputs: DataLoader[BatchedInput],
        *,
        task_metadata: TaskMetadata,
        hf_split: str,
        hf_subset: str,
        prompt_type: PromptType | None = None,
        **kwargs: Any,
    ) -> Array:
        """Encodes the given sentences using the encoder.

        Args:
            inputs: The sentences to encode; the ``"text"`` entry of every batch is used.
            task_metadata: The metadata of the task. Used to look up the prompt or
                instruction for the task in the configured prompt dictionary.
            prompt_type: The name type of prompt. (query or document)
            hf_split: Split of current task
            hf_subset: Subset of current task
            **kwargs: Additional arguments to pass to the encoder.

        Returns:
            The encoded sentences, stacked into a single tensor.
        """
        prompt = ""
        if self.use_instructions and self.prompts_dict is not None:
            prompt = self.get_task_instruction(task_metadata, prompt_type)
        elif self.prompts_dict is not None:
            prompt_name = self.get_prompt_name(task_metadata, prompt_type)
            if prompt_name is not None:
                prompt = self.prompts_dict.get(prompt_name, "")

        if (
            self.use_instructions
            and self.apply_instruction_to_passages is False
            and prompt_type == PromptType.document
        ):
            # Log the prompt type itself: reaching a member through another
            # member (``prompt_type.document``) is discouraged, breaks if a plain
            # string is passed, and is believed to raise on Python 3.11.
            logger.info(f"No instruction used, because prompt type = {prompt_type}")
            prompt = ""
        else:
            logger.info(
                f"Using instruction: '{prompt}' for task: '{task_metadata.name}' prompt type: '{prompt_type}'"
            )

        # The prompt is prepended as a plain string prefix to every text.
        prompts = [prompt + text for batch in inputs for text in batch["text"]]
        outputs = self.llm.encode(
            prompts, pooling_task="embed", truncate_prompt_tokens=-1
        )
        embeddings = torch.stack([output.outputs.data for output in outputs])
        return embeddings
265
+
266
+
267
class VllmCrossEncoderWrapper(VllmWrapperBase):
    """Cross-encoder flavour of the vLLM wrapper (``convert = "classify"``).

    Args:
        model: model name string or ModelMeta.
        revision: The revision of the model to use.
        query_prefix: String prepended to every text coming from the first dataloader.
        document_prefix: String prepended to every text coming from the second dataloader.
        **kwargs: Additional arguments forwarded to the base wrapper.
    """

    convert = "classify"

    def __init__(
        self,
        model: str | ModelMeta,
        revision: str | None = None,
        query_prefix: str = "",
        document_prefix: str = "",
        **kwargs: Any,
    ):
        # The engine is built first; the prefixes are attached afterwards.
        super().__init__(model, revision, **kwargs)
        self.query_prefix = query_prefix
        self.document_prefix = document_prefix

    def predict(
        self,
        inputs1: DataLoader[BatchedInput],
        inputs2: DataLoader[BatchedInput],
        *,
        task_metadata: TaskMetadata,
        hf_split: str,
        hf_subset: str,
        prompt_type: PromptType | None = None,
        **kwargs: Any,
    ) -> Array:
        """Score pairs of inputs with the cross-encoder.

        Unlike an encoder, a cross-encoder can compare across the two inputs.

        Args:
            inputs1: First dataloader. For reranking tasks these are the queries
                (for text only tasks `QueryDatasetType`).
            inputs2: Second dataloader. For reranking these are the documents
                (for text only tasks `RetrievalOutputType`).
            task_metadata: Metadata of the current task. Not used by this implementation.
            hf_split: Split of current task. Not used by this implementation.
            hf_subset: Subset of current task. Not used by this implementation.
            prompt_type: The name type of prompt. Not used by this implementation.
            **kwargs: Additional arguments; not used by this implementation.

        Returns:
            A numpy array with one predicted relevance score per returned output.
        """
        # Flatten the first dataloader completely before touching the second one.
        q_prefix = self.query_prefix
        queries = []
        for query_batch in inputs1:
            for query_text in query_batch["text"]:
                queries.append(q_prefix + query_text)

        d_prefix = self.document_prefix
        corpus = []
        for doc_batch in inputs2:
            for doc_text in doc_batch["text"]:
                corpus.append(d_prefix + doc_text)
        # TODO: support score prompt

        score_kwargs = {"truncate_prompt_tokens": -1, "use_tqdm": False}
        results = self.llm.score(queries, corpus, **score_kwargs)

        collected = []
        for result in results:
            collected.append(result.outputs.score)
        return np.array(collected)
mteb/py.typed ADDED
File without changes
@@ -1,43 +1,51 @@
1
+ from __future__ import annotations
2
+
1
3
  import functools
2
4
  import json
3
5
  import logging
4
6
  import warnings
5
- from collections.abc import Callable, Iterable, Iterator, Sequence
6
7
  from pathlib import Path
7
- from typing import Any, Literal
8
+ from typing import TYPE_CHECKING, Any, Literal, cast
8
9
 
9
10
  import pandas as pd
10
11
  from packaging.version import InvalidVersion, Version
11
12
  from pydantic import BaseModel, ConfigDict
12
- from typing_extensions import Self
13
13
 
14
- from mteb.abstasks.abstask import AbsTask
15
- from mteb.abstasks.task_metadata import (
16
- TaskDomain,
17
- TaskType,
18
- )
19
14
  from mteb.benchmarks.benchmark import Benchmark
20
15
  from mteb.models import ModelMeta
21
16
  from mteb.models.get_model_meta import get_model_metas
22
- from mteb.types import (
23
- ISOLanguage,
24
- ISOLanguageScript,
25
- Modalities,
26
- Score,
27
- ScoresDict,
28
- SplitName,
29
- )
30
17
 
31
18
  from .model_result import ModelResult, _aggregate_and_pivot
32
19
 
20
+ if TYPE_CHECKING:
21
+ from collections.abc import Callable, Iterable, Iterator
22
+
23
+ from typing_extensions import Self
24
+
25
+ from mteb.abstasks.abstask import AbsTask
26
+ from mteb.abstasks.task_metadata import (
27
+ TaskDomain,
28
+ TaskType,
29
+ )
30
+ from mteb.types import (
31
+ ISOLanguage,
32
+ ISOLanguageScript,
33
+ Modalities,
34
+ Score,
35
+ ScoresDict,
36
+ SplitName,
37
+ )
38
+
39
+
33
40
  logger = logging.getLogger(__name__)
34
41
 
35
42
 
36
- # Global cache for model metas and version parsing
37
43
  @functools.lru_cache
38
44
  def _get_cached_model_metas() -> dict[str, str | None]:
39
45
  """Cache model metas to avoid repeated calls."""
40
- return {meta.name: meta.revision for meta in get_model_metas()}
46
+ return {
47
+ meta.name: meta.revision for meta in get_model_metas() if meta.name is not None
48
+ }
41
49
 
42
50
 
43
51
  @functools.lru_cache(maxsize=10000)
@@ -77,10 +85,10 @@ class BenchmarkResults(BaseModel):
77
85
  task_names: list[str] | None = None,
78
86
  languages: list[str] | None = None,
79
87
  domains: list[TaskDomain] | None = None,
80
- task_types: list[TaskType] | None = None, # type: ignore
88
+ task_types: list[TaskType] | None = None,
81
89
  modalities: list[Modalities] | None = None,
82
90
  is_public: bool | None = None,
83
- ) -> Self:
91
+ ) -> BenchmarkResults:
84
92
  # TODO: Same as filter_models
85
93
  model_results = [
86
94
  res._filter_tasks(
@@ -97,7 +105,7 @@ class BenchmarkResults(BaseModel):
97
105
  model_results=[res for res in model_results if res.task_results]
98
106
  )
99
107
 
100
- def select_tasks(self, tasks: Sequence[AbsTask]) -> Self:
108
+ def select_tasks(self, tasks: Iterable[AbsTask]) -> BenchmarkResults:
101
109
  """Select tasks from the benchmark results.
102
110
 
103
111
  Args:
@@ -115,7 +123,7 @@ class BenchmarkResults(BaseModel):
115
123
  self,
116
124
  names: list[str] | list[ModelMeta],
117
125
  revisions: list[str | None] | None = None,
118
- ) -> Self:
126
+ ) -> BenchmarkResults:
119
127
  """Get models by name and revision.
120
128
 
121
129
  Args:
@@ -128,7 +136,7 @@ class BenchmarkResults(BaseModel):
128
136
  models_res = []
129
137
  _revisions = revisions if revisions is not None else [None] * len(names)
130
138
 
131
- name_rev = {}
139
+ name_rev: dict[str, str | None] = {}
132
140
 
133
141
  if len(names) != len(_revisions):
134
142
  raise ValueError(
@@ -137,9 +145,12 @@ class BenchmarkResults(BaseModel):
137
145
 
138
146
  for name, revision in zip(names, _revisions):
139
147
  if isinstance(name, ModelMeta):
148
+ if name.name is None:
149
+ raise ValueError("name in ModelMeta is None. It must be a string.")
140
150
  name_rev[name.name] = name.revision
141
151
  else:
142
- name_rev[name] = revision
152
+ name_ = cast("str", name)
153
+ name_rev[name_] = revision
143
154
 
144
155
  for model_res in self.model_results:
145
156
  model_name = model_res.model_name
@@ -159,7 +170,7 @@ class BenchmarkResults(BaseModel):
159
170
  n_parameters_range: tuple[int | None, int | None] = (None, None),
160
171
  use_instructions: bool | None = None,
161
172
  zero_shot_on: list[AbsTask] | None = None,
162
- ) -> Self:
173
+ ) -> BenchmarkResults:
163
174
  # mostly a utility function for the leaderboard app.
164
175
  # I would probably move the filtering of the models outside of this call. No need to call get_model_metas inside the filter.
165
176
  # interface would then be the same as the get_models function
@@ -182,7 +193,7 @@ class BenchmarkResults(BaseModel):
182
193
 
183
194
  return type(self).model_construct(model_results=new_model_results)
184
195
 
185
- def join_revisions(self) -> Self:
196
+ def join_revisions(self) -> BenchmarkResults:
186
197
  """Join revisions of the same model.
187
198
 
188
199
  In case of conflicts, the following rules are applied:
@@ -212,10 +223,10 @@ class BenchmarkResults(BaseModel):
212
223
 
213
224
  # Use cached model metas
214
225
  model_to_main_revision = _get_cached_model_metas()
215
- task_df["main_revision"] = task_df["model"].map(model_to_main_revision) # type: ignore
226
+ task_df["main_revision"] = task_df["model"].map(model_to_main_revision)
216
227
 
217
228
  # Use cached version parsing
218
- task_df["mteb_version"] = task_df["mteb_version"].map(_parse_version_cached) # type: ignore
229
+ task_df["mteb_version"] = task_df["mteb_version"].map(_parse_version_cached)
219
230
 
220
231
  # Filter out rows without scores first
221
232
  task_df = task_df[task_df["has_scores"]]
@@ -259,8 +270,8 @@ class BenchmarkResults(BaseModel):
259
270
  # so grouping by original revision ensures consistent ModelResult creation
260
271
  for (model, model_revision), group in task_df.groupby(["model", "revision"]):
261
272
  model_result = ModelResult.model_construct(
262
- model_name=model,
263
- model_revision=model_revision,
273
+ model_name=model, # type: ignore[arg-type]
274
+ model_revision=model_revision, # type: ignore[arg-type]
264
275
  task_results=list(group["task_result"]),
265
276
  )
266
277
  model_results.append(model_result)
@@ -291,7 +302,7 @@ class BenchmarkResults(BaseModel):
291
302
  {
292
303
  "model": model_res.model_name,
293
304
  "revision": model_res.model_revision,
294
- **model_scores, # type: ignore
305
+ **model_scores,
295
306
  }
296
307
  )
297
308
  except Exception as e:
@@ -364,7 +375,9 @@ class BenchmarkResults(BaseModel):
364
375
  scores_data.extend(model_result._get_score_for_table())
365
376
 
366
377
  if not scores_data:
367
- logger.warning("No scores data available. Returning empty DataFrame.")
378
+ msg = "No scores data available. Returning empty DataFrame."
379
+ logger.warning(msg)
380
+ warnings.warn(msg)
368
381
  return pd.DataFrame()
369
382
 
370
383
  # Create DataFrame
@@ -402,7 +415,7 @@ class BenchmarkResults(BaseModel):
402
415
 
403
416
  return self.benchmark._create_summary_table(self)
404
417
 
405
- def __iter__(self) -> Iterator[ModelResult]:
418
+ def __iter__(self) -> Iterator[ModelResult]: # type: ignore[override]
406
419
  return iter(self.model_results)
407
420
 
408
421
  def __getitem__(self, index: int) -> ModelResult:
@@ -424,11 +437,11 @@ class BenchmarkResults(BaseModel):
424
437
  out_file.write(self.model_dump_json(indent=2))
425
438
 
426
439
  @classmethod
427
- def from_validated(cls, **data) -> Self:
440
+ def from_validated(cls, **data: Any) -> BenchmarkResults:
428
441
  """Create BenchmarkResults from validated data.
429
442
 
430
443
  Args:
431
- data: Dictionary containing the data.
444
+ **data: Arbitrary keyword arguments containing the data.
432
445
 
433
446
  Returns:
434
447
  An instance of BenchmarkResults.