mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. mteb/__init__.py +4 -0
  2. mteb/_create_dataloaders.py +6 -3
  3. mteb/_evaluators/any_sts_evaluator.py +21 -12
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +1 -1
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +30 -38
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_data_filter/__init__.py +0 -0
  13. mteb/abstasks/_data_filter/filters.py +125 -0
  14. mteb/abstasks/_data_filter/task_pipelines.py +102 -0
  15. mteb/abstasks/_statistics_calculation.py +6 -2
  16. mteb/abstasks/classification.py +0 -2
  17. mteb/abstasks/clustering.py +1 -1
  18. mteb/abstasks/clustering_legacy.py +3 -0
  19. mteb/abstasks/multilabel_classification.py +10 -3
  20. mteb/abstasks/pair_classification.py +8 -1
  21. mteb/abstasks/sts.py +7 -0
  22. mteb/abstasks/task_metadata.py +1 -0
  23. mteb/benchmarks/_create_table.py +84 -37
  24. mteb/benchmarks/benchmark.py +74 -15
  25. mteb/benchmarks/benchmarks/__init__.py +8 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +259 -15
  27. mteb/benchmarks/get_benchmark.py +2 -0
  28. mteb/cache.py +47 -10
  29. mteb/deprecated_evaluator.py +8 -13
  30. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  31. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  32. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  39. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  40. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  41. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  42. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  43. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  44. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  45. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  46. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  47. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  48. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  49. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  50. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  51. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  53. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  54. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  56. mteb/evaluate.py +65 -45
  57. mteb/leaderboard/app.py +268 -133
  58. mteb/leaderboard/benchmark_selector.py +14 -5
  59. mteb/leaderboard/figures.py +13 -15
  60. mteb/leaderboard/table.py +82 -17
  61. mteb/models/__init__.py +4 -1
  62. mteb/models/abs_encoder.py +21 -17
  63. mteb/models/cache_wrappers/__init__.py +2 -1
  64. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  65. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  66. mteb/models/get_model_meta.py +3 -114
  67. mteb/models/instruct_wrapper.py +5 -1
  68. mteb/models/model_implementations/align_models.py +7 -0
  69. mteb/models/model_implementations/amazon_models.py +1 -0
  70. mteb/models/model_implementations/andersborges.py +65 -0
  71. mteb/models/model_implementations/ara_models.py +8 -0
  72. mteb/models/model_implementations/arctic_models.py +8 -0
  73. mteb/models/model_implementations/b1ade_models.py +1 -0
  74. mteb/models/model_implementations/bedrock_models.py +4 -0
  75. mteb/models/model_implementations/bge_models.py +60 -0
  76. mteb/models/model_implementations/bica_model.py +35 -0
  77. mteb/models/model_implementations/blip2_models.py +11 -0
  78. mteb/models/model_implementations/blip_models.py +27 -0
  79. mteb/models/model_implementations/bm25.py +1 -0
  80. mteb/models/model_implementations/bmretriever_models.py +4 -0
  81. mteb/models/model_implementations/cadet_models.py +9 -0
  82. mteb/models/model_implementations/cde_models.py +14 -0
  83. mteb/models/model_implementations/clip_models.py +3 -0
  84. mteb/models/model_implementations/clips_models.py +100 -0
  85. mteb/models/model_implementations/codefuse_models.py +162 -0
  86. mteb/models/model_implementations/codesage_models.py +15 -0
  87. mteb/models/model_implementations/cohere_models.py +8 -1
  88. mteb/models/model_implementations/cohere_v.py +5 -0
  89. mteb/models/model_implementations/colpali_models.py +14 -6
  90. mteb/models/model_implementations/colqwen_models.py +271 -1
  91. mteb/models/model_implementations/colsmol_models.py +2 -0
  92. mteb/models/model_implementations/conan_models.py +1 -0
  93. mteb/models/model_implementations/dino_models.py +171 -0
  94. mteb/models/model_implementations/e5_instruct.py +4 -0
  95. mteb/models/model_implementations/e5_models.py +12 -101
  96. mteb/models/model_implementations/e5_v.py +1 -0
  97. mteb/models/model_implementations/eagerworks_models.py +164 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  99. mteb/models/model_implementations/en_code_retriever.py +1 -0
  100. mteb/models/model_implementations/euler_models.py +32 -0
  101. mteb/models/model_implementations/evaclip_models.py +4 -0
  102. mteb/models/model_implementations/fa_models.py +58 -0
  103. mteb/models/model_implementations/facebookai.py +193 -0
  104. mteb/models/model_implementations/geogpt_models.py +1 -0
  105. mteb/models/model_implementations/gme_v_models.py +11 -5
  106. mteb/models/model_implementations/google_models.py +16 -5
  107. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
  108. mteb/models/model_implementations/gritlm_models.py +2 -0
  109. mteb/models/model_implementations/gte_models.py +78 -0
  110. mteb/models/model_implementations/hinvec_models.py +1 -0
  111. mteb/models/model_implementations/human.py +1 -0
  112. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  113. mteb/models/model_implementations/inf_models.py +2 -0
  114. mteb/models/model_implementations/jasper_models.py +255 -2
  115. mteb/models/model_implementations/jina_clip.py +1 -0
  116. mteb/models/model_implementations/jina_models.py +209 -5
  117. mteb/models/model_implementations/kalm_models.py +203 -25
  118. mteb/models/model_implementations/kblab.py +31 -0
  119. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  120. mteb/models/model_implementations/kfst.py +25 -0
  121. mteb/models/model_implementations/kowshik24_models.py +32 -0
  122. mteb/models/model_implementations/lens_models.py +2 -0
  123. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  124. mteb/models/model_implementations/linq_models.py +3 -2
  125. mteb/models/model_implementations/listconranker.py +1 -1
  126. mteb/models/model_implementations/llm2clip_models.py +3 -0
  127. mteb/models/model_implementations/llm2vec_models.py +8 -0
  128. mteb/models/model_implementations/mcinext_models.py +3 -0
  129. mteb/models/model_implementations/mdbr_models.py +2 -0
  130. mteb/models/model_implementations/misc_models.py +362 -0
  131. mteb/models/model_implementations/mme5_models.py +1 -0
  132. mteb/models/model_implementations/moco_models.py +11 -0
  133. mteb/models/model_implementations/mod_models.py +191 -0
  134. mteb/models/model_implementations/model2vec_models.py +13 -0
  135. mteb/models/model_implementations/moka_models.py +3 -0
  136. mteb/models/model_implementations/mxbai_models.py +9 -0
  137. mteb/models/model_implementations/nbailab.py +70 -0
  138. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  139. mteb/models/model_implementations/nomic_models.py +156 -4
  140. mteb/models/model_implementations/nomic_models_vision.py +7 -2
  141. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
  142. mteb/models/model_implementations/nvidia_models.py +4 -1
  143. mteb/models/model_implementations/octen_models.py +195 -0
  144. mteb/models/model_implementations/openai_models.py +20 -16
  145. mteb/models/model_implementations/openclip_models.py +24 -0
  146. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  147. mteb/models/model_implementations/ops_moa_models.py +4 -2
  148. mteb/models/model_implementations/pawan_models.py +39 -0
  149. mteb/models/model_implementations/piccolo_models.py +8 -0
  150. mteb/models/model_implementations/promptriever_models.py +8 -4
  151. mteb/models/model_implementations/pylate_models.py +37 -4
  152. mteb/models/model_implementations/qodo_models.py +2 -0
  153. mteb/models/model_implementations/qtack_models.py +1 -0
  154. mteb/models/model_implementations/qwen3_models.py +6 -3
  155. mteb/models/model_implementations/qzhou_models.py +3 -1
  156. mteb/models/model_implementations/random_baseline.py +16 -21
  157. mteb/models/model_implementations/rasgaard_models.py +34 -0
  158. mteb/models/model_implementations/reasonir_model.py +1 -0
  159. mteb/models/model_implementations/repllama_models.py +2 -0
  160. mteb/models/model_implementations/rerankers_custom.py +3 -3
  161. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  162. mteb/models/model_implementations/richinfoai_models.py +1 -0
  163. mteb/models/model_implementations/ru_sentence_models.py +51 -0
  164. mteb/models/model_implementations/ruri_models.py +322 -0
  165. mteb/models/model_implementations/salesforce_models.py +3 -0
  166. mteb/models/model_implementations/samilpwc_models.py +1 -0
  167. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  168. mteb/models/model_implementations/searchmap_models.py +1 -0
  169. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  170. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  171. mteb/models/model_implementations/seed_models.py +1 -0
  172. mteb/models/model_implementations/sentence_transformers_models.py +57 -0
  173. mteb/models/model_implementations/shuu_model.py +32 -31
  174. mteb/models/model_implementations/siglip_models.py +10 -0
  175. mteb/models/model_implementations/sonar_models.py +1 -0
  176. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  177. mteb/models/model_implementations/stella_models.py +6 -0
  178. mteb/models/model_implementations/tarka_models.py +376 -0
  179. mteb/models/model_implementations/ua_sentence_models.py +10 -0
  180. mteb/models/model_implementations/uae_models.py +1 -0
  181. mteb/models/model_implementations/vdr_models.py +2 -0
  182. mteb/models/model_implementations/vi_vn_models.py +39 -0
  183. mteb/models/model_implementations/vista_models.py +2 -0
  184. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  185. mteb/models/model_implementations/voyage_models.py +15 -0
  186. mteb/models/model_implementations/voyage_v.py +8 -2
  187. mteb/models/model_implementations/xyz_models.py +1 -0
  188. mteb/models/model_implementations/youtu_models.py +1 -0
  189. mteb/models/model_implementations/yuan_models.py +34 -0
  190. mteb/models/model_implementations/yuan_models_en.py +58 -0
  191. mteb/models/model_meta.py +442 -22
  192. mteb/models/search_encoder_index/__init__.py +7 -0
  193. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  194. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  195. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
  196. mteb/models/search_wrappers.py +165 -48
  197. mteb/models/sentence_transformer_wrapper.py +2 -7
  198. mteb/results/benchmark_results.py +88 -47
  199. mteb/results/model_result.py +11 -4
  200. mteb/results/task_result.py +37 -19
  201. mteb/similarity_functions.py +49 -0
  202. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  203. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  204. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  205. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  206. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  207. mteb/tasks/classification/ara/ajgt.py +1 -2
  208. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  209. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  210. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  211. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  212. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  213. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  214. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  215. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  216. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  217. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  218. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  220. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  221. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  222. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  223. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  224. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  225. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  226. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  227. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  228. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  229. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  230. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  231. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  232. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  233. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  234. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  235. mteb/tasks/classification/eng/news_classification.py +1 -2
  236. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  237. mteb/tasks/classification/eng/patent_classification.py +1 -2
  238. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  239. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  240. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  241. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  242. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  243. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  244. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  245. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  246. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  247. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  248. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  249. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  250. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  251. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  252. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  253. mteb/tasks/classification/est/estonian_valence.py +1 -2
  254. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  255. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  257. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  260. mteb/tasks/classification/heb/__init__.py +6 -1
  261. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  262. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  263. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  264. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  265. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  266. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  267. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  268. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  269. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  270. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  271. mteb/tasks/classification/kor/klue_tc.py +1 -2
  272. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  274. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  275. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  276. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  277. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  278. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  279. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  280. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  281. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  282. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  283. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  284. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  285. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  286. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  287. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  288. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  289. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  290. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  291. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  292. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  293. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  294. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  295. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  296. mteb/tasks/classification/pol/polish_classification.py +3 -6
  297. mteb/tasks/classification/ron/moroco.py +1 -2
  298. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  299. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  300. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  301. mteb/tasks/classification/rus/headline_classification.py +1 -2
  302. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  303. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  304. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  305. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  306. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  307. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  308. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  309. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  310. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  311. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  312. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  313. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  314. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  315. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  316. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  317. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  318. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  319. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  320. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  321. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  322. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  323. mteb/tasks/classification/tur/__init__.py +4 -0
  324. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  325. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  326. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  327. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  328. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  329. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  330. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  331. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  332. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  333. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  334. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  335. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  336. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  337. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  338. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  339. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  340. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  341. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  342. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  343. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  344. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  345. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  346. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  347. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  348. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  349. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  350. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  351. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  352. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  353. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  354. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  355. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  356. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  357. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  358. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  359. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  360. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  361. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  362. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  363. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  364. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  365. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  366. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  367. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  368. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  369. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  370. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  371. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  372. mteb/tasks/pair_classification/rus/terra.py +51 -25
  373. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  374. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  375. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  376. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  377. mteb/tasks/reranking/jpn/__init__.py +9 -1
  378. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  379. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  380. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  381. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  382. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  383. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  384. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  385. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  386. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  387. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  388. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  389. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  390. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  391. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  392. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  393. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  394. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  395. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  396. mteb/tasks/retrieval/kor/__init__.py +2 -1
  397. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  398. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  399. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  400. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  401. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  402. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  403. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  404. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  405. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  406. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  407. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  408. mteb/tasks/retrieval/nld/__init__.py +8 -4
  409. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  410. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  411. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  412. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  413. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  414. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  415. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  416. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  417. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  418. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  419. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  420. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  421. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  422. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  423. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  424. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  425. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  426. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  427. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  428. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  429. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  430. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  431. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  432. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  433. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  434. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  435. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  436. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  437. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  438. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  439. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  440. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  441. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  442. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  443. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  444. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  445. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  446. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  447. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  448. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  449. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  450. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  451. mteb/types/_encoder_io.py +7 -2
  452. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
  453. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
  454. mteb/models/model_implementations/nb_sbert.py +0 -25
  455. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
  456. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
  457. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
  458. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from typing import get_args
2
3
 
3
4
  import numpy as np
@@ -7,6 +8,8 @@ import plotly.graph_objects as go
7
8
 
8
9
  from mteb.abstasks.task_metadata import TaskType
9
10
 
11
+ logger = logging.getLogger(__name__)
12
+
10
13
 
11
14
  def _text_plot(text: str):
12
15
  """Returns empty scatter plot with text added, this can be great for error messages."""
@@ -29,16 +32,17 @@ def _failsafe_plot(fun):
29
32
  try:
30
33
  return fun(*args, **kwargs)
31
34
  except Exception as e:
35
+ logger.error(f"Plot generation failed: {e}")
32
36
  return _text_plot(f"Couldn't produce plot. Reason: {e}")
33
37
 
34
38
  return wrapper
35
39
 
36
40
 
37
- def _parse_n_params(text: str) -> int:
38
- if text.endswith("M"):
39
- return float(text[:-1]) * 1e6
40
- if text.endswith("B"):
41
- return float(text[:-1]) * 1e9
41
+ def _parse_n_params(params: float | None) -> int | float:
42
+ """Specified in billions."""
43
+ if params is None or np.isnan(params):
44
+ return None
45
+ return int(params * 1e9)
42
46
 
43
47
 
44
48
  def _parse_model_name(name: str) -> str:
@@ -51,20 +55,14 @@ def _parse_model_name(name: str) -> str:
51
55
 
52
56
 
53
57
  def _parse_float(value) -> float:
54
- try:
55
- if value == "Infinite":
56
- return np.inf
57
- else:
58
- return float(value)
59
- except ValueError:
58
+ if value is None or np.isnan(value):
60
59
  return np.nan
60
+ return float(value)
61
61
 
62
62
 
63
63
  def _process_max_tokens(x):
64
- if pd.isna(x):
64
+ if pd.isna(x) or x is None or np.isinf(x):
65
65
  return "Unknown"
66
- if np.isinf(x):
67
- return "Infinite"
68
66
  return str(int(x))
69
67
 
70
68
 
@@ -112,7 +110,7 @@ def _add_size_guide(fig: go.Figure):
112
110
  @_failsafe_plot
113
111
  def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
114
112
  df = df.copy()
115
- df["Number of Parameters"] = df["Number of Parameters"].map(_parse_n_params)
113
+ df["Number of Parameters"] = df["Number of Parameters (B)"].map(_parse_n_params)
116
114
  df["Model"] = df["Model"].map(_parse_model_name)
117
115
  df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "")
118
116
  df["Embedding Dimensions"] = df["Embedding Dimensions"].map(_parse_float)
mteb/leaderboard/table.py CHANGED
@@ -26,16 +26,6 @@ def _format_scores(score: float) -> float:
26
26
  return round(score * 100, 2)
27
27
 
28
28
 
29
- def _get_column_types(df: pd.DataFrame) -> list[str]:
30
- types = []
31
- for column_name in df.columns:
32
- if is_numeric_dtype(df[column_name]):
33
- types.append("number")
34
- else:
35
- types.append("str")
36
- return types
37
-
38
-
39
29
  def _get_column_widths(df: pd.DataFrame) -> list[str]:
40
30
  # Please do not remove this function when refactoring.
41
31
  # Column width calculation seemingly changes regularly with Gradio releases,
@@ -120,6 +110,39 @@ def apply_per_task_styling_from_benchmark(
120
110
  return _apply_per_task_table_styling(per_task_df)
121
111
 
122
112
 
113
+ def apply_per_language_styling_from_benchmark(
114
+ benchmark_instance: Benchmark, benchmark_results: BenchmarkResults
115
+ ) -> gr.DataFrame:
116
+ """Apply styling to per-language table created by the benchmark instance's _create_per_language_table method.
117
+
118
+ This supports polymorphism - different benchmark classes can have different table generation logic.
119
+
120
+ Args:
121
+ benchmark_instance: The benchmark instance
122
+ benchmark_results: BenchmarkResults object containing model results (may be pre-filtered)
123
+
124
+ Returns:
125
+ Styled gr.DataFrame ready for display in the leaderboard
126
+ """
127
+ # Use the instance method to support polymorphism
128
+ per_language_df = benchmark_instance._create_per_language_table(benchmark_results)
129
+
130
+ # If it's a no-results DataFrame, return it as-is
131
+ if "No results" in per_language_df.columns:
132
+ return gr.DataFrame(per_language_df)
133
+
134
+ # Apply the styling
135
+ return _apply_per_language_table_styling(per_language_df)
136
+
137
+
138
+ def _style_number_of_parameters(num_params: float) -> str:
139
+ """Values of 1B or more are shown in billions with 1 decimal (e.g. 1.712 -> 1.7), while smaller values are shown with 3 decimals (e.g. 0.345 stays 0.345)."""
140
+ if num_params >= 1:
141
+ return f"{num_params:.1f}"
142
+ else:
143
+ return f"{num_params:.3f}"
144
+
145
+
123
146
  def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
124
147
  """Apply styling to a raw summary DataFrame
125
148
 
@@ -130,7 +153,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
130
153
  "Rank (Borda)",
131
154
  "Rank",
132
155
  "Model",
133
- "Number of Parameters",
156
+ "Number of Parameters (B)",
134
157
  "Embedding Dimensions",
135
158
  "Max Tokens",
136
159
  "Memory Usage (MB)",
@@ -156,7 +179,14 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
156
179
  joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
157
180
 
158
181
  joint_table_style = joint_table.style.format(
159
- {**dict.fromkeys(score_columns, "{:.2f}"), "Rank (Borda)": "{:.0f}"},
182
+ {
183
+ **dict.fromkeys(score_columns, "{:.2f}"),
184
+ "Rank (Borda)": "{:.0f}",
185
+ "Memory Usage (MB)": "{:.0f}",
186
+ "Embedding Dimensions": "{:.0f}",
187
+ "Max Tokens": "{:.0f}",
188
+ "Number of Parameters (B)": lambda x: _style_number_of_parameters(x),
189
+ },
160
190
  na_rep="",
161
191
  )
162
192
  joint_table_style = joint_table_style.highlight_min(
@@ -186,7 +216,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
186
216
  gmap=gmap_values.loc[mask],
187
217
  )
188
218
 
189
- column_types = _get_column_types(joint_table_style.data)
219
+ column_types = ["auto" for _ in joint_table_style.data.columns]
190
220
  # setting model name column to markdown
191
221
  if len(column_types) > 1:
192
222
  column_types[1] = "markdown"
@@ -204,8 +234,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
204
234
  pinned_columns=2,
205
235
  column_widths=column_widths,
206
236
  wrap=True,
207
- show_fullscreen_button=True,
208
- show_copy_button=True,
237
+ buttons=["copy", "fullscreen"],
209
238
  show_search="filter",
210
239
  )
211
240
 
@@ -223,11 +252,47 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
223
252
  "{:.2f}", subset=task_score_columns, na_rep=""
224
253
  ).highlight_max(subset=task_score_columns, props="font-weight: bold")
225
254
 
255
+ # setting task name column width to 250px
256
+ column_widths = _get_column_widths(per_task_style.data)
257
+ if len(column_widths) > 0:
258
+ column_widths[0] = "250px"
259
+
226
260
  return gr.DataFrame(
227
261
  per_task_style,
228
262
  interactive=False,
229
263
  pinned_columns=1,
230
- show_fullscreen_button=True,
231
- show_copy_button=True,
264
+ column_widths=column_widths,
265
+ buttons=["copy", "fullscreen"],
266
+ show_search="filter",
267
+ )
268
+
269
+
270
+ def _apply_per_language_table_styling(per_language: pd.DataFrame) -> gr.DataFrame:
271
+ """Apply styling to a raw per-language DataFrame
272
+
273
+ Returns:
274
+ Styled gr.DataFrame ready for display in the leaderboard
275
+ """
276
+ language_score_columns = per_language.select_dtypes("number").columns
277
+ per_language[language_score_columns] *= 100
278
+
279
+ if len(per_language.columns) > 100: # Avoid gradio error on very wide tables
280
+ per_language_style = per_language.round(2)
281
+ else:
282
+ per_language_style = per_language.style.format(
283
+ "{:.2f}", subset=language_score_columns, na_rep=""
284
+ ).highlight_max(subset=language_score_columns, props="font-weight: bold")
285
+
286
+ # setting task name column width to 250px
287
+ column_widths = _get_column_widths(per_language_style.data)
288
+ if len(column_widths) > 0:
289
+ column_widths[0] = "250px"
290
+
291
+ return gr.DataFrame(
292
+ per_language_style,
293
+ interactive=False,
294
+ pinned_columns=1,
295
+ column_widths=column_widths,
296
+ buttons=["copy", "fullscreen"],
232
297
  show_search="filter",
233
298
  )
mteb/models/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .cache_wrappers import CachedEmbeddingWrapper
1
+ from .cache_wrappers import CacheBackendProtocol, CachedEmbeddingWrapper
2
2
  from .model_meta import ModelMeta
3
3
  from .models_protocols import (
4
4
  CrossEncoderProtocol,
@@ -6,6 +6,7 @@ from .models_protocols import (
6
6
  MTEBModels,
7
7
  SearchProtocol,
8
8
  )
9
+ from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
9
10
  from .search_wrappers import SearchCrossEncoderWrapper, SearchEncoderWrapper
10
11
  from .sentence_transformer_wrapper import (
11
12
  CrossEncoderWrapper,
@@ -14,10 +15,12 @@ from .sentence_transformer_wrapper import (
14
15
  )
15
16
 
16
17
  __all__ = [
18
+ "CacheBackendProtocol",
17
19
  "CachedEmbeddingWrapper",
18
20
  "CrossEncoderProtocol",
19
21
  "CrossEncoderWrapper",
20
22
  "EncoderProtocol",
23
+ "IndexEncoderSearchProtocol",
21
24
  "MTEBModels",
22
25
  "ModelMeta",
23
26
  "SearchCrossEncoderWrapper",
@@ -54,11 +54,11 @@ class AbsEncoder(ABC):
54
54
  """A wrapper function around the model.encode method that handles the prompt_name argument and standardizes the output to a numpy array.
55
55
 
56
56
  The order of priorities for prompt selection are:
57
- 1. Composed prompt of task name + prompt type (query or passage)
57
+ 1. Composed prompt of task name + prompt type
58
58
  2. Specific task prompt
59
- 3. Composed prompt of task type + prompt type (query or passage)
59
+ 3. Composed prompt of task type + prompt type
60
60
  4. Specific task type prompt
61
- 5. Specific prompt type (query or passage)
61
+ 5. Specific prompt type
62
62
 
63
63
  Args:
64
64
  task_metadata: The task name to use for building the encoding prompt
@@ -105,7 +105,7 @@ class AbsEncoder(ABC):
105
105
 
106
106
  Args:
107
107
  task_metadata: The metadata of the task.
108
- prompt_type: The name type of prompt. (query or passage)
108
+ prompt_type: The name type of prompt.
109
109
  """
110
110
  if not self.model_prompts:
111
111
  return None
@@ -210,13 +210,11 @@ class AbsEncoder(ABC):
210
210
  task_metadata: The metadata of the task. Sentence-transformers uses this to
211
211
  determine which prompt to use from a specified dictionary.
212
212
  The order of priorities for prompt selection are:
213
- 1. Composed prompt of task name + prompt type (query or passage)
214
- 2. Specific task prompt
215
- 3. Composed prompt of task type + prompt type (query or passage)
216
- 4. Specific task type prompt
217
- 5. Specific prompt type (query or passage)
218
- 6. Default prompt from the task definition
219
- prompt_type: The name type of prompt. (query or passage)
213
+ 1. Specific task prompt
214
+ 2. Specific task type prompt
215
+ 3. Specific prompt type
216
+ 4. Default prompt from the task definition
217
+ prompt_type: The name type of prompt.
220
218
 
221
219
  Returns:
222
220
  The instruction/prompt to be used for encoding sentences.
@@ -224,6 +222,12 @@ class AbsEncoder(ABC):
224
222
  prompt = task_metadata.prompt
225
223
  if self.prompts_dict and task_metadata.name in self.prompts_dict:
226
224
  prompt = self.prompts_dict[task_metadata.name]
225
+ elif self.prompts_dict and task_metadata.type in self.prompts_dict:
226
+ prompt = self.prompts_dict[task_metadata.type]
227
+ elif (
228
+ self.prompts_dict and prompt_type and prompt_type.value in self.prompts_dict
229
+ ):
230
+ prompt = self.prompts_dict[prompt_type.value]
227
231
 
228
232
  if isinstance(prompt, dict) and prompt_type:
229
233
  if prompt.get(prompt_type.value):
@@ -246,7 +250,7 @@ class AbsEncoder(ABC):
246
250
 
247
251
  Args:
248
252
  instruction: The instruction to be formatted.
249
- prompt_type: The name type of prompt. (query or passage)
253
+ prompt_type: The name type of prompt.
250
254
  """
251
255
  if self.instruction_template is None:
252
256
  raise ValueError(
@@ -269,7 +273,7 @@ class AbsEncoder(ABC):
269
273
 
270
274
  Args:
271
275
  task_metadata: The metadata of the task
272
- prompt_type: The name type of prompt. (query or passage)
276
+ prompt_type: The name type of prompt.
273
277
 
274
278
  Returns:
275
279
  The instruction to be used for encoding sentences.
@@ -373,14 +377,14 @@ class AbsEncoder(ABC):
373
377
  task_metadata: The metadata of the task. Sentence-transformers uses this to
374
378
  determine which prompt to use from a specified dictionary.
375
379
  The order of priorities for prompt selection are:
376
- 1. Composed prompt of task name + prompt type (query or passage)
380
+ 1. Composed prompt of task name + prompt type
377
381
  2. Specific task prompt
378
- 3. Composed prompt of task type + prompt type (query or passage)
382
+ 3. Composed prompt of task type + prompt type
379
383
  4. Specific task type prompt
380
- 5. Specific prompt type (query or passage)
384
+ 5. Specific prompt type
381
385
  hf_split: Split of current task
382
386
  hf_subset: Subset of current task
383
- prompt_type: The name type of prompt. (query or passage)
387
+ prompt_type: The name type of prompt.
384
388
  **kwargs: Additional arguments to pass to the encoder.
385
389
 
386
390
  Returns:
@@ -1,3 +1,4 @@
1
+ from .cache_backend_protocol import CacheBackendProtocol
1
2
  from .cache_wrapper import CachedEmbeddingWrapper
2
3
 
3
- __all__ = ["CachedEmbeddingWrapper"]
4
+ __all__ = ["CacheBackendProtocol", "CachedEmbeddingWrapper"]
@@ -1,7 +1,5 @@
1
1
  import hashlib
2
2
 
3
- from PIL import Image
4
-
5
3
  from mteb.types import BatchedInput
6
4
 
7
5
 
@@ -11,6 +9,8 @@ def _hash_item(item: BatchedInput) -> str:
11
9
  item_hash = hashlib.sha256(item["text"].encode()).hexdigest()
12
10
 
13
11
  if "image" in item:
12
+ from PIL import Image
13
+
14
14
  image: Image.Image = item["image"]
15
15
  item_hash += hashlib.sha256(image.tobytes()).hexdigest()
16
16
 
@@ -112,7 +112,7 @@ class CachedEmbeddingWrapper:
112
112
  dataset,
113
113
  task_metadata=task_metadata,
114
114
  prompt_type=prompt_type,
115
- batch_size=batch_size,
115
+ **kwargs,
116
116
  )
117
117
  new_vectors = self._model.encode(
118
118
  dl,
@@ -1,25 +1,15 @@
1
- from __future__ import annotations
2
-
3
1
  import difflib
4
2
  import logging
5
3
  from collections.abc import Iterable
6
- from typing import TYPE_CHECKING, Any
7
-
8
- from huggingface_hub import ModelCard
9
- from huggingface_hub.errors import RepositoryNotFoundError
4
+ from typing import Any
10
5
 
11
6
  from mteb.abstasks import AbsTask
12
7
  from mteb.models import (
13
- CrossEncoderWrapper,
14
8
  ModelMeta,
15
9
  MTEBModels,
16
- sentence_transformers_loader,
17
10
  )
18
11
  from mteb.models.model_implementations import MODEL_REGISTRY
19
12
 
20
- if TYPE_CHECKING:
21
- from sentence_transformers import CrossEncoder, SentenceTransformer
22
-
23
13
  logger = logging.getLogger(__name__)
24
14
 
25
15
 
@@ -100,24 +90,9 @@ def get_model(
100
90
  Returns:
101
91
  A model object
102
92
  """
103
- from sentence_transformers import CrossEncoder, SentenceTransformer
104
-
105
93
  meta = get_model_meta(model_name, revision)
106
94
  model = meta.load_model(**kwargs)
107
95
 
108
- # If revision not available in the modelmeta, try to extract it from sentence-transformers
109
- if hasattr(model, "model") and isinstance(model.model, SentenceTransformer): # type: ignore
110
- _meta = _model_meta_from_sentence_transformers(model.model) # type: ignore
111
- if meta.revision is None:
112
- meta.revision = _meta.revision if _meta.revision else meta.revision
113
- if not meta.similarity_fn_name:
114
- meta.similarity_fn_name = _meta.similarity_fn_name
115
-
116
- elif isinstance(model, CrossEncoder):
117
- _meta = _model_meta_from_cross_encoder(model.model)
118
- if meta.revision is None:
119
- meta.revision = _meta.revision if _meta.revision else meta.revision
120
-
121
96
  model.mteb_model_meta = meta # type: ignore
122
97
  return model
123
98
 
@@ -147,12 +122,8 @@ def get_model_meta(
147
122
  logger.info(
148
123
  "Model not found in model registry. Attempting to extract metadata by loading the model ({model_name}) using HuggingFace."
149
124
  )
150
- try:
151
- meta = _model_meta_from_hf_hub(model_name)
152
- meta.revision = revision
153
- return meta
154
- except RepositoryNotFoundError:
155
- pass
125
+ meta = ModelMeta.from_hub(model_name, revision)
126
+ return meta
156
127
 
157
128
  not_found_msg = f"Model '{model_name}' not found in MTEB registry"
158
129
  not_found_msg += " nor on the Huggingface Hub." if fetch_from_hf else "."
@@ -170,85 +141,3 @@ def get_model_meta(
170
141
  suggestion = f" Did you mean: '{close_matches[0]}'?"
171
142
 
172
143
  raise KeyError(not_found_msg + suggestion)
173
-
174
-
175
- def _model_meta_from_hf_hub(model_name: str) -> ModelMeta:
176
- card = ModelCard.load(model_name)
177
- card_data = card.data.to_dict()
178
- frameworks = ["PyTorch"]
179
- loader = None
180
- if card_data.get("library_name", None) == "sentence-transformers":
181
- frameworks.append("Sentence Transformers")
182
- loader = sentence_transformers_loader
183
- revision = card_data.get("base_model_revision", None)
184
- license = card_data.get("license", None)
185
- return ModelMeta(
186
- loader=loader,
187
- name=model_name,
188
- revision=revision,
189
- release_date=None,
190
- languages=None,
191
- license=license,
192
- framework=frameworks, # type: ignore
193
- training_datasets=None,
194
- similarity_fn_name=None,
195
- n_parameters=None,
196
- memory_usage_mb=None,
197
- max_tokens=None,
198
- embed_dim=None,
199
- open_weights=True,
200
- public_training_code=None,
201
- public_training_data=None,
202
- use_instructions=None,
203
- )
204
-
205
-
206
- def _model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta:
207
- return ModelMeta(
208
- loader=CrossEncoderWrapper,
209
- name=model.model.name_or_path,
210
- revision=model.config._commit_hash,
211
- release_date=None,
212
- languages=None,
213
- framework=["Sentence Transformers"],
214
- similarity_fn_name=None,
215
- n_parameters=None,
216
- memory_usage_mb=None,
217
- max_tokens=None,
218
- embed_dim=None,
219
- license=None,
220
- open_weights=True,
221
- public_training_code=None,
222
- public_training_data=None,
223
- use_instructions=None,
224
- training_datasets=None,
225
- )
226
-
227
-
228
- def _model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta:
229
- name: str | None = (
230
- model.model_card_data.model_name
231
- if model.model_card_data.model_name
232
- else model.model_card_data.base_model
233
- )
234
- embeddings_dim = model.get_sentence_embedding_dimension()
235
- meta = ModelMeta(
236
- loader=sentence_transformers_loader,
237
- name=name,
238
- revision=model.model_card_data.base_model_revision,
239
- release_date=None,
240
- languages=None,
241
- framework=["Sentence Transformers"],
242
- similarity_fn_name=None,
243
- n_parameters=None,
244
- memory_usage_mb=None,
245
- max_tokens=None,
246
- embed_dim=embeddings_dim,
247
- license=None,
248
- open_weights=True,
249
- public_training_code=None,
250
- public_training_data=None,
251
- use_instructions=None,
252
- training_datasets=None,
253
- )
254
- return meta
@@ -122,7 +122,8 @@ class InstructSentenceTransformerModel(AbsEncoder):
122
122
  apply_instruction_to_passages: Whether to apply the instruction template to the passages.
123
123
  padding_side: Padding side. If None, the padding side will be read from the model config.
124
124
  add_eos_token: Whether to add the eos token to each input example.
125
- prompts_dict: Dictionary of task names to prompt names. If None, the prompts will be read from the model config.
125
+ prompts_dict: Dictionary of task names to prompt names. If task name is missing in the dict or prompts dict is None, prompt from task metadata or
126
+ AbsTask.abstask_prompt will be used.
126
127
  **kwargs: Kwargs for Sentence Transformer model.
127
128
  """
128
129
  from sentence_transformers import SentenceTransformer
@@ -153,6 +154,9 @@ class InstructSentenceTransformerModel(AbsEncoder):
153
154
 
154
155
  self.model_name = model_name
155
156
  self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
157
+ if max_seq_length:
158
+ # https://github.com/huggingface/sentence-transformers/issues/3575
159
+ self.model.max_seq_length = max_seq_length
156
160
  self.apply_instruction_to_passages = apply_instruction_to_passages
157
161
  self.prompts_dict = prompts_dict
158
162
 
@@ -105,6 +105,7 @@ class ALIGNModel(AbsEncoder):
105
105
  align_base = ModelMeta(
106
106
  loader=ALIGNModel,
107
107
  name="kakaobrain/align-base",
108
+ model_type=["dense"],
108
109
  languages=["eng-Latn"],
109
110
  revision="e96a37facc7b1f59090ece82293226b817afd6ba",
110
111
  release_date="2023-02-24",
@@ -124,4 +125,10 @@ align_base = ModelMeta(
124
125
  training_datasets=set(
125
126
  # COYO-700M
126
127
  ),
128
+ citation="""@misc{kakaobrain2022coyo-align,
129
+ title = {COYO-ALIGN},
130
+ author = {Yoon, Boogeo and Lee, Youhan and Baek, Woonhyuk},
131
+ year = {2022},
132
+ howpublished = {https://github.com/kakaobrain/coyo-align},
133
+ }""",
127
134
  )
@@ -3,6 +3,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
3
  amazon_titan_text_embeddings_v2 = ModelMeta(
4
4
  loader=None,
5
5
  name="amazon/Titan-text-embeddings-v2",
6
+ model_type=["dense"],
6
7
  revision="1",
7
8
  release_date="2024-04-30",
8
9
  languages=["eng-Latn"],
@@ -0,0 +1,65 @@
1
+ import numpy as np
2
+
3
+ from mteb.models.model_implementations.model2vec_models import Model2VecModel
4
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
5
+
6
+ model2vecdk = ModelMeta(
7
+ loader=Model2VecModel, # type: ignore
8
+ name="andersborges/model2vecdk",
9
+ model_type=["dense"],
10
+ languages=["dan-Latn"],
11
+ open_weights=True,
12
+ revision="cb576c78dcc1b729e4612645f61db59929d69e61",
13
+ release_date="2025-11-21",
14
+ n_parameters=48042496,
15
+ memory_usage_mb=183,
16
+ max_tokens=np.inf,
17
+ embed_dim=256,
18
+ license="mit",
19
+ similarity_fn_name=ScoringFunction.COSINE,
20
+ framework=["NumPy", "Sentence Transformers"],
21
+ reference="https://huggingface.co/andersborges/model2vecdk",
22
+ use_instructions=False,
23
+ adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
24
+ superseded_by=None,
25
+ training_datasets=set(), # distilled
26
+ public_training_code="https://github.com/andersborges/dkmodel2vec",
27
+ public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
28
+ citation="""@article{minishlab2024model2vec,
29
+ author = {Tulkens, Stephan and {van Dongen}, Thomas},
30
+ title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
31
+ year = {2024},
32
+ url = {https://github.com/MinishLab/model2vec}
33
+ }""",
34
+ )
35
+
36
+
37
+ model2vecdk_stem = ModelMeta(
38
+ loader=Model2VecModel, # type: ignore
39
+ name="andersborges/model2vecdk-stem",
40
+ model_type=["dense"],
41
+ languages=["dan-Latn"],
42
+ open_weights=True,
43
+ revision="cb576c78dcc1b729e4612645f61db59929d69e61",
44
+ release_date="2025-11-21",
45
+ n_parameters=48578560,
46
+ memory_usage_mb=185,
47
+ max_tokens=np.inf,
48
+ embed_dim=256,
49
+ license="mit",
50
+ similarity_fn_name=ScoringFunction.COSINE,
51
+ framework=["NumPy", "Sentence Transformers"],
52
+ reference="https://huggingface.co/andersborges/model2vecdk",
53
+ use_instructions=False,
54
+ adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
55
+ superseded_by=None,
56
+ training_datasets=set(), # distilled
57
+ public_training_code="https://github.com/andersborges/dkmodel2vec",
58
+ public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
59
+ citation="""@article{minishlab2024model2vec,
60
+ author = {Tulkens, Stephan and {van Dongen}, Thomas},
61
+ title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
62
+ year = {2024},
63
+ url = {https://github.com/MinishLab/model2vec}
64
+ }""",
65
+ )
@@ -4,6 +4,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
4
4
  arabic_triplet_matryoshka = ModelMeta(
5
5
  loader=sentence_transformers_loader,
6
6
  name="Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2",
7
+ model_type=["dense"],
7
8
  languages=["ara-Arab"],
8
9
  open_weights=True,
9
10
  revision="ed357f222f0b6ea6670d2c9b5a1cb93950d34200",
@@ -23,4 +24,11 @@ arabic_triplet_matryoshka = ModelMeta(
23
24
  training_datasets=set(
24
25
  # "akhooli/arabic-triplets-1m-curated-sims-len"
25
26
  ),
27
+ citation="""
28
+ @article{nacar2025gate,
29
+ title={GATE: General Arabic Text Embedding for Enhanced Semantic Textual Similarity with Matryoshka Representation Learning and Hybrid Loss Training},
30
+ author={Nacar, Omer and Koubaa, Anis and Sibaee, Serry and Al-Habashi, Yasser and Ammar, Adel and Boulila, Wadii},
31
+ journal={arXiv preprint arXiv:2505.24581},
32
+ year={2025}
33
+ }""",
26
34
  )