mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,9 @@
1
- from mteb.benchmarks.benchmark import Benchmark, HUMEBenchmark, MIEBBenchmark
1
+ from mteb.benchmarks.benchmark import (
2
+ Benchmark,
3
+ HUMEBenchmark,
4
+ MIEBBenchmark,
5
+ VidoreBenchmark,
6
+ )
2
7
  from mteb.get_tasks import MTEBTasks, get_task, get_tasks
3
8
 
4
9
  MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
@@ -13,6 +18,7 @@ MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
13
18
 
14
19
  MTEB_EN = Benchmark(
15
20
  name="MTEB(eng, v2)",
21
+ aliases=["MTEB(eng)"],
16
22
  display_name="English",
17
23
  icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
18
24
  tasks=MTEBTasks(
@@ -84,6 +90,7 @@ The original MTEB leaderboard is available under the [MTEB(eng, v1)](http://mteb
84
90
 
85
91
  MTEB_ENG_CLASSIC = Benchmark(
86
92
  name="MTEB(eng, v1)",
93
+ aliases=["MTEB(eng, classic)", "MTEB"],
87
94
  display_name="English Legacy",
88
95
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
89
96
  tasks=MTEBTasks(
@@ -180,7 +187,8 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
180
187
 
181
188
  MTEB_MAIN_RU = Benchmark(
182
189
  name="MTEB(rus, v1)",
183
- display_name="Russian",
190
+ aliases=["MTEB(rus)"],
191
+ display_name="Russian legacy",
184
192
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
185
193
  tasks=MTEBTasks(
186
194
  get_tasks(
@@ -235,6 +243,67 @@ MTEB_MAIN_RU = Benchmark(
235
243
  year = {2024},
236
244
  }
237
245
  """,
246
+ contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
247
+ )
248
+
249
+ MTEB_MAIN_RU_v1_1 = Benchmark(
250
+ name="MTEB(rus, v1.1)",
251
+ display_name="Russian",
252
+ icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
253
+ tasks=MTEBTasks(
254
+ get_tasks(
255
+ languages=["rus"],
256
+ tasks=[
257
+ # Classification
258
+ "GeoreviewClassification",
259
+ "HeadlineClassification",
260
+ "InappropriatenessClassification",
261
+ "KinopoiskClassification",
262
+ "MassiveIntentClassification",
263
+ "MassiveScenarioClassification",
264
+ "RuReviewsClassification",
265
+ "RuSciBenchGRNTIClassification",
266
+ "RuSciBenchOECDClassification",
267
+ # Clustering
268
+ "GeoreviewClusteringP2P",
269
+ "RuSciBenchGRNTIClusteringP2P",
270
+ "RuSciBenchOECDClusteringP2P",
271
+ # MultiLabelClassification
272
+ "CEDRClassification",
273
+ "SensitiveTopicsClassification",
274
+ # PairClassification
275
+ "TERRa",
276
+ # Reranking
277
+ "MIRACLReranking",
278
+ "RuBQReranking",
279
+ # Retrieval
280
+ "MIRACLRetrievalHardNegatives.v2",
281
+ "RiaNewsRetrievalHardNegatives.v2",
282
+ "RuBQRetrieval",
283
+ # STS
284
+ "RUParaPhraserSTS",
285
+ "STS22",
286
+ ],
287
+ )
288
+ + get_tasks(
289
+ tasks=["RuSTSBenchmarkSTS"],
290
+ eval_splits=["test"],
291
+ )
292
+ ),
293
+ description="A Russian version of the Massive Text Embedding Benchmark covering the task categories of classification, clustering, reranking, pair classification, retrieval, and semantic similarity. In v1.1, MIRACLRetrieval and RiaNewsRetrieval were replaced with their HardNegatives variants for improved time-optimization measurement. MIRACLRetrievalHardNegatives and RiaNewsRetrievalHardNegatives are used in their updated versions (v2), both of which include improved default prompts.",
294
+ reference="https://aclanthology.org/2023.eacl-main.148/",
295
+ citation=r"""
296
+ @misc{snegirev2024russianfocusedembeddersexplorationrumteb,
297
+ archiveprefix = {arXiv},
298
+ author = {Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
299
+ eprint = {2408.12503},
300
+ primaryclass = {cs.CL},
301
+ title = {The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
302
+ url = {https://arxiv.org/abs/2408.12503},
303
+ year = {2024},
304
+ }
305
+ """,
306
+ contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
238
307
  )
239
308
 
240
309
 
@@ -243,7 +312,7 @@ RU_SCI_BENCH = Benchmark(
243
312
  tasks=get_tasks(
244
313
  tasks=[
245
314
  # BitextMining
246
- "RuSciBenchBitextMining",
315
+ "RuSciBenchBitextMining.v2",
247
316
  # Classification
248
317
  "RuSciBenchCoreRiscClassification",
249
318
  "RuSciBenchGRNTIClassification.v2",
@@ -278,6 +347,7 @@ RU_SCI_BENCH = Benchmark(
278
347
 
279
348
  MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
280
349
  name="FollowIR",
350
+ aliases=["MTEB(Retrieval w/Instructions)"],
281
351
  display_name="Instruction Following",
282
352
  tasks=get_tasks(
283
353
  tasks=[
@@ -328,7 +398,9 @@ MTEB_RETRIEVAL_WITH_DOMAIN_INSTRUCTIONS = Benchmark(
328
398
  )
329
399
 
330
400
  MTEB_RETRIEVAL_LAW = Benchmark(
331
- name="MTEB(Law, v1)", # This benchmark is likely in the need of an update
401
+ # This benchmark is likely in the need of an update
402
+ name="MTEB(Law, v1)",
403
+ aliases=["MTEB(law)"],
332
404
  display_name="Legal",
333
405
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
334
406
  tasks=get_tasks(
@@ -350,6 +422,7 @@ MTEB_RETRIEVAL_LAW = Benchmark(
350
422
 
351
423
  MTEB_RETRIEVAL_MEDICAL = Benchmark(
352
424
  name="MTEB(Medical, v1)",
425
+ aliases=["MTEB(Medical)"],
353
426
  display_name="Medical",
354
427
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
355
428
  tasks=get_tasks(
@@ -369,7 +442,7 @@ MTEB_RETRIEVAL_MEDICAL = Benchmark(
369
442
  ],
370
443
  ),
371
444
  description="A curated set of MTEB tasks designed to evaluate systems in the context of medical information retrieval.",
372
- reference="",
445
+ reference=None,
373
446
  citation=None,
374
447
  )
375
448
 
@@ -403,8 +476,10 @@ MTEB_MINERS_BITEXT_MINING = Benchmark(
403
476
 
404
477
  SEB = Benchmark(
405
478
  name="MTEB(Scandinavian, v1)",
479
+ aliases=["MTEB(Scandinavian)", "SEB"],
406
480
  display_name="Scandinavian",
407
481
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
482
+ language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
408
483
  tasks=get_tasks(
409
484
  tasks=[
410
485
  # Bitext
@@ -528,6 +603,7 @@ RAR_b = Benchmark(
528
603
 
529
604
  MTEB_FRA = Benchmark(
530
605
  name="MTEB(fra, v1)",
606
+ aliases=["MTEB(fra)"],
531
607
  display_name="French",
532
608
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
533
609
  tasks=MTEBTasks(
@@ -586,6 +662,7 @@ MTEB_FRA = Benchmark(
586
662
 
587
663
  MTEB_DEU = Benchmark(
588
664
  name="MTEB(deu, v1)",
665
+ aliases=["MTEB(deu)"],
589
666
  display_name="German",
590
667
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
591
668
  tasks=get_tasks(
@@ -637,6 +714,7 @@ MTEB_DEU = Benchmark(
637
714
 
638
715
  MTEB_KOR = Benchmark(
639
716
  name="MTEB(kor, v1)",
717
+ aliases=["MTEB(kor)"],
640
718
  display_name="Korean",
641
719
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
642
720
  tasks=get_tasks(
@@ -661,6 +739,7 @@ MTEB_KOR = Benchmark(
661
739
 
662
740
  MTEB_POL = Benchmark(
663
741
  name="MTEB(pol, v1)",
742
+ aliases=["MTEB(pol)"],
664
743
  display_name="Polish",
665
744
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
666
745
  tasks=MTEBTasks(
@@ -710,6 +789,7 @@ two novel clustering tasks.""", # Rephrased from the abstract
710
789
 
711
790
  MTEB_code = Benchmark(
712
791
  name="MTEB(Code, v1)",
792
+ aliases=["MTEB(code)"],
713
793
  display_name="Code",
714
794
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
715
795
  tasks=get_tasks(
@@ -886,7 +966,30 @@ MTEB_multilingual_v1 = Benchmark(
886
966
 
887
967
  MTEB_multilingual_v2 = Benchmark(
888
968
  name="MTEB(Multilingual, v2)",
969
+ aliases=["MTEB(Multilingual)", "MMTEB"],
889
970
  display_name="Multilingual",
971
+ language_view=[
972
+ "eng-Latn", # English
973
+ "zho-Hans", # Chinese (Simplified)
974
+ "hin-Deva", # Hindi
975
+ "spa-Latn", # Spanish
976
+ "fra-Latn", # French
977
+ "ara-Arab", # Arabic
978
+ "ben-Beng", # Bengali
979
+ "rus-Cyrl", # Russian
980
+ "por-Latn", # Portuguese
981
+ "urd-Arab", # Urdu
982
+ "ind-Latn", # Indonesian
983
+ "deu-Latn", # German
984
+ "jpn-Jpan", # Japanese
985
+ "swa-Latn", # Swahili
986
+ "mar-Deva", # Marathi
987
+ "tel-Telu", # Telugu
988
+ "tur-Latn", # Turkish
989
+ "tam-Taml", # Tamil
990
+ "vie-Latn", # Vietnamese
991
+ "kor-Hang", # Korean
992
+ ],
890
993
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-globe.svg",
891
994
  tasks=mteb_multilingual_tasks,
892
995
  description="A large-scale multilingual expansion of MTEB, driven mainly by highly-curated community contributions covering 250+ languages. ",
@@ -897,7 +1000,8 @@ MTEB_multilingual_v2 = Benchmark(
897
1000
 
898
1001
  MTEB_JPN = Benchmark(
899
1002
  name="MTEB(jpn, v1)",
900
- display_name="Japanese",
1003
+ aliases=["MTEB(jpn)"],
1004
+ display_name="Japanese Legacy",
901
1005
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
902
1006
  tasks=get_tasks(
903
1007
  languages=["jpn"],
@@ -967,6 +1071,7 @@ indic_languages = [
967
1071
 
968
1072
  MTEB_INDIC = Benchmark(
969
1073
  name="MTEB(Indic, v1)",
1074
+ aliases=["MTEB(Indic)"],
970
1075
  display_name="Indic",
971
1076
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
972
1077
  tasks=MTEBTasks(
@@ -1057,6 +1162,7 @@ eu_languages = [
1057
1162
 
1058
1163
  MTEB_EU = Benchmark(
1059
1164
  name="MTEB(Europe, v1)",
1165
+ aliases=["MTEB(Europe)"],
1060
1166
  display_name="European",
1061
1167
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
1062
1168
  tasks=get_tasks(
@@ -1196,6 +1302,7 @@ BRIGHT = Benchmark(
1196
1302
 
1197
1303
  BRIGHT_LONG = Benchmark(
1198
1304
  name="BRIGHT (long)",
1305
+ aliases=["BRIGHT(long)"],
1199
1306
  tasks=MTEBTasks(
1200
1307
  (
1201
1308
  get_task(
@@ -1311,6 +1418,7 @@ NANOBEIR = Benchmark(
1311
1418
 
1312
1419
  C_MTEB = Benchmark(
1313
1420
  name="MTEB(cmn, v1)",
1421
+ aliases=["MTEB(Chinese)", "CMTEB"],
1314
1422
  display_name="Chinese",
1315
1423
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
1316
1424
  tasks=MTEBTasks(
@@ -1377,6 +1485,7 @@ C_MTEB = Benchmark(
1377
1485
 
1378
1486
  FA_MTEB = Benchmark(
1379
1487
  name="MTEB(fas, v1)",
1488
+ aliases=["FaMTEB(fas, beta)"],
1380
1489
  display_name="Farsi Legacy",
1381
1490
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
1382
1491
  tasks=get_tasks(
@@ -1547,6 +1656,7 @@ FA_MTEB_2 = Benchmark(
1547
1656
 
1548
1657
  CHEMTEB = Benchmark(
1549
1658
  name="ChemTEB",
1659
+ aliases=["ChemTEB(v1)"],
1550
1660
  display_name="Chemical",
1551
1661
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
1552
1662
  tasks=get_tasks(
@@ -1592,6 +1702,62 @@ CHEMTEB = Benchmark(
1592
1702
  """,
1593
1703
  )
1594
1704
 
1705
+ CHEMTEB_V1_1 = Benchmark(
1706
+ name="ChemTEB(v1.1)",
1707
+ aliases=["ChemTEB(latest)"],
1708
+ display_name="Chemical",
1709
+ icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
1710
+ tasks=get_tasks(
1711
+ tasks=[
1712
+ "PubChemSMILESBitextMining",
1713
+ "SDSEyeProtectionClassification",
1714
+ "SDSGlovesClassification",
1715
+ "WikipediaBioMetChemClassification",
1716
+ "WikipediaGreenhouseEnantiopureClassification",
1717
+ "WikipediaSolidStateColloidalClassification",
1718
+ "WikipediaOrganicInorganicClassification",
1719
+ "WikipediaCryobiologySeparationClassification",
1720
+ "WikipediaChemistryTopicsClassification",
1721
+ "WikipediaTheoreticalAppliedClassification",
1722
+ "WikipediaChemFieldsClassification",
1723
+ "WikipediaLuminescenceClassification",
1724
+ "WikipediaIsotopesFissionClassification",
1725
+ "WikipediaSaltsSemiconductorsClassification",
1726
+ "WikipediaBiolumNeurochemClassification",
1727
+ "WikipediaCrystallographyAnalyticalClassification",
1728
+ "WikipediaCompChemSpectroscopyClassification",
1729
+ "WikipediaChemEngSpecialtiesClassification",
1730
+ "WikipediaChemistryTopicsClustering",
1731
+ "WikipediaSpecialtiesInChemistryClustering",
1732
+ "PubChemAISentenceParaphrasePC",
1733
+ "PubChemSMILESPC",
1734
+ "PubChemSynonymPC",
1735
+ "PubChemWikiParagraphsPC",
1736
+ "PubChemWikiPairClassification",
1737
+ "ChemNQRetrieval",
1738
+ "ChemHotpotQARetrieval",
1739
+ "ChemRxivRetrieval",
1740
+ ],
1741
+ ),
1742
+ description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version adds the ChemRxivRetrieval task.",
1743
+ reference="https://arxiv.org/abs/2412.00532",
1744
+ citation=r"""
1745
+ @article{kasmaee2024chemteb,
1746
+ author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
1747
+ journal = {arXiv preprint arXiv:2412.00532},
1748
+ title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain},
1749
+ year = {2024},
1750
+ }
1751
+
1752
+ @article{kasmaee2025chembed,
1753
+ author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
1754
+ journal = {arXiv preprint arXiv:2508.01643},
1755
+ title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
1756
+ year = {2025},
1757
+ }
1758
+ """,
1759
+ )
1760
+
1595
1761
  BEIR_NL = Benchmark(
1596
1762
  name="BEIR-NL",
1597
1763
  display_name="BEIR-NL",
@@ -1642,7 +1808,7 @@ MTEB_NL = Benchmark(
1642
1808
  exclusive_language_filter=True,
1643
1809
  tasks=[
1644
1810
  # Classification
1645
- "DutchBookReviewSentimentClassification",
1811
+ "DutchBookReviewSentimentClassification.v2",
1646
1812
  "MassiveIntentClassification",
1647
1813
  "MassiveScenarioClassification",
1648
1814
  "SIB200Classification",
@@ -1673,10 +1839,10 @@ MTEB_NL = Benchmark(
1673
1839
  # # Reranking
1674
1840
  "WikipediaRerankingMultilingual",
1675
1841
  # # Retrieval
1676
- "ArguAna-NL",
1677
- "SCIDOCS-NL",
1678
- "SciFact-NL",
1679
- "NFCorpus-NL",
1842
+ "ArguAna-NL.v2",
1843
+ "SCIDOCS-NL.v2",
1844
+ "SciFact-NL.v2",
1845
+ "NFCorpus-NL.v2",
1680
1846
  "BelebeleRetrieval",
1681
1847
  "WebFAQRetrieval",
1682
1848
  "DutchNewsArticlesRetrieval",
@@ -2214,10 +2380,51 @@ VIDORE_V2 = Benchmark(
2214
2380
  """,
2215
2381
  )
2216
2382
 
2217
- VISUAL_DOCUMENT_RETRIEVAL = Benchmark(
2218
- name="VisualDocumentRetrieval",
2219
- display_name="Visual Document Retrieval",
2220
- icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-picture.svg",
2383
+ VIDORE_V3 = VidoreBenchmark(
2384
+ name="ViDoRe(v3)",
2385
+ display_name="ViDoRe V3",
2386
+ language_view=[
2387
+ "deu-Latn",
2388
+ "eng-Latn",
2389
+ "fra-Latn",
2390
+ "ita-Latn",
2391
+ "por-Latn",
2392
+ "spa-Latn",
2393
+ ],
2394
+ icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
2395
+ tasks=get_tasks(
2396
+ tasks=[
2397
+ "Vidore3FinanceEnRetrieval",
2398
+ "Vidore3IndustrialRetrieval",
2399
+ "Vidore3ComputerScienceRetrieval",
2400
+ "Vidore3PharmaceuticalsRetrieval",
2401
+ "Vidore3HrRetrieval",
2402
+ "Vidore3FinanceFrRetrieval",
2403
+ "Vidore3PhysicsRetrieval",
2404
+ "Vidore3EnergyRetrieval",
2405
+ "Vidore3TelecomRetrieval",
2406
+ "Vidore3NuclearRetrieval",
2407
+ ]
2408
+ ),
2409
+ description="ViDoRe V3 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents. The benchmark includes both open and closed datasets: to submit results on private tasks, please [open an issue](https://github.com/embeddings-benchmark/mteb/issues?template=eval_request.yaml).",
2410
+ reference="https://arxiv.org/abs/2601.08620",
2411
+ citation=r"""
2412
+ @article{loison2026vidorev3comprehensiveevaluation,
2413
+ archiveprefix = {arXiv},
2414
+ author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
2415
+ eprint = {2601.08620},
2416
+ primaryclass = {cs.AI},
2417
+ title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
2418
+ url = {https://arxiv.org/abs/2601.08620},
2419
+ year = {2026},
2420
+ }
2421
+ """,
2422
+ )
2423
+
2424
+ VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
2425
+ name="ViDoRe(v1&v2)",
2426
+ aliases=["VisualDocumentRetrieval"],
2427
+ display_name="ViDoRe (V1&V2)",
2221
2428
  tasks=get_tasks(
2222
2429
  tasks=[
2223
2430
  # v1
@@ -2459,7 +2666,145 @@ HUME = HUMEBenchmark(
2459
2666
  ],
2460
2667
  ),
2461
2668
  description="The HUME benchmark is designed to evaluate the performance of text embedding models and humans on a comparable set of tasks. This captures areas where models perform better than human annotators and the reverse. In the paper, we go further into the analysis and what conclusions can be drawn.",
2462
- reference="Coming soon (in review)",
2669
+ reference=None,
2463
2670
  citation=None,
2464
2671
  contacts=["AdnanElAssadi56", "KennethEnevoldsen", "isaac-chung", "Samoed"],
2465
2672
  )
2673
+
2674
+ JMTEB_V2 = Benchmark(
2675
+ name="JMTEB(v2)",
2676
+ display_name="Japanese",
2677
+ icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
2678
+ tasks=get_tasks(
2679
+ languages=["jpn"],
2680
+ tasks=[
2681
+ # Clustering (3)
2682
+ "LivedoorNewsClustering.v2",
2683
+ "MewsC16JaClustering",
2684
+ "SIB200ClusteringS2S",
2685
+ # Classification (7)
2686
+ "AmazonReviewsClassification",
2687
+ "AmazonCounterfactualClassification",
2688
+ "MassiveIntentClassification",
2689
+ "MassiveScenarioClassification",
2690
+ "JapaneseSentimentClassification",
2691
+ "SIB200Classification",
2692
+ "WRIMEClassification",
2693
+ # STS (2)
2694
+ "JSTS",
2695
+ "JSICK",
2696
+ # Retrieval (11)
2697
+ "JaqketRetrieval",
2698
+ "MrTidyRetrieval",
2699
+ "JaGovFaqsRetrieval",
2700
+ "NLPJournalTitleAbsRetrieval.V2",
2701
+ "NLPJournalTitleIntroRetrieval.V2",
2702
+ "NLPJournalAbsIntroRetrieval.V2",
2703
+ "NLPJournalAbsArticleRetrieval.V2",
2704
+ "JaCWIRRetrieval",
2705
+ "MIRACLRetrieval",
2706
+ "MintakaRetrieval",
2707
+ "MultiLongDocRetrieval",
2708
+ # Reranking (5)
2709
+ "ESCIReranking",
2710
+ "JQaRAReranking",
2711
+ "JaCWIRReranking",
2712
+ "MIRACLReranking",
2713
+ "MultiLongDocReranking",
2714
+ ],
2715
+ ),
2716
+ description="JMTEB is a benchmark for evaluating Japanese text embedding models. In v2, we have extended the benchmark to 28 datasets, enabling more comprehensive evaluation compared with v1 (MTEB(jpn, v1)).",
2717
+ reference="https://github.com/sbintuitions/JMTEB",
2718
+ citation=r"""
2719
+ @article{li2025jmteb,
2720
+ author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
2721
+ issue = {3},
2722
+ journal = {Vol.2025-NL-265,No.3,1-15},
2723
+ month = {sep},
2724
+ title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
2725
+ year = {2025},
2726
+ }
2727
+ """,
2728
+ contacts=["lsz05"],
2729
+ )
2730
+
2731
+ JMTEB_LITE_V1 = Benchmark(
2732
+ name="JMTEB-lite(v1)",
2733
+ display_name="Japanese",
2734
+ icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
2735
+ tasks=get_tasks(
2736
+ languages=["jpn"],
2737
+ tasks=[
2738
+ # Clustering (3)
2739
+ "LivedoorNewsClustering.v2",
2740
+ "MewsC16JaClustering",
2741
+ "SIB200ClusteringS2S",
2742
+ # Classification (7)
2743
+ "AmazonReviewsClassification",
2744
+ "AmazonCounterfactualClassification",
2745
+ "MassiveIntentClassification",
2746
+ "MassiveScenarioClassification",
2747
+ "JapaneseSentimentClassification",
2748
+ "SIB200Classification",
2749
+ "WRIMEClassification",
2750
+ # STS (2)
2751
+ "JSTS",
2752
+ "JSICK",
2753
+ # Retrieval (11)
2754
+ "JaqketRetrievalLite",
2755
+ "MrTyDiJaRetrievalLite",
2756
+ "JaGovFaqsRetrieval",
2757
+ "NLPJournalTitleAbsRetrieval.V2",
2758
+ "NLPJournalTitleIntroRetrieval.V2",
2759
+ "NLPJournalAbsIntroRetrieval.V2",
2760
+ "NLPJournalAbsArticleRetrieval.V2",
2761
+ "JaCWIRRetrievalLite",
2762
+ "MIRACLJaRetrievalLite",
2763
+ "MintakaRetrieval",
2764
+ "MultiLongDocRetrieval",
2765
+ # Reranking (5)
2766
+ "ESCIReranking",
2767
+ "JQaRARerankingLite",
2768
+ "JaCWIRRerankingLite",
2769
+ "MIRACLReranking",
2770
+ "MultiLongDocReranking",
2771
+ ],
2772
+ ),
2773
+ description="JMTEB-lite is a lightweight version of JMTEB. It makes agile evaluation possible by reaching an average of 5x faster evaluation comparing with JMTEB, as 6 heavy datasets in JMTEB are optimized with hard negative pooling strategy, making them much smaller. The result of JMTEB-lite is proved to be highly relevant with that of JMTEB, making it a faithful preview of JMTEB.",
2774
+ reference="https://huggingface.co/datasets/sbintuitions/JMTEB-lite",
2775
+ citation=r"""
2776
+ @article{li2025jmteb,
2777
+ author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
2778
+ issue = {3},
2779
+ journal = {Vol.2025-NL-265,No.3,1-15},
2780
+ month = {sep},
2781
+ title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
2782
+ year = {2025},
2783
+ }
2784
+ """,
2785
+ contacts=["lsz05"],
2786
+ )
2787
+
2788
+ KOVIDORE_V2 = Benchmark(
2789
+ name="KoViDoRe(v2)",
2790
+ display_name="KoViDoRe v2",
2791
+ tasks=get_tasks(
2792
+ tasks=[
2793
+ "KoVidore2CybersecurityRetrieval",
2794
+ "KoVidore2EconomicRetrieval",
2795
+ "KoVidore2EnergyRetrieval",
2796
+ "KoVidore2HrRetrieval",
2797
+ ]
2798
+ ),
2799
+ description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.",
2800
+ reference="https://github.com/whybe-choi/kovidore-data-generator",
2801
+ citation=r"""
2802
+ @misc{choi2026kovidorev2,
2803
+ author = {Yongbin Choi},
2804
+ note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
2805
+ title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
2806
+ url = {https://github.com/whybe-choi/kovidore-data-generator},
2807
+ year = {2026},
2808
+ }
2809
+ """,
2810
+ )
@@ -1,6 +1,5 @@
1
1
  import difflib
2
2
  import logging
3
- import warnings
4
3
  from functools import lru_cache
5
4
 
6
5
  from .benchmark import Benchmark
@@ -20,51 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
20
19
  return benchmark_registry
21
20
 
22
21
 
23
- def _get_previous_benchmark_names() -> dict[str, str]:
24
- from .benchmarks import (
25
- BRIGHT_LONG,
26
- C_MTEB,
27
- FA_MTEB,
28
- MTEB_DEU,
29
- MTEB_EN,
30
- MTEB_ENG_CLASSIC,
31
- MTEB_EU,
32
- MTEB_FRA,
33
- MTEB_INDIC,
34
- MTEB_JPN,
35
- MTEB_KOR,
36
- MTEB_MAIN_RU,
37
- MTEB_POL,
38
- MTEB_RETRIEVAL_LAW,
39
- MTEB_RETRIEVAL_MEDICAL,
40
- MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
41
- SEB,
42
- MTEB_code,
43
- MTEB_multilingual_v2,
44
- )
45
-
46
- previous_benchmark_names = {
47
- "MTEB(eng)": MTEB_EN.name,
48
- "MTEB(eng, classic)": MTEB_ENG_CLASSIC.name,
49
- "MTEB(rus)": MTEB_MAIN_RU.name,
50
- "MTEB(Retrieval w/Instructions)": MTEB_RETRIEVAL_WITH_INSTRUCTIONS.name,
51
- "MTEB(law)": MTEB_RETRIEVAL_LAW.name,
52
- "MTEB(Medical)": MTEB_RETRIEVAL_MEDICAL.name,
53
- "MTEB(Scandinavian)": SEB.name,
54
- "MTEB(fra)": MTEB_FRA.name,
55
- "MTEB(deu)": MTEB_DEU.name,
56
- "MTEB(kor)": MTEB_KOR.name,
57
- "MTEB(pol)": MTEB_POL.name,
58
- "MTEB(code)": MTEB_code.name,
59
- "MTEB(Multilingual)": MTEB_multilingual_v2.name,
60
- "MTEB(jpn)": MTEB_JPN.name,
61
- "MTEB(Indic)": MTEB_INDIC.name,
62
- "MTEB(Europe)": MTEB_EU.name,
63
- "MTEB(Chinese)": C_MTEB.name,
64
- "FaMTEB(fas, beta)": FA_MTEB.name,
65
- "BRIGHT(long)": BRIGHT_LONG.name,
66
- }
67
- return previous_benchmark_names
22
+ @lru_cache
23
+ def _build_aliases_registry() -> dict[str, Benchmark]:
24
+ import mteb.benchmarks.benchmarks as benchmark_module
25
+
26
+ aliases: dict[str, Benchmark] = {}
27
+ for _, inst in benchmark_module.__dict__.items():
28
+ if isinstance(inst, Benchmark) and inst.aliases is not None:
29
+ for alias in inst.aliases:
30
+ aliases[alias] = inst
31
+ return aliases
68
32
 
69
33
 
70
34
  def get_benchmark(
@@ -78,14 +42,11 @@ def get_benchmark(
78
42
  Returns:
79
43
  The Benchmark instance corresponding to the given name.
80
44
  """
81
- previous_benchmark_names = _get_previous_benchmark_names()
82
45
  benchmark_registry = _build_registry()
83
- if benchmark_name in previous_benchmark_names:
84
- warnings.warn(
85
- f"Using the previous benchmark name '{benchmark_name}' is deprecated. Please use '{previous_benchmark_names[benchmark_name]}' instead.",
86
- DeprecationWarning,
87
- )
88
- benchmark_name = previous_benchmark_names[benchmark_name]
46
+ aliases_registry = _build_aliases_registry()
47
+
48
+ if benchmark_name in aliases_registry:
49
+ return aliases_registry[benchmark_name]
89
50
  if benchmark_name not in benchmark_registry:
90
51
  close_matches = difflib.get_close_matches(
91
52
  benchmark_name, benchmark_registry.keys()