mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +17 -18
  3. mteb/_evaluators/any_sts_evaluator.py +3 -3
  4. mteb/_evaluators/clustering_evaluator.py +2 -2
  5. mteb/_evaluators/evaluator.py +4 -2
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
  7. mteb/_evaluators/pair_classification_evaluator.py +5 -3
  8. mteb/_evaluators/retrieval_evaluator.py +2 -2
  9. mteb/_evaluators/retrieval_metrics.py +18 -17
  10. mteb/_evaluators/sklearn_evaluator.py +11 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
  12. mteb/_evaluators/text/summarization_evaluator.py +23 -18
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
  14. mteb/abstasks/_data_filter/filters.py +1 -1
  15. mteb/abstasks/_data_filter/task_pipelines.py +3 -0
  16. mteb/abstasks/_statistics_calculation.py +18 -10
  17. mteb/abstasks/_stratification.py +18 -18
  18. mteb/abstasks/abstask.py +35 -28
  19. mteb/abstasks/aggregate_task_metadata.py +1 -9
  20. mteb/abstasks/aggregated_task.py +10 -29
  21. mteb/abstasks/classification.py +15 -10
  22. mteb/abstasks/clustering.py +19 -15
  23. mteb/abstasks/clustering_legacy.py +10 -10
  24. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  25. mteb/abstasks/multilabel_classification.py +23 -19
  26. mteb/abstasks/pair_classification.py +20 -11
  27. mteb/abstasks/regression.py +4 -4
  28. mteb/abstasks/retrieval.py +28 -24
  29. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  30. mteb/abstasks/sts.py +8 -5
  31. mteb/abstasks/task_metadata.py +31 -33
  32. mteb/abstasks/text/bitext_mining.py +39 -28
  33. mteb/abstasks/text/reranking.py +8 -6
  34. mteb/abstasks/text/summarization.py +10 -5
  35. mteb/abstasks/zeroshot_classification.py +8 -4
  36. mteb/benchmarks/benchmark.py +4 -2
  37. mteb/benchmarks/benchmarks/__init__.py +4 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +112 -11
  39. mteb/benchmarks/get_benchmark.py +14 -55
  40. mteb/cache.py +182 -29
  41. mteb/cli/_display_tasks.py +2 -2
  42. mteb/cli/build_cli.py +110 -14
  43. mteb/cli/generate_model_card.py +43 -23
  44. mteb/deprecated_evaluator.py +63 -49
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  49. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  50. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  51. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  53. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  54. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  55. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  56. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  57. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  58. mteb/evaluate.py +44 -33
  59. mteb/filter_tasks.py +25 -26
  60. mteb/get_tasks.py +29 -30
  61. mteb/languages/language_scripts.py +5 -3
  62. mteb/leaderboard/app.py +162 -34
  63. mteb/load_results.py +12 -12
  64. mteb/models/abs_encoder.py +10 -6
  65. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  66. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
  67. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  68. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  69. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  70. mteb/models/get_model_meta.py +21 -3
  71. mteb/models/instruct_wrapper.py +28 -8
  72. mteb/models/model_implementations/align_models.py +1 -1
  73. mteb/models/model_implementations/andersborges.py +4 -4
  74. mteb/models/model_implementations/ara_models.py +1 -1
  75. mteb/models/model_implementations/arctic_models.py +8 -8
  76. mteb/models/model_implementations/b1ade_models.py +1 -1
  77. mteb/models/model_implementations/bge_models.py +45 -21
  78. mteb/models/model_implementations/bica_model.py +3 -3
  79. mteb/models/model_implementations/blip2_models.py +2 -2
  80. mteb/models/model_implementations/blip_models.py +16 -16
  81. mteb/models/model_implementations/bm25.py +4 -4
  82. mteb/models/model_implementations/bmretriever_models.py +6 -4
  83. mteb/models/model_implementations/cadet_models.py +1 -1
  84. mteb/models/model_implementations/cde_models.py +11 -4
  85. mteb/models/model_implementations/clip_models.py +6 -6
  86. mteb/models/model_implementations/clips_models.py +3 -3
  87. mteb/models/model_implementations/codefuse_models.py +5 -5
  88. mteb/models/model_implementations/codesage_models.py +3 -3
  89. mteb/models/model_implementations/cohere_models.py +5 -5
  90. mteb/models/model_implementations/cohere_v.py +2 -2
  91. mteb/models/model_implementations/colpali_models.py +3 -3
  92. mteb/models/model_implementations/colqwen_models.py +8 -8
  93. mteb/models/model_implementations/colsmol_models.py +2 -2
  94. mteb/models/model_implementations/conan_models.py +1 -1
  95. mteb/models/model_implementations/dino_models.py +42 -42
  96. mteb/models/model_implementations/e5_instruct.py +23 -4
  97. mteb/models/model_implementations/e5_models.py +9 -9
  98. mteb/models/model_implementations/e5_v.py +6 -6
  99. mteb/models/model_implementations/eagerworks_models.py +1 -1
  100. mteb/models/model_implementations/emillykkejensen_models.py +6 -6
  101. mteb/models/model_implementations/en_code_retriever.py +1 -1
  102. mteb/models/model_implementations/euler_models.py +2 -2
  103. mteb/models/model_implementations/fa_models.py +9 -9
  104. mteb/models/model_implementations/facebookai.py +14 -2
  105. mteb/models/model_implementations/geogpt_models.py +1 -1
  106. mteb/models/model_implementations/gme_v_models.py +6 -5
  107. mteb/models/model_implementations/google_models.py +1 -1
  108. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
  109. mteb/models/model_implementations/gritlm_models.py +2 -2
  110. mteb/models/model_implementations/gte_models.py +25 -13
  111. mteb/models/model_implementations/hinvec_models.py +1 -1
  112. mteb/models/model_implementations/ibm_granite_models.py +30 -6
  113. mteb/models/model_implementations/inf_models.py +2 -2
  114. mteb/models/model_implementations/jasper_models.py +2 -2
  115. mteb/models/model_implementations/jina_clip.py +48 -10
  116. mteb/models/model_implementations/jina_models.py +18 -11
  117. mteb/models/model_implementations/kblab.py +12 -6
  118. mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
  119. mteb/models/model_implementations/kfst.py +1 -1
  120. mteb/models/model_implementations/kowshik24_models.py +1 -1
  121. mteb/models/model_implementations/lgai_embedding_models.py +1 -1
  122. mteb/models/model_implementations/linq_models.py +1 -1
  123. mteb/models/model_implementations/listconranker.py +1 -1
  124. mteb/models/model_implementations/llm2clip_models.py +6 -6
  125. mteb/models/model_implementations/llm2vec_models.py +8 -8
  126. mteb/models/model_implementations/mcinext_models.py +4 -1
  127. mteb/models/model_implementations/mdbr_models.py +17 -3
  128. mteb/models/model_implementations/misc_models.py +68 -68
  129. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  130. mteb/models/model_implementations/mme5_models.py +1 -1
  131. mteb/models/model_implementations/moco_models.py +4 -4
  132. mteb/models/model_implementations/mod_models.py +1 -1
  133. mteb/models/model_implementations/model2vec_models.py +14 -14
  134. mteb/models/model_implementations/moka_models.py +1 -1
  135. mteb/models/model_implementations/nbailab.py +3 -3
  136. mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
  137. mteb/models/model_implementations/nomic_models.py +30 -15
  138. mteb/models/model_implementations/nomic_models_vision.py +1 -1
  139. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
  140. mteb/models/model_implementations/nvidia_models.py +151 -19
  141. mteb/models/model_implementations/octen_models.py +61 -2
  142. mteb/models/model_implementations/openclip_models.py +13 -13
  143. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
  144. mteb/models/model_implementations/ops_moa_models.py +1 -1
  145. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  146. mteb/models/model_implementations/pawan_models.py +1 -1
  147. mteb/models/model_implementations/piccolo_models.py +1 -1
  148. mteb/models/model_implementations/pixie_models.py +56 -0
  149. mteb/models/model_implementations/promptriever_models.py +4 -4
  150. mteb/models/model_implementations/pylate_models.py +10 -9
  151. mteb/models/model_implementations/qodo_models.py +2 -2
  152. mteb/models/model_implementations/qtack_models.py +1 -1
  153. mteb/models/model_implementations/qwen3_models.py +3 -3
  154. mteb/models/model_implementations/qzhou_models.py +2 -2
  155. mteb/models/model_implementations/random_baseline.py +3 -3
  156. mteb/models/model_implementations/rasgaard_models.py +2 -2
  157. mteb/models/model_implementations/reasonir_model.py +1 -1
  158. mteb/models/model_implementations/repllama_models.py +3 -3
  159. mteb/models/model_implementations/rerankers_custom.py +12 -6
  160. mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
  161. mteb/models/model_implementations/richinfoai_models.py +1 -1
  162. mteb/models/model_implementations/ru_sentence_models.py +20 -20
  163. mteb/models/model_implementations/ruri_models.py +10 -10
  164. mteb/models/model_implementations/salesforce_models.py +3 -3
  165. mteb/models/model_implementations/samilpwc_models.py +1 -1
  166. mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
  167. mteb/models/model_implementations/searchmap_models.py +1 -1
  168. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
  169. mteb/models/model_implementations/sentence_transformers_models.py +124 -22
  170. mteb/models/model_implementations/shuu_model.py +1 -1
  171. mteb/models/model_implementations/siglip_models.py +20 -20
  172. mteb/models/model_implementations/slm_models.py +416 -0
  173. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
  174. mteb/models/model_implementations/stella_models.py +17 -4
  175. mteb/models/model_implementations/tarka_models.py +2 -2
  176. mteb/models/model_implementations/text2vec_models.py +9 -3
  177. mteb/models/model_implementations/ua_sentence_models.py +1 -1
  178. mteb/models/model_implementations/uae_models.py +7 -1
  179. mteb/models/model_implementations/vdr_models.py +1 -1
  180. mteb/models/model_implementations/vi_vn_models.py +6 -6
  181. mteb/models/model_implementations/vlm2vec_models.py +3 -3
  182. mteb/models/model_implementations/voyage_models.py +84 -0
  183. mteb/models/model_implementations/voyage_v.py +9 -7
  184. mteb/models/model_implementations/youtu_models.py +1 -1
  185. mteb/models/model_implementations/yuan_models.py +1 -1
  186. mteb/models/model_implementations/yuan_models_en.py +1 -1
  187. mteb/models/model_meta.py +80 -31
  188. mteb/models/models_protocols.py +22 -6
  189. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
  190. mteb/models/search_wrappers.py +33 -18
  191. mteb/models/sentence_transformer_wrapper.py +50 -25
  192. mteb/models/vllm_wrapper.py +327 -0
  193. mteb/py.typed +0 -0
  194. mteb/results/benchmark_results.py +29 -21
  195. mteb/results/model_result.py +52 -22
  196. mteb/results/task_result.py +80 -58
  197. mteb/similarity_functions.py +11 -7
  198. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  199. mteb/tasks/classification/est/estonian_valence.py +1 -1
  200. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  201. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  202. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  203. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  204. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  205. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  206. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  207. mteb/tasks/retrieval/code/code_rag.py +12 -12
  208. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  209. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  210. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  211. mteb/tasks/retrieval/eng/__init__.py +2 -0
  212. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  213. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  214. mteb/tasks/retrieval/kor/__init__.py +15 -1
  215. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  216. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  217. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  218. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  219. mteb/tasks/retrieval/nob/norquad.py +2 -2
  220. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  221. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  222. mteb/tasks/retrieval/vie/__init__.py +14 -6
  223. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  224. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  225. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  226. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  227. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  228. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  229. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  230. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  231. mteb/types/__init__.py +2 -0
  232. mteb/types/_encoder_io.py +12 -0
  233. mteb/types/_result.py +2 -1
  234. mteb/types/statistics.py +9 -3
  235. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
  236. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
  237. mteb/models/model_implementations/mxbai_models.py +0 -111
  238. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  239. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  240. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  241. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
18
18
 
19
19
  MTEB_EN = Benchmark(
20
20
  name="MTEB(eng, v2)",
21
+ aliases=["MTEB(eng)"],
21
22
  display_name="English",
22
23
  icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
23
24
  tasks=MTEBTasks(
@@ -89,6 +90,7 @@ The original MTEB leaderboard is available under the [MTEB(eng, v1)](http://mteb
89
90
 
90
91
  MTEB_ENG_CLASSIC = Benchmark(
91
92
  name="MTEB(eng, v1)",
93
+ aliases=["MTEB(eng, classic)", "MTEB"],
92
94
  display_name="English Legacy",
93
95
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
94
96
  tasks=MTEBTasks(
@@ -185,6 +187,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
185
187
 
186
188
  MTEB_MAIN_RU = Benchmark(
187
189
  name="MTEB(rus, v1)",
190
+ aliases=["MTEB(rus)"],
188
191
  display_name="Russian legacy",
189
192
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
190
193
  tasks=MTEBTasks(
@@ -344,6 +347,7 @@ RU_SCI_BENCH = Benchmark(
344
347
 
345
348
  MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
346
349
  name="FollowIR",
350
+ aliases=["MTEB(Retrieval w/Instructions)"],
347
351
  display_name="Instruction Following",
348
352
  tasks=get_tasks(
349
353
  tasks=[
@@ -394,7 +398,9 @@ MTEB_RETRIEVAL_WITH_DOMAIN_INSTRUCTIONS = Benchmark(
394
398
  )
395
399
 
396
400
  MTEB_RETRIEVAL_LAW = Benchmark(
397
- name="MTEB(Law, v1)", # This benchmark is likely in the need of an update
401
+ # This benchmark is likely in the need of an update
402
+ name="MTEB(Law, v1)",
403
+ aliases=["MTEB(law)"],
398
404
  display_name="Legal",
399
405
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
400
406
  tasks=get_tasks(
@@ -416,6 +422,7 @@ MTEB_RETRIEVAL_LAW = Benchmark(
416
422
 
417
423
  MTEB_RETRIEVAL_MEDICAL = Benchmark(
418
424
  name="MTEB(Medical, v1)",
425
+ aliases=["MTEB(Medical)"],
419
426
  display_name="Medical",
420
427
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
421
428
  tasks=get_tasks(
@@ -469,6 +476,7 @@ MTEB_MINERS_BITEXT_MINING = Benchmark(
469
476
 
470
477
  SEB = Benchmark(
471
478
  name="MTEB(Scandinavian, v1)",
479
+ aliases=["MTEB(Scandinavian)", "SEB"],
472
480
  display_name="Scandinavian",
473
481
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
474
482
  language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
@@ -595,6 +603,7 @@ RAR_b = Benchmark(
595
603
 
596
604
  MTEB_FRA = Benchmark(
597
605
  name="MTEB(fra, v1)",
606
+ aliases=["MTEB(fra)"],
598
607
  display_name="French",
599
608
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
600
609
  tasks=MTEBTasks(
@@ -653,6 +662,7 @@ MTEB_FRA = Benchmark(
653
662
 
654
663
  MTEB_DEU = Benchmark(
655
664
  name="MTEB(deu, v1)",
665
+ aliases=["MTEB(deu)"],
656
666
  display_name="German",
657
667
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
658
668
  tasks=get_tasks(
@@ -704,6 +714,7 @@ MTEB_DEU = Benchmark(
704
714
 
705
715
  MTEB_KOR = Benchmark(
706
716
  name="MTEB(kor, v1)",
717
+ aliases=["MTEB(kor)"],
707
718
  display_name="Korean",
708
719
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
709
720
  tasks=get_tasks(
@@ -728,6 +739,7 @@ MTEB_KOR = Benchmark(
728
739
 
729
740
  MTEB_POL = Benchmark(
730
741
  name="MTEB(pol, v1)",
742
+ aliases=["MTEB(pol)"],
731
743
  display_name="Polish",
732
744
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
733
745
  tasks=MTEBTasks(
@@ -777,6 +789,7 @@ two novel clustering tasks.""", # Rephrased from the abstract
777
789
 
778
790
  MTEB_code = Benchmark(
779
791
  name="MTEB(Code, v1)",
792
+ aliases=["MTEB(code)"],
780
793
  display_name="Code",
781
794
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
782
795
  tasks=get_tasks(
@@ -953,6 +966,7 @@ MTEB_multilingual_v1 = Benchmark(
953
966
 
954
967
  MTEB_multilingual_v2 = Benchmark(
955
968
  name="MTEB(Multilingual, v2)",
969
+ aliases=["MTEB(Multilingual)", "MMTEB"],
956
970
  display_name="Multilingual",
957
971
  language_view=[
958
972
  "eng-Latn", # English
@@ -986,6 +1000,7 @@ MTEB_multilingual_v2 = Benchmark(
986
1000
 
987
1001
  MTEB_JPN = Benchmark(
988
1002
  name="MTEB(jpn, v1)",
1003
+ aliases=["MTEB(jpn)"],
989
1004
  display_name="Japanese Legacy",
990
1005
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
991
1006
  tasks=get_tasks(
@@ -1056,6 +1071,7 @@ indic_languages = [
1056
1071
 
1057
1072
  MTEB_INDIC = Benchmark(
1058
1073
  name="MTEB(Indic, v1)",
1074
+ aliases=["MTEB(Indic)"],
1059
1075
  display_name="Indic",
1060
1076
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
1061
1077
  tasks=MTEBTasks(
@@ -1146,6 +1162,7 @@ eu_languages = [
1146
1162
 
1147
1163
  MTEB_EU = Benchmark(
1148
1164
  name="MTEB(Europe, v1)",
1165
+ aliases=["MTEB(Europe)"],
1149
1166
  display_name="European",
1150
1167
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
1151
1168
  tasks=get_tasks(
@@ -1285,6 +1302,7 @@ BRIGHT = Benchmark(
1285
1302
 
1286
1303
  BRIGHT_LONG = Benchmark(
1287
1304
  name="BRIGHT (long)",
1305
+ aliases=["BRIGHT(long)"],
1288
1306
  tasks=MTEBTasks(
1289
1307
  (
1290
1308
  get_task(
@@ -1400,6 +1418,7 @@ NANOBEIR = Benchmark(
1400
1418
 
1401
1419
  C_MTEB = Benchmark(
1402
1420
  name="MTEB(cmn, v1)",
1421
+ aliases=["MTEB(Chinese)", "CMTEB"],
1403
1422
  display_name="Chinese",
1404
1423
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
1405
1424
  tasks=MTEBTasks(
@@ -1466,6 +1485,7 @@ C_MTEB = Benchmark(
1466
1485
 
1467
1486
  FA_MTEB = Benchmark(
1468
1487
  name="MTEB(fas, v1)",
1488
+ aliases=["FaMTEB(fas, beta)"],
1469
1489
  display_name="Farsi Legacy",
1470
1490
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
1471
1491
  tasks=get_tasks(
@@ -1636,6 +1656,7 @@ FA_MTEB_2 = Benchmark(
1636
1656
 
1637
1657
  CHEMTEB = Benchmark(
1638
1658
  name="ChemTEB",
1659
+ aliases=["ChemTEB(v1)"],
1639
1660
  display_name="Chemical",
1640
1661
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
1641
1662
  tasks=get_tasks(
@@ -1681,6 +1702,62 @@ CHEMTEB = Benchmark(
1681
1702
  """,
1682
1703
  )
1683
1704
 
1705
+ CHEMTEB_V1_1 = Benchmark(
1706
+ name="ChemTEB(v1.1)",
1707
+ aliases=["ChemTEB(latest)"],
1708
+ display_name="Chemical",
1709
+ icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
1710
+ tasks=get_tasks(
1711
+ tasks=[
1712
+ "PubChemSMILESBitextMining",
1713
+ "SDSEyeProtectionClassification",
1714
+ "SDSGlovesClassification",
1715
+ "WikipediaBioMetChemClassification",
1716
+ "WikipediaGreenhouseEnantiopureClassification",
1717
+ "WikipediaSolidStateColloidalClassification",
1718
+ "WikipediaOrganicInorganicClassification",
1719
+ "WikipediaCryobiologySeparationClassification",
1720
+ "WikipediaChemistryTopicsClassification",
1721
+ "WikipediaTheoreticalAppliedClassification",
1722
+ "WikipediaChemFieldsClassification",
1723
+ "WikipediaLuminescenceClassification",
1724
+ "WikipediaIsotopesFissionClassification",
1725
+ "WikipediaSaltsSemiconductorsClassification",
1726
+ "WikipediaBiolumNeurochemClassification",
1727
+ "WikipediaCrystallographyAnalyticalClassification",
1728
+ "WikipediaCompChemSpectroscopyClassification",
1729
+ "WikipediaChemEngSpecialtiesClassification",
1730
+ "WikipediaChemistryTopicsClustering",
1731
+ "WikipediaSpecialtiesInChemistryClustering",
1732
+ "PubChemAISentenceParaphrasePC",
1733
+ "PubChemSMILESPC",
1734
+ "PubChemSynonymPC",
1735
+ "PubChemWikiParagraphsPC",
1736
+ "PubChemWikiPairClassification",
1737
+ "ChemNQRetrieval",
1738
+ "ChemHotpotQARetrieval",
1739
+ "ChemRxivRetrieval",
1740
+ ],
1741
+ ),
1742
+ description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version adds the ChemRxivRetrieval task.",
1743
+ reference="https://arxiv.org/abs/2412.00532",
1744
+ citation=r"""
1745
+ @article{kasmaee2024chemteb,
1746
+ author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
1747
+ journal = {arXiv preprint arXiv:2412.00532},
1748
+ title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain},
1749
+ year = {2024},
1750
+ }
1751
+
1752
+ @article{kasmaee2025chembed,
1753
+ author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
1754
+ journal = {arXiv preprint arXiv:2508.01643},
1755
+ title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
1756
+ year = {2025},
1757
+ }
1758
+ """,
1759
+ )
1760
+
1684
1761
  BEIR_NL = Benchmark(
1685
1762
  name="BEIR-NL",
1686
1763
  display_name="BEIR-NL",
@@ -2330,23 +2407,23 @@ VIDORE_V3 = VidoreBenchmark(
2330
2407
  ]
2331
2408
  ),
2332
2409
  description="ViDoRe V3 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents. The benchmark includes both open and closed datasets: to submit results on private tasks, please [open an issue](https://github.com/embeddings-benchmark/mteb/issues?template=eval_request.yaml).",
2333
- reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
2410
+ reference="https://arxiv.org/abs/2601.08620",
2334
2411
  citation=r"""
2335
- @misc{mace2025vidorev3,
2336
- author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
2337
- day = {5},
2338
- howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
2339
- journal = {Hugging Face Blog},
2340
- month = {November},
2341
- publisher = {Hugging Face},
2342
- title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
2343
- year = {2025},
2412
+ @article{loison2026vidorev3comprehensiveevaluation,
2413
+ archiveprefix = {arXiv},
2414
+ author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
2415
+ eprint = {2601.08620},
2416
+ primaryclass = {cs.AI},
2417
+ title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
2418
+ url = {https://arxiv.org/abs/2601.08620},
2419
+ year = {2026},
2344
2420
  }
2345
2421
  """,
2346
2422
  )
2347
2423
 
2348
2424
  VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
2349
2425
  name="ViDoRe(v1&v2)",
2426
+ aliases=["VisualDocumentRetrieval"],
2350
2427
  display_name="ViDoRe (V1&V2)",
2351
2428
  tasks=get_tasks(
2352
2429
  tasks=[
@@ -2707,3 +2784,27 @@ JMTEB_LITE_V1 = Benchmark(
2707
2784
  """,
2708
2785
  contacts=["lsz05"],
2709
2786
  )
2787
+
2788
+ KOVIDORE_V2 = Benchmark(
2789
+ name="KoViDoRe(v2)",
2790
+ display_name="KoViDoRe v2",
2791
+ tasks=get_tasks(
2792
+ tasks=[
2793
+ "KoVidore2CybersecurityRetrieval",
2794
+ "KoVidore2EconomicRetrieval",
2795
+ "KoVidore2EnergyRetrieval",
2796
+ "KoVidore2HrRetrieval",
2797
+ ]
2798
+ ),
2799
+ description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.",
2800
+ reference="https://github.com/whybe-choi/kovidore-data-generator",
2801
+ citation=r"""
2802
+ @misc{choi2026kovidorev2,
2803
+ author = {Yongbin Choi},
2804
+ note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
2805
+ title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
2806
+ url = {https://github.com/whybe-choi/kovidore-data-generator},
2807
+ year = {2026},
2808
+ }
2809
+ """,
2810
+ )
@@ -1,6 +1,5 @@
1
1
  import difflib
2
2
  import logging
3
- import warnings
4
3
  from functools import lru_cache
5
4
 
6
5
  from .benchmark import Benchmark
@@ -20,53 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
20
19
  return benchmark_registry
21
20
 
22
21
 
23
- def _get_previous_benchmark_names() -> dict[str, str]:
24
- from .benchmarks import (
25
- BRIGHT_LONG,
26
- C_MTEB,
27
- FA_MTEB,
28
- MTEB_DEU,
29
- MTEB_EN,
30
- MTEB_ENG_CLASSIC,
31
- MTEB_EU,
32
- MTEB_FRA,
33
- MTEB_INDIC,
34
- MTEB_JPN,
35
- MTEB_KOR,
36
- MTEB_MAIN_RU,
37
- MTEB_POL,
38
- MTEB_RETRIEVAL_LAW,
39
- MTEB_RETRIEVAL_MEDICAL,
40
- MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
41
- SEB,
42
- VISUAL_DOCUMENT_RETRIEVAL,
43
- MTEB_code,
44
- MTEB_multilingual_v2,
45
- )
46
-
47
- previous_benchmark_names = {
48
- "MTEB(eng)": MTEB_EN.name,
49
- "MTEB(eng, classic)": MTEB_ENG_CLASSIC.name,
50
- "MTEB(rus)": MTEB_MAIN_RU.name,
51
- "MTEB(Retrieval w/Instructions)": MTEB_RETRIEVAL_WITH_INSTRUCTIONS.name,
52
- "MTEB(law)": MTEB_RETRIEVAL_LAW.name,
53
- "MTEB(Medical)": MTEB_RETRIEVAL_MEDICAL.name,
54
- "MTEB(Scandinavian)": SEB.name,
55
- "MTEB(fra)": MTEB_FRA.name,
56
- "MTEB(deu)": MTEB_DEU.name,
57
- "MTEB(kor)": MTEB_KOR.name,
58
- "MTEB(pol)": MTEB_POL.name,
59
- "MTEB(code)": MTEB_code.name,
60
- "MTEB(Multilingual)": MTEB_multilingual_v2.name,
61
- "MTEB(jpn)": MTEB_JPN.name,
62
- "MTEB(Indic)": MTEB_INDIC.name,
63
- "MTEB(Europe)": MTEB_EU.name,
64
- "MTEB(Chinese)": C_MTEB.name,
65
- "FaMTEB(fas, beta)": FA_MTEB.name,
66
- "BRIGHT(long)": BRIGHT_LONG.name,
67
- "VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
68
- }
69
- return previous_benchmark_names
22
+ @lru_cache
23
+ def _build_aliases_registry() -> dict[str, Benchmark]:
24
+ import mteb.benchmarks.benchmarks as benchmark_module
25
+
26
+ aliases: dict[str, Benchmark] = {}
27
+ for _, inst in benchmark_module.__dict__.items():
28
+ if isinstance(inst, Benchmark) and inst.aliases is not None:
29
+ for alias in inst.aliases:
30
+ aliases[alias] = inst
31
+ return aliases
70
32
 
71
33
 
72
34
  def get_benchmark(
@@ -80,14 +42,11 @@ def get_benchmark(
80
42
  Returns:
81
43
  The Benchmark instance corresponding to the given name.
82
44
  """
83
- previous_benchmark_names = _get_previous_benchmark_names()
84
45
  benchmark_registry = _build_registry()
85
- if benchmark_name in previous_benchmark_names:
86
- warnings.warn(
87
- f"Using the previous benchmark name '{benchmark_name}' is deprecated. Please use '{previous_benchmark_names[benchmark_name]}' instead.",
88
- DeprecationWarning,
89
- )
90
- benchmark_name = previous_benchmark_names[benchmark_name]
46
+ aliases_registry = _build_aliases_registry()
47
+
48
+ if benchmark_name in aliases_registry:
49
+ return aliases_registry[benchmark_name]
91
50
  if benchmark_name not in benchmark_registry:
92
51
  close_matches = difflib.get_close_matches(
93
52
  benchmark_name, benchmark_registry.keys()