mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/benchmarks/benchmarks/benchmarks.py CHANGED

```diff
@@ -18,6 +18,7 @@ MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
 
 MTEB_EN = Benchmark(
     name="MTEB(eng, v2)",
+    aliases=["MTEB(eng)"],
     display_name="English",
     icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
     tasks=MTEBTasks(
@@ -89,6 +90,7 @@ The original MTEB leaderboard is available under the [MTEB(eng, v1)](http://mteb
 
 MTEB_ENG_CLASSIC = Benchmark(
     name="MTEB(eng, v1)",
+    aliases=["MTEB(eng, classic)", "MTEB"],
     display_name="English Legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
     tasks=MTEBTasks(
@@ -185,6 +187,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
 
 MTEB_MAIN_RU = Benchmark(
     name="MTEB(rus, v1)",
+    aliases=["MTEB(rus)"],
     display_name="Russian legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
     tasks=MTEBTasks(
@@ -344,6 +347,7 @@ RU_SCI_BENCH = Benchmark(
 
 MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
     name="FollowIR",
+    aliases=["MTEB(Retrieval w/Instructions)"],
     display_name="Instruction Following",
     tasks=get_tasks(
         tasks=[
@@ -394,7 +398,9 @@ MTEB_RETRIEVAL_WITH_DOMAIN_INSTRUCTIONS = Benchmark(
 )
 
 MTEB_RETRIEVAL_LAW = Benchmark(
-
+    # This benchmark is likely in the need of an update
+    name="MTEB(Law, v1)",
+    aliases=["MTEB(law)"],
     display_name="Legal",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
     tasks=get_tasks(
@@ -416,6 +422,7 @@ MTEB_RETRIEVAL_LAW = Benchmark(
 
 MTEB_RETRIEVAL_MEDICAL = Benchmark(
     name="MTEB(Medical, v1)",
+    aliases=["MTEB(Medical)"],
     display_name="Medical",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
     tasks=get_tasks(
@@ -469,6 +476,7 @@ MTEB_MINERS_BITEXT_MINING = Benchmark(
 
 SEB = Benchmark(
     name="MTEB(Scandinavian, v1)",
+    aliases=["MTEB(Scandinavian)", "SEB"],
     display_name="Scandinavian",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
     language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
@@ -595,6 +603,7 @@ RAR_b = Benchmark(
 
 MTEB_FRA = Benchmark(
     name="MTEB(fra, v1)",
+    aliases=["MTEB(fra)"],
     display_name="French",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
     tasks=MTEBTasks(
@@ -653,6 +662,7 @@ MTEB_FRA = Benchmark(
 
 MTEB_DEU = Benchmark(
     name="MTEB(deu, v1)",
+    aliases=["MTEB(deu)"],
     display_name="German",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
     tasks=get_tasks(
@@ -704,6 +714,7 @@ MTEB_DEU = Benchmark(
 
 MTEB_KOR = Benchmark(
     name="MTEB(kor, v1)",
+    aliases=["MTEB(kor)"],
     display_name="Korean",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
     tasks=get_tasks(
@@ -728,6 +739,7 @@ MTEB_KOR = Benchmark(
 
 MTEB_POL = Benchmark(
     name="MTEB(pol, v1)",
+    aliases=["MTEB(pol)"],
     display_name="Polish",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
     tasks=MTEBTasks(
@@ -777,6 +789,7 @@ two novel clustering tasks.""", # Rephrased from the abstract
 
 MTEB_code = Benchmark(
     name="MTEB(Code, v1)",
+    aliases=["MTEB(code)"],
     display_name="Code",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
     tasks=get_tasks(
@@ -953,6 +966,7 @@ MTEB_multilingual_v1 = Benchmark(
 
 MTEB_multilingual_v2 = Benchmark(
     name="MTEB(Multilingual, v2)",
+    aliases=["MTEB(Multilingual)", "MMTEB"],
     display_name="Multilingual",
     language_view=[
         "eng-Latn",  # English
@@ -986,6 +1000,7 @@ MTEB_multilingual_v2 = Benchmark(
 
 MTEB_JPN = Benchmark(
     name="MTEB(jpn, v1)",
+    aliases=["MTEB(jpn)"],
     display_name="Japanese Legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
     tasks=get_tasks(
@@ -1056,6 +1071,7 @@ indic_languages = [
 
 MTEB_INDIC = Benchmark(
     name="MTEB(Indic, v1)",
+    aliases=["MTEB(Indic)"],
     display_name="Indic",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
     tasks=MTEBTasks(
@@ -1146,6 +1162,7 @@ eu_languages = [
 
 MTEB_EU = Benchmark(
     name="MTEB(Europe, v1)",
+    aliases=["MTEB(Europe)"],
     display_name="European",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
     tasks=get_tasks(
@@ -1285,6 +1302,7 @@ BRIGHT = Benchmark(
 
 BRIGHT_LONG = Benchmark(
     name="BRIGHT (long)",
+    aliases=["BRIGHT(long)"],
     tasks=MTEBTasks(
         (
             get_task(
@@ -1400,6 +1418,7 @@ NANOBEIR = Benchmark(
 
 C_MTEB = Benchmark(
     name="MTEB(cmn, v1)",
+    aliases=["MTEB(Chinese)", "CMTEB"],
     display_name="Chinese",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
     tasks=MTEBTasks(
@@ -1466,6 +1485,7 @@ C_MTEB = Benchmark(
 
 FA_MTEB = Benchmark(
     name="MTEB(fas, v1)",
+    aliases=["FaMTEB(fas, beta)"],
     display_name="Farsi Legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
     tasks=get_tasks(
@@ -1636,6 +1656,7 @@ FA_MTEB_2 = Benchmark(
 
 CHEMTEB = Benchmark(
     name="ChemTEB",
+    aliases=["ChemTEB(v1)"],
     display_name="Chemical",
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
     tasks=get_tasks(
@@ -1681,6 +1702,62 @@ CHEMTEB = Benchmark(
     """,
 )
 
+CHEMTEB_V1_1 = Benchmark(
+    name="ChemTEB(v1.1)",
+    aliases=["ChemTEB(latest)"],
+    display_name="Chemical",
+    icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
+    tasks=get_tasks(
+        tasks=[
+            "PubChemSMILESBitextMining",
+            "SDSEyeProtectionClassification",
+            "SDSGlovesClassification",
+            "WikipediaBioMetChemClassification",
+            "WikipediaGreenhouseEnantiopureClassification",
+            "WikipediaSolidStateColloidalClassification",
+            "WikipediaOrganicInorganicClassification",
+            "WikipediaCryobiologySeparationClassification",
+            "WikipediaChemistryTopicsClassification",
+            "WikipediaTheoreticalAppliedClassification",
+            "WikipediaChemFieldsClassification",
+            "WikipediaLuminescenceClassification",
+            "WikipediaIsotopesFissionClassification",
+            "WikipediaSaltsSemiconductorsClassification",
+            "WikipediaBiolumNeurochemClassification",
+            "WikipediaCrystallographyAnalyticalClassification",
+            "WikipediaCompChemSpectroscopyClassification",
+            "WikipediaChemEngSpecialtiesClassification",
+            "WikipediaChemistryTopicsClustering",
+            "WikipediaSpecialtiesInChemistryClustering",
+            "PubChemAISentenceParaphrasePC",
+            "PubChemSMILESPC",
+            "PubChemSynonymPC",
+            "PubChemWikiParagraphsPC",
+            "PubChemWikiPairClassification",
+            "ChemNQRetrieval",
+            "ChemHotpotQARetrieval",
+            "ChemRxivRetrieval",
+        ],
+    ),
+    description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version adds the ChemRxivRetrieval task.",
+    reference="https://arxiv.org/abs/2412.00532",
+    citation=r"""
+@article{kasmaee2024chemteb,
+  author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
+  journal = {arXiv preprint arXiv:2412.00532},
+  title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain},
+  year = {2024},
+}
+
+@article{kasmaee2025chembed,
+  author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
+  journal = {arXiv preprint arXiv:2508.01643},
+  title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
+  year = {2025},
+}
+""",
+)
+
 BEIR_NL = Benchmark(
     name="BEIR-NL",
     display_name="BEIR-NL",
@@ -2330,23 +2407,23 @@ VIDORE_V3 = VidoreBenchmark(
         ]
     ),
     description="ViDoRe V3 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents. The benchmark includes both open and closed datasets: to submit results on private tasks, please [open an issue](https://github.com/embeddings-benchmark/mteb/issues?template=eval_request.yaml).",
-    reference="https://
+    reference="https://arxiv.org/abs/2601.08620",
     citation=r"""
-@
-
-
-
-
-
-
-
-    year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
 )
 
 VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
     name="ViDoRe(v1&v2)",
+    aliases=["VisualDocumentRetrieval"],
     display_name="ViDoRe (V1&V2)",
     tasks=get_tasks(
         tasks=[
@@ -2707,3 +2784,27 @@ JMTEB_LITE_V1 = Benchmark(
 """,
     contacts=["lsz05"],
 )
+
+KOVIDORE_V2 = Benchmark(
+    name="KoViDoRe(v2)",
+    display_name="KoViDoRe v2",
+    tasks=get_tasks(
+        tasks=[
+            "KoVidore2CybersecurityRetrieval",
+            "KoVidore2EconomicRetrieval",
+            "KoVidore2EnergyRetrieval",
+            "KoVidore2HrRetrieval",
+        ]
+    ),
+    description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.",
+    reference="https://github.com/whybe-choi/kovidore-data-generator",
+    citation=r"""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+)
```
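The practical effect of the new `aliases` field is that deprecated and shorthand benchmark names keep resolving to the same `Benchmark` objects as their canonical names, via the lookup logic in the `get_benchmark.py` diff below. A minimal usage sketch, assuming mteb ≥ 2.7.2 is installed (the names come from the aliases declared above):

```python
import mteb

# Legacy names now resolve through Benchmark.aliases instead of a
# hard-coded "previous names" mapping.
current = mteb.get_benchmark("MTEB(eng, v2)")
legacy = mteb.get_benchmark("MTEB(eng)")  # alias declared above
assert current.name == legacy.name == "MTEB(eng, v2)"

# Benchmarks added in this release resolve the same way:
chemteb = mteb.get_benchmark("ChemTEB(latest)")  # alias of "ChemTEB(v1.1)"
kovidore = mteb.get_benchmark("KoViDoRe(v2)")    # new in 2.7.x, no aliases
print(chemteb.name, kovidore.name)
```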
mteb/benchmarks/get_benchmark.py CHANGED

```diff
@@ -1,6 +1,5 @@
 import difflib
 import logging
-import warnings
 from functools import lru_cache
 
 from .benchmark import Benchmark
@@ -20,53 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
     return benchmark_registry
 
 
-
-
-
-
-
-
-
-
-
-
-        MTEB_INDIC,
-        MTEB_JPN,
-        MTEB_KOR,
-        MTEB_MAIN_RU,
-        MTEB_POL,
-        MTEB_RETRIEVAL_LAW,
-        MTEB_RETRIEVAL_MEDICAL,
-        MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
-        SEB,
-        VISUAL_DOCUMENT_RETRIEVAL,
-        MTEB_code,
-        MTEB_multilingual_v2,
-    )
-
-    previous_benchmark_names = {
-        "MTEB(eng)": MTEB_EN.name,
-        "MTEB(eng, classic)": MTEB_ENG_CLASSIC.name,
-        "MTEB(rus)": MTEB_MAIN_RU.name,
-        "MTEB(Retrieval w/Instructions)": MTEB_RETRIEVAL_WITH_INSTRUCTIONS.name,
-        "MTEB(law)": MTEB_RETRIEVAL_LAW.name,
-        "MTEB(Medical)": MTEB_RETRIEVAL_MEDICAL.name,
-        "MTEB(Scandinavian)": SEB.name,
-        "MTEB(fra)": MTEB_FRA.name,
-        "MTEB(deu)": MTEB_DEU.name,
-        "MTEB(kor)": MTEB_KOR.name,
-        "MTEB(pol)": MTEB_POL.name,
-        "MTEB(code)": MTEB_code.name,
-        "MTEB(Multilingual)": MTEB_multilingual_v2.name,
-        "MTEB(jpn)": MTEB_JPN.name,
-        "MTEB(Indic)": MTEB_INDIC.name,
-        "MTEB(Europe)": MTEB_EU.name,
-        "MTEB(Chinese)": C_MTEB.name,
-        "FaMTEB(fas, beta)": FA_MTEB.name,
-        "BRIGHT(long)": BRIGHT_LONG.name,
-        "VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
-    }
-    return previous_benchmark_names
+@lru_cache
+def _build_aliases_registry() -> dict[str, Benchmark]:
+    import mteb.benchmarks.benchmarks as benchmark_module
+
+    aliases: dict[str, Benchmark] = {}
+    for _, inst in benchmark_module.__dict__.items():
+        if isinstance(inst, Benchmark) and inst.aliases is not None:
+            for alias in inst.aliases:
+                aliases[alias] = inst
+    return aliases
 
 
 def get_benchmark(
@@ -80,14 +42,11 @@ def get_benchmark(
     Returns:
         The Benchmark instance corresponding to the given name.
     """
-    previous_benchmark_names = _get_previous_benchmark_names()
     benchmark_registry = _build_registry()
-
-
-
-
-    )
-    benchmark_name = previous_benchmark_names[benchmark_name]
+    aliases_registry = _build_aliases_registry()
+
+    if benchmark_name in aliases_registry:
+        return aliases_registry[benchmark_name]
     if benchmark_name not in benchmark_registry:
         close_matches = difflib.get_close_matches(
             benchmark_name, benchmark_registry.keys(
```