mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
|
|
|
18
18
|
|
|
19
19
|
MTEB_EN = Benchmark(
|
|
20
20
|
name="MTEB(eng, v2)",
|
|
21
|
+
aliases=["MTEB(eng)"],
|
|
21
22
|
display_name="English",
|
|
22
23
|
icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
|
|
23
24
|
tasks=MTEBTasks(
|
|
@@ -89,6 +90,7 @@ The original MTEB leaderboard is available under the [MTEB(eng, v1)](http://mteb
|
|
|
89
90
|
|
|
90
91
|
MTEB_ENG_CLASSIC = Benchmark(
|
|
91
92
|
name="MTEB(eng, v1)",
|
|
93
|
+
aliases=["MTEB(eng, classic)", "MTEB"],
|
|
92
94
|
display_name="English Legacy",
|
|
93
95
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
|
|
94
96
|
tasks=MTEBTasks(
|
|
@@ -185,6 +187,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
|
|
|
185
187
|
|
|
186
188
|
MTEB_MAIN_RU = Benchmark(
|
|
187
189
|
name="MTEB(rus, v1)",
|
|
190
|
+
aliases=["MTEB(rus)"],
|
|
188
191
|
display_name="Russian legacy",
|
|
189
192
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
|
|
190
193
|
tasks=MTEBTasks(
|
|
@@ -344,6 +347,7 @@ RU_SCI_BENCH = Benchmark(
|
|
|
344
347
|
|
|
345
348
|
MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
|
|
346
349
|
name="FollowIR",
|
|
350
|
+
aliases=["MTEB(Retrieval w/Instructions)"],
|
|
347
351
|
display_name="Instruction Following",
|
|
348
352
|
tasks=get_tasks(
|
|
349
353
|
tasks=[
|
|
@@ -394,7 +398,9 @@ MTEB_RETRIEVAL_WITH_DOMAIN_INSTRUCTIONS = Benchmark(
|
|
|
394
398
|
)
|
|
395
399
|
|
|
396
400
|
MTEB_RETRIEVAL_LAW = Benchmark(
|
|
397
|
-
|
|
401
|
+
# This benchmark is likely in need of an update
|
|
402
|
+
name="MTEB(Law, v1)",
|
|
403
|
+
aliases=["MTEB(law)"],
|
|
398
404
|
display_name="Legal",
|
|
399
405
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
|
|
400
406
|
tasks=get_tasks(
|
|
@@ -416,6 +422,7 @@ MTEB_RETRIEVAL_LAW = Benchmark(
|
|
|
416
422
|
|
|
417
423
|
MTEB_RETRIEVAL_MEDICAL = Benchmark(
|
|
418
424
|
name="MTEB(Medical, v1)",
|
|
425
|
+
aliases=["MTEB(Medical)"],
|
|
419
426
|
display_name="Medical",
|
|
420
427
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
|
|
421
428
|
tasks=get_tasks(
|
|
@@ -469,6 +476,7 @@ MTEB_MINERS_BITEXT_MINING = Benchmark(
|
|
|
469
476
|
|
|
470
477
|
SEB = Benchmark(
|
|
471
478
|
name="MTEB(Scandinavian, v1)",
|
|
479
|
+
aliases=["MTEB(Scandinavian)", "SEB"],
|
|
472
480
|
display_name="Scandinavian",
|
|
473
481
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
|
|
474
482
|
language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
|
|
@@ -595,6 +603,7 @@ RAR_b = Benchmark(
|
|
|
595
603
|
|
|
596
604
|
MTEB_FRA = Benchmark(
|
|
597
605
|
name="MTEB(fra, v1)",
|
|
606
|
+
aliases=["MTEB(fra)"],
|
|
598
607
|
display_name="French",
|
|
599
608
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
|
|
600
609
|
tasks=MTEBTasks(
|
|
@@ -653,6 +662,7 @@ MTEB_FRA = Benchmark(
|
|
|
653
662
|
|
|
654
663
|
MTEB_DEU = Benchmark(
|
|
655
664
|
name="MTEB(deu, v1)",
|
|
665
|
+
aliases=["MTEB(deu)"],
|
|
656
666
|
display_name="German",
|
|
657
667
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
|
|
658
668
|
tasks=get_tasks(
|
|
@@ -704,6 +714,7 @@ MTEB_DEU = Benchmark(
|
|
|
704
714
|
|
|
705
715
|
MTEB_KOR = Benchmark(
|
|
706
716
|
name="MTEB(kor, v1)",
|
|
717
|
+
aliases=["MTEB(kor)"],
|
|
707
718
|
display_name="Korean",
|
|
708
719
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
|
|
709
720
|
tasks=get_tasks(
|
|
@@ -728,6 +739,7 @@ MTEB_KOR = Benchmark(
|
|
|
728
739
|
|
|
729
740
|
MTEB_POL = Benchmark(
|
|
730
741
|
name="MTEB(pol, v1)",
|
|
742
|
+
aliases=["MTEB(pol)"],
|
|
731
743
|
display_name="Polish",
|
|
732
744
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
|
|
733
745
|
tasks=MTEBTasks(
|
|
@@ -777,6 +789,7 @@ two novel clustering tasks.""", # Rephrased from the abstract
|
|
|
777
789
|
|
|
778
790
|
MTEB_code = Benchmark(
|
|
779
791
|
name="MTEB(Code, v1)",
|
|
792
|
+
aliases=["MTEB(code)"],
|
|
780
793
|
display_name="Code",
|
|
781
794
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
|
|
782
795
|
tasks=get_tasks(
|
|
@@ -953,6 +966,7 @@ MTEB_multilingual_v1 = Benchmark(
|
|
|
953
966
|
|
|
954
967
|
MTEB_multilingual_v2 = Benchmark(
|
|
955
968
|
name="MTEB(Multilingual, v2)",
|
|
969
|
+
aliases=["MTEB(Multilingual)", "MMTEB"],
|
|
956
970
|
display_name="Multilingual",
|
|
957
971
|
language_view=[
|
|
958
972
|
"eng-Latn", # English
|
|
@@ -986,6 +1000,7 @@ MTEB_multilingual_v2 = Benchmark(
|
|
|
986
1000
|
|
|
987
1001
|
MTEB_JPN = Benchmark(
|
|
988
1002
|
name="MTEB(jpn, v1)",
|
|
1003
|
+
aliases=["MTEB(jpn)"],
|
|
989
1004
|
display_name="Japanese Legacy",
|
|
990
1005
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
|
|
991
1006
|
tasks=get_tasks(
|
|
@@ -1056,6 +1071,7 @@ indic_languages = [
|
|
|
1056
1071
|
|
|
1057
1072
|
MTEB_INDIC = Benchmark(
|
|
1058
1073
|
name="MTEB(Indic, v1)",
|
|
1074
|
+
aliases=["MTEB(Indic)"],
|
|
1059
1075
|
display_name="Indic",
|
|
1060
1076
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
|
|
1061
1077
|
tasks=MTEBTasks(
|
|
@@ -1146,6 +1162,7 @@ eu_languages = [
|
|
|
1146
1162
|
|
|
1147
1163
|
MTEB_EU = Benchmark(
|
|
1148
1164
|
name="MTEB(Europe, v1)",
|
|
1165
|
+
aliases=["MTEB(Europe)"],
|
|
1149
1166
|
display_name="European",
|
|
1150
1167
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
|
|
1151
1168
|
tasks=get_tasks(
|
|
@@ -1285,6 +1302,7 @@ BRIGHT = Benchmark(
|
|
|
1285
1302
|
|
|
1286
1303
|
BRIGHT_LONG = Benchmark(
|
|
1287
1304
|
name="BRIGHT (long)",
|
|
1305
|
+
aliases=["BRIGHT(long)"],
|
|
1288
1306
|
tasks=MTEBTasks(
|
|
1289
1307
|
(
|
|
1290
1308
|
get_task(
|
|
@@ -1312,6 +1330,46 @@ This is the long version of the benchmark, which only filter longer documents.
|
|
|
1312
1330
|
""",
|
|
1313
1331
|
)
|
|
1314
1332
|
|
|
1333
|
+
BRIGHT_V1_1 = Benchmark(
|
|
1334
|
+
name="BRIGHT(v1.1)",
|
|
1335
|
+
display_name="Reasoning Retrieval",
|
|
1336
|
+
tasks=get_tasks(
|
|
1337
|
+
tasks=[
|
|
1338
|
+
"BrightBiologyRetrieval",
|
|
1339
|
+
"BrightEarthScienceRetrieval",
|
|
1340
|
+
"BrightEconomicsRetrieval",
|
|
1341
|
+
"BrightPsychologyRetrieval",
|
|
1342
|
+
"BrightRoboticsRetrieval",
|
|
1343
|
+
"BrightStackoverflowRetrieval",
|
|
1344
|
+
"BrightSustainableLivingRetrieval",
|
|
1345
|
+
"BrightPonyRetrieval",
|
|
1346
|
+
"BrightLeetcodeRetrieval",
|
|
1347
|
+
"BrightAopsRetrieval",
|
|
1348
|
+
"BrightTheoremQATheoremsRetrieval",
|
|
1349
|
+
"BrightTheoremQAQuestionsRetrieval",
|
|
1350
|
+
"BrightBiologyLongRetrieval",
|
|
1351
|
+
"BrightEarthScienceLongRetrieval",
|
|
1352
|
+
"BrightEconomicsLongRetrieval",
|
|
1353
|
+
"BrightPsychologyLongRetrieval",
|
|
1354
|
+
"BrightRoboticsLongRetrieval",
|
|
1355
|
+
"BrightStackoverflowLongRetrieval",
|
|
1356
|
+
"BrightSustainableLivingLongRetrieval",
|
|
1357
|
+
"BrightPonyLongRetrieval",
|
|
1358
|
+
],
|
|
1359
|
+
),
|
|
1360
|
+
description="v1.1 refactors BRIGHT into separate tasks and adds prompts to the individual tasks.",
|
|
1361
|
+
reference="https://brightbenchmark.github.io/",
|
|
1362
|
+
citation=r"""
|
|
1363
|
+
@article{su2024bright,
|
|
1364
|
+
author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others},
|
|
1365
|
+
journal = {arXiv preprint arXiv:2407.12883},
|
|
1366
|
+
title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval},
|
|
1367
|
+
year = {2024},
|
|
1368
|
+
}
|
|
1369
|
+
""",
|
|
1370
|
+
)
|
|
1371
|
+
|
|
1372
|
+
|
|
1315
1373
|
CODE_RAG = Benchmark(
|
|
1316
1374
|
name="CodeRAG",
|
|
1317
1375
|
tasks=get_tasks(
|
|
@@ -1400,6 +1458,7 @@ NANOBEIR = Benchmark(
|
|
|
1400
1458
|
|
|
1401
1459
|
C_MTEB = Benchmark(
|
|
1402
1460
|
name="MTEB(cmn, v1)",
|
|
1461
|
+
aliases=["MTEB(Chinese)", "CMTEB"],
|
|
1403
1462
|
display_name="Chinese",
|
|
1404
1463
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
|
|
1405
1464
|
tasks=MTEBTasks(
|
|
@@ -1466,6 +1525,7 @@ C_MTEB = Benchmark(
|
|
|
1466
1525
|
|
|
1467
1526
|
FA_MTEB = Benchmark(
|
|
1468
1527
|
name="MTEB(fas, v1)",
|
|
1528
|
+
aliases=["FaMTEB(fas, beta)"],
|
|
1469
1529
|
display_name="Farsi Legacy",
|
|
1470
1530
|
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
|
|
1471
1531
|
tasks=get_tasks(
|
|
@@ -1636,6 +1696,7 @@ FA_MTEB_2 = Benchmark(
|
|
|
1636
1696
|
|
|
1637
1697
|
CHEMTEB = Benchmark(
|
|
1638
1698
|
name="ChemTEB",
|
|
1699
|
+
aliases=["ChemTEB(v1)"],
|
|
1639
1700
|
display_name="Chemical",
|
|
1640
1701
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
|
|
1641
1702
|
tasks=get_tasks(
|
|
@@ -1681,6 +1742,62 @@ CHEMTEB = Benchmark(
|
|
|
1681
1742
|
""",
|
|
1682
1743
|
)
|
|
1683
1744
|
|
|
1745
|
+
CHEMTEB_V1_1 = Benchmark(
|
|
1746
|
+
name="ChemTEB(v1.1)",
|
|
1747
|
+
aliases=["ChemTEB(latest)"],
|
|
1748
|
+
display_name="Chemical",
|
|
1749
|
+
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-purge.svg",
|
|
1750
|
+
tasks=get_tasks(
|
|
1751
|
+
tasks=[
|
|
1752
|
+
"PubChemSMILESBitextMining",
|
|
1753
|
+
"SDSEyeProtectionClassification",
|
|
1754
|
+
"SDSGlovesClassification",
|
|
1755
|
+
"WikipediaBioMetChemClassification",
|
|
1756
|
+
"WikipediaGreenhouseEnantiopureClassification",
|
|
1757
|
+
"WikipediaSolidStateColloidalClassification",
|
|
1758
|
+
"WikipediaOrganicInorganicClassification",
|
|
1759
|
+
"WikipediaCryobiologySeparationClassification",
|
|
1760
|
+
"WikipediaChemistryTopicsClassification",
|
|
1761
|
+
"WikipediaTheoreticalAppliedClassification",
|
|
1762
|
+
"WikipediaChemFieldsClassification",
|
|
1763
|
+
"WikipediaLuminescenceClassification",
|
|
1764
|
+
"WikipediaIsotopesFissionClassification",
|
|
1765
|
+
"WikipediaSaltsSemiconductorsClassification",
|
|
1766
|
+
"WikipediaBiolumNeurochemClassification",
|
|
1767
|
+
"WikipediaCrystallographyAnalyticalClassification",
|
|
1768
|
+
"WikipediaCompChemSpectroscopyClassification",
|
|
1769
|
+
"WikipediaChemEngSpecialtiesClassification",
|
|
1770
|
+
"WikipediaChemistryTopicsClustering",
|
|
1771
|
+
"WikipediaSpecialtiesInChemistryClustering",
|
|
1772
|
+
"PubChemAISentenceParaphrasePC",
|
|
1773
|
+
"PubChemSMILESPC",
|
|
1774
|
+
"PubChemSynonymPC",
|
|
1775
|
+
"PubChemWikiParagraphsPC",
|
|
1776
|
+
"PubChemWikiPairClassification",
|
|
1777
|
+
"ChemNQRetrieval",
|
|
1778
|
+
"ChemHotpotQARetrieval",
|
|
1779
|
+
"ChemRxivRetrieval",
|
|
1780
|
+
],
|
|
1781
|
+
),
|
|
1782
|
+
description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version adds the ChemRxivRetrieval task.",
|
|
1783
|
+
reference="https://arxiv.org/abs/2412.00532",
|
|
1784
|
+
citation=r"""
|
|
1785
|
+
@article{kasmaee2024chemteb,
|
|
1786
|
+
author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
|
|
1787
|
+
journal = {arXiv preprint arXiv:2412.00532},
|
|
1788
|
+
title = {ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \\& Efficiency on a Specific Domain},
|
|
1789
|
+
year = {2024},
|
|
1790
|
+
}
|
|
1791
|
+
|
|
1792
|
+
@article{kasmaee2025chembed,
|
|
1793
|
+
author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
|
|
1794
|
+
journal = {arXiv preprint arXiv:2508.01643},
|
|
1795
|
+
title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
|
|
1796
|
+
year = {2025},
|
|
1797
|
+
}
|
|
1798
|
+
""",
|
|
1799
|
+
)
|
|
1800
|
+
|
|
1684
1801
|
BEIR_NL = Benchmark(
|
|
1685
1802
|
name="BEIR-NL",
|
|
1686
1803
|
display_name="BEIR-NL",
|
|
@@ -1704,8 +1821,7 @@ BEIR_NL = Benchmark(
|
|
|
1704
1821
|
"TRECCOVID-NL",
|
|
1705
1822
|
],
|
|
1706
1823
|
),
|
|
1707
|
-
description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated "
|
|
1708
|
-
"translation.",
|
|
1824
|
+
description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.",
|
|
1709
1825
|
reference="https://arxiv.org/abs/2412.08329",
|
|
1710
1826
|
contacts=["nikolay-banar"],
|
|
1711
1827
|
citation=r"""
|
|
@@ -2330,23 +2446,23 @@ VIDORE_V3 = VidoreBenchmark(
|
|
|
2330
2446
|
]
|
|
2331
2447
|
),
|
|
2332
2448
|
description="ViDoRe V3 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents. The benchmark includes both open and closed datasets: to submit results on private tasks, please [open an issue](https://github.com/embeddings-benchmark/mteb/issues?template=eval_request.yaml).",
|
|
2333
|
-
reference="https://
|
|
2449
|
+
reference="https://arxiv.org/abs/2601.08620",
|
|
2334
2450
|
citation=r"""
|
|
2335
|
-
@
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
|
|
2342
|
-
|
|
2343
|
-
year = {2025},
|
|
2451
|
+
@article{loison2026vidorev3comprehensiveevaluation,
|
|
2452
|
+
archiveprefix = {arXiv},
|
|
2453
|
+
author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
|
|
2454
|
+
eprint = {2601.08620},
|
|
2455
|
+
primaryclass = {cs.AI},
|
|
2456
|
+
title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
|
|
2457
|
+
url = {https://arxiv.org/abs/2601.08620},
|
|
2458
|
+
year = {2026},
|
|
2344
2459
|
}
|
|
2345
2460
|
""",
|
|
2346
2461
|
)
|
|
2347
2462
|
|
|
2348
2463
|
VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
|
|
2349
2464
|
name="ViDoRe(v1&v2)",
|
|
2465
|
+
aliases=["VisualDocumentRetrieval"],
|
|
2350
2466
|
display_name="ViDoRe (V1&V2)",
|
|
2351
2467
|
tasks=get_tasks(
|
|
2352
2468
|
tasks=[
|
|
@@ -2707,3 +2823,27 @@ JMTEB_LITE_V1 = Benchmark(
|
|
|
2707
2823
|
""",
|
|
2708
2824
|
contacts=["lsz05"],
|
|
2709
2825
|
)
|
|
2826
|
+
|
|
2827
|
+
KOVIDORE_V2 = Benchmark(
|
|
2828
|
+
name="KoViDoRe(v2)",
|
|
2829
|
+
display_name="KoViDoRe v2",
|
|
2830
|
+
tasks=get_tasks(
|
|
2831
|
+
tasks=[
|
|
2832
|
+
"KoVidore2CybersecurityRetrieval",
|
|
2833
|
+
"KoVidore2EconomicRetrieval",
|
|
2834
|
+
"KoVidore2EnergyRetrieval",
|
|
2835
|
+
"KoVidore2HrRetrieval",
|
|
2836
|
+
]
|
|
2837
|
+
),
|
|
2838
|
+
description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.",
|
|
2839
|
+
reference="https://github.com/whybe-choi/kovidore-data-generator",
|
|
2840
|
+
citation=r"""
|
|
2841
|
+
@misc{choi2026kovidorev2,
|
|
2842
|
+
author = {Yongbin Choi},
|
|
2843
|
+
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
|
|
2844
|
+
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
|
|
2845
|
+
url = {https://github.com/whybe-choi/kovidore-data-generator},
|
|
2846
|
+
year = {2026},
|
|
2847
|
+
}
|
|
2848
|
+
""",
|
|
2849
|
+
)
|
|
@@ -10,6 +10,8 @@ RTEB_CITATION = r"""@article{rteb2025,
|
|
|
10
10
|
year = {2025},
|
|
11
11
|
}"""
|
|
12
12
|
|
|
13
|
+
removal_note = "\n\nNote: We have temporarily removed the 'Private' column. To read more about this decision, check out the [announcement](https://github.com/embeddings-benchmark/mteb/issues/3934)."
|
|
14
|
+
|
|
13
15
|
RTEB_MAIN = RtebBenchmark(
|
|
14
16
|
name="RTEB(beta)",
|
|
15
17
|
display_name="RTEB Multilingual",
|
|
@@ -48,7 +50,8 @@ RTEB_MAIN = RtebBenchmark(
|
|
|
48
50
|
"JapaneseLegal1Retrieval",
|
|
49
51
|
],
|
|
50
52
|
),
|
|
51
|
-
description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
53
|
+
description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
54
|
+
+ removal_note,
|
|
52
55
|
citation=RTEB_CITATION,
|
|
53
56
|
contacts=["fzowl"],
|
|
54
57
|
)
|
|
@@ -83,7 +86,8 @@ RTEB_ENGLISH = RtebBenchmark(
|
|
|
83
86
|
],
|
|
84
87
|
languages=["eng"],
|
|
85
88
|
),
|
|
86
|
-
description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
89
|
+
description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
90
|
+
+ removal_note,
|
|
87
91
|
citation=RTEB_CITATION,
|
|
88
92
|
contacts=["fzowl"],
|
|
89
93
|
)
|
|
@@ -101,7 +105,8 @@ RTEB_FRENCH = RtebBenchmark(
|
|
|
101
105
|
],
|
|
102
106
|
languages=["fra"],
|
|
103
107
|
),
|
|
104
|
-
description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
108
|
+
description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
109
|
+
+ removal_note,
|
|
105
110
|
citation=RTEB_CITATION,
|
|
106
111
|
contacts=["fzowl"],
|
|
107
112
|
)
|
|
@@ -119,7 +124,8 @@ RTEB_GERMAN = RtebBenchmark(
|
|
|
119
124
|
"GermanLegal1Retrieval",
|
|
120
125
|
],
|
|
121
126
|
),
|
|
122
|
-
description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
127
|
+
description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
128
|
+
+ removal_note,
|
|
123
129
|
citation=RTEB_CITATION,
|
|
124
130
|
contacts=["fzowl"],
|
|
125
131
|
)
|
|
@@ -135,7 +141,8 @@ RTEB_JAPANESE = RtebBenchmark(
|
|
|
135
141
|
"JapaneseLegal1Retrieval",
|
|
136
142
|
],
|
|
137
143
|
),
|
|
138
|
-
description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
144
|
+
description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
145
|
+
+ removal_note,
|
|
139
146
|
citation=RTEB_CITATION,
|
|
140
147
|
contacts=["fzowl"],
|
|
141
148
|
)
|
|
@@ -156,7 +163,8 @@ RTEB_FINANCE = RtebBenchmark(
|
|
|
156
163
|
"EnglishFinance4Retrieval",
|
|
157
164
|
],
|
|
158
165
|
),
|
|
159
|
-
description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
166
|
+
description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
167
|
+
+ removal_note,
|
|
160
168
|
citation=RTEB_CITATION,
|
|
161
169
|
contacts=["fzowl"],
|
|
162
170
|
)
|
|
@@ -177,7 +185,8 @@ RTEB_LEGAL = RtebBenchmark(
|
|
|
177
185
|
"JapaneseLegal1Retrieval",
|
|
178
186
|
],
|
|
179
187
|
),
|
|
180
|
-
description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
188
|
+
description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
189
|
+
+ removal_note,
|
|
181
190
|
citation=RTEB_CITATION,
|
|
182
191
|
contacts=["fzowl"],
|
|
183
192
|
)
|
|
@@ -199,7 +208,8 @@ RTEB_CODE = RtebBenchmark(
|
|
|
199
208
|
"JapaneseCode1Retrieval",
|
|
200
209
|
],
|
|
201
210
|
),
|
|
202
|
-
description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
211
|
+
description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
212
|
+
+ removal_note,
|
|
203
213
|
citation=RTEB_CITATION,
|
|
204
214
|
contacts=["fzowl"],
|
|
205
215
|
)
|
|
@@ -217,7 +227,8 @@ RTEB_HEALTHCARE = RtebBenchmark(
|
|
|
217
227
|
"GermanHealthcare1Retrieval",
|
|
218
228
|
],
|
|
219
229
|
),
|
|
220
|
-
description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
230
|
+
description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
|
|
231
|
+
+ removal_note,
|
|
221
232
|
citation=RTEB_CITATION,
|
|
222
233
|
contacts=["fzowl"],
|
|
223
234
|
)
|
mteb/benchmarks/get_benchmark.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import difflib
|
|
2
2
|
import logging
|
|
3
|
-
import warnings
|
|
4
3
|
from functools import lru_cache
|
|
5
4
|
|
|
6
5
|
from .benchmark import Benchmark
|
|
@@ -20,53 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
|
|
|
20
19
|
return benchmark_registry
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
MTEB_INDIC,
|
|
34
|
-
MTEB_JPN,
|
|
35
|
-
MTEB_KOR,
|
|
36
|
-
MTEB_MAIN_RU,
|
|
37
|
-
MTEB_POL,
|
|
38
|
-
MTEB_RETRIEVAL_LAW,
|
|
39
|
-
MTEB_RETRIEVAL_MEDICAL,
|
|
40
|
-
MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
|
|
41
|
-
SEB,
|
|
42
|
-
VISUAL_DOCUMENT_RETRIEVAL,
|
|
43
|
-
MTEB_code,
|
|
44
|
-
MTEB_multilingual_v2,
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
previous_benchmark_names = {
|
|
48
|
-
"MTEB(eng)": MTEB_EN.name,
|
|
49
|
-
"MTEB(eng, classic)": MTEB_ENG_CLASSIC.name,
|
|
50
|
-
"MTEB(rus)": MTEB_MAIN_RU.name,
|
|
51
|
-
"MTEB(Retrieval w/Instructions)": MTEB_RETRIEVAL_WITH_INSTRUCTIONS.name,
|
|
52
|
-
"MTEB(law)": MTEB_RETRIEVAL_LAW.name,
|
|
53
|
-
"MTEB(Medical)": MTEB_RETRIEVAL_MEDICAL.name,
|
|
54
|
-
"MTEB(Scandinavian)": SEB.name,
|
|
55
|
-
"MTEB(fra)": MTEB_FRA.name,
|
|
56
|
-
"MTEB(deu)": MTEB_DEU.name,
|
|
57
|
-
"MTEB(kor)": MTEB_KOR.name,
|
|
58
|
-
"MTEB(pol)": MTEB_POL.name,
|
|
59
|
-
"MTEB(code)": MTEB_code.name,
|
|
60
|
-
"MTEB(Multilingual)": MTEB_multilingual_v2.name,
|
|
61
|
-
"MTEB(jpn)": MTEB_JPN.name,
|
|
62
|
-
"MTEB(Indic)": MTEB_INDIC.name,
|
|
63
|
-
"MTEB(Europe)": MTEB_EU.name,
|
|
64
|
-
"MTEB(Chinese)": C_MTEB.name,
|
|
65
|
-
"FaMTEB(fas, beta)": FA_MTEB.name,
|
|
66
|
-
"BRIGHT(long)": BRIGHT_LONG.name,
|
|
67
|
-
"VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
|
|
68
|
-
}
|
|
69
|
-
return previous_benchmark_names
|
|
22
|
+
@lru_cache
|
|
23
|
+
def _build_aliases_registry() -> dict[str, Benchmark]:
|
|
24
|
+
import mteb.benchmarks.benchmarks as benchmark_module
|
|
25
|
+
|
|
26
|
+
aliases: dict[str, Benchmark] = {}
|
|
27
|
+
for _, inst in benchmark_module.__dict__.items():
|
|
28
|
+
if isinstance(inst, Benchmark) and inst.aliases is not None:
|
|
29
|
+
for alias in inst.aliases:
|
|
30
|
+
aliases[alias] = inst
|
|
31
|
+
return aliases
|
|
70
32
|
|
|
71
33
|
|
|
72
34
|
def get_benchmark(
|
|
@@ -80,14 +42,11 @@ def get_benchmark(
|
|
|
80
42
|
Returns:
|
|
81
43
|
The Benchmark instance corresponding to the given name.
|
|
82
44
|
"""
|
|
83
|
-
previous_benchmark_names = _get_previous_benchmark_names()
|
|
84
45
|
benchmark_registry = _build_registry()
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
)
|
|
90
|
-
benchmark_name = previous_benchmark_names[benchmark_name]
|
|
46
|
+
aliases_registry = _build_aliases_registry()
|
|
47
|
+
|
|
48
|
+
if benchmark_name in aliases_registry:
|
|
49
|
+
return aliases_registry[benchmark_name]
|
|
91
50
|
if benchmark_name not in benchmark_registry:
|
|
92
51
|
close_matches = difflib.get_close_matches(
|
|
93
52
|
benchmark_name, benchmark_registry.keys()
|