mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/cache.py
CHANGED
|
@@ -1,19 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import gzip
|
|
4
|
+
import io
|
|
1
5
|
import json
|
|
2
6
|
import logging
|
|
3
7
|
import os
|
|
4
8
|
import shutil
|
|
5
9
|
import subprocess
|
|
10
|
+
import warnings
|
|
6
11
|
from collections import defaultdict
|
|
7
|
-
from collections.abc import Sequence
|
|
8
12
|
from pathlib import Path
|
|
9
|
-
from typing import cast
|
|
13
|
+
from typing import TYPE_CHECKING, cast
|
|
14
|
+
|
|
15
|
+
import requests
|
|
16
|
+
from pydantic import ValidationError
|
|
10
17
|
|
|
11
18
|
import mteb
|
|
12
19
|
from mteb.abstasks import AbsTask
|
|
13
20
|
from mteb.benchmarks.benchmark import Benchmark
|
|
14
21
|
from mteb.models import ModelMeta
|
|
15
22
|
from mteb.results import BenchmarkResults, ModelResult, TaskResult
|
|
16
|
-
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from collections.abc import Iterable, Sequence
|
|
26
|
+
|
|
27
|
+
from mteb.types import ModelName, Revision
|
|
17
28
|
|
|
18
29
|
logger = logging.getLogger(__name__)
|
|
19
30
|
|
|
@@ -22,8 +33,8 @@ class ResultCache:
|
|
|
22
33
|
"""Class to handle the local cache of MTEB results.
|
|
23
34
|
|
|
24
35
|
Examples:
|
|
25
|
-
>>>
|
|
26
|
-
>>> cache = ResultCache(cache_path="~/.cache/mteb") # default
|
|
36
|
+
>>> import mteb
|
|
37
|
+
>>> cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
|
|
27
38
|
>>> cache.download_from_remote() # download the latest results from the remote repository
|
|
28
39
|
>>> result = cache.load_results("task_name", "model_name")
|
|
29
40
|
"""
|
|
@@ -83,9 +94,9 @@ class ResultCache:
|
|
|
83
94
|
model_path = results_folder / model_name
|
|
84
95
|
|
|
85
96
|
if model_revision is None:
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
)
|
|
97
|
+
msg = "`model_revision` is not specified, attempting to load the latest revision. To disable this behavior, specify the 'model_revision` explicitly."
|
|
98
|
+
logger.warning(msg)
|
|
99
|
+
warnings.warn(msg)
|
|
89
100
|
# get revs from paths
|
|
90
101
|
revisions = [p for p in model_path.glob("*") if p.is_dir()]
|
|
91
102
|
if not revisions:
|
|
@@ -275,21 +286,165 @@ class ResultCache:
|
|
|
275
286
|
|
|
276
287
|
return results_directory
|
|
277
288
|
|
|
289
|
+
def _download_cached_results_from_branch(
    self,
    branch: str = "cached-data",
    filename: str = "__cached_results.json.gz",
    output_path: Path | None = None,
    remote: str = "https://github.com/embeddings-benchmark/results",
    timeout: int = 60,
    max_size_mb: int = 500,
) -> Path:
    """Download pre-computed cached results from a specific branch.

    This is significantly faster than download_from_remote() since it downloads
    only a compressed cache file instead of cloning the entire repository.

    The method performs the following steps:
    1. Downloads a gzipped JSON file from the specified branch
       (falling back to the Git LFS media URL when a pointer file is returned)
    2. Validates content type and file size
    3. Decompresses the gzip content
    4. Writes the decompressed JSON to disk

    Args:
        branch: Branch name to download from (default: "cached-data")
        filename: Name of the cached results file (default: "__cached_results.json.gz")
        output_path: Where to save the file. If None, uses mteb/leaderboard/__cached_results.json
        remote: Base URL of the results repository. A trailing slash is ignored.
        timeout: Request timeout in seconds (default: 60)
        max_size_mb: Maximum allowed size of the downloaded (compressed) file in
            megabytes (default: 500)

    Returns:
        Path to the downloaded and decompressed cache file

    Raises:
        requests.exceptions.RequestException: On HTTP errors
        ValueError: On validation failures (size, content-type)
        gzip.BadGzipFile: If content is not valid gzip
        UnicodeDecodeError: If content cannot be decoded as UTF-8
        PermissionError: If file cannot be written due to permissions
        OSError: On other file system errors

    Examples:
        >>> import mteb
        >>> cache = mteb.ResultCache()
        >>> # Download optimized cached results
        >>> cache_file = cache._download_cached_results_from_branch()
        >>> # Use custom output path
        >>> cache_file = cache._download_cached_results_from_branch(
        ...     output_path=Path("/tmp/my_cache.json")
        ... )
    """
    if output_path is None:
        # Default to saving in mteb/leaderboard/__cached_results.json,
        # where the mteb package directory is the parent of this file.
        mteb_package_dir = Path(__file__).parent
        output_path = mteb_package_dir / "leaderboard" / "__cached_results.json"

    # Extract repository owner and name from the remote URL
    # e.g., "https://github.com/embeddings-benchmark/results" -> "embeddings-benchmark/results"
    # A trailing "/" is dropped so it cannot produce "//" in the raw URL below.
    repo_path = (
        remote.rstrip("/")
        .replace("https://github.com/", "")
        .replace("http://github.com/", "")
    )

    url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{filename}"
    logger.info(f"Downloading cached results from {url}")

    # Step 1: Download with validation
    max_size_bytes = max_size_mb * 1024 * 1024

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Check if this is a Git LFS pointer file
        content_type = response.headers.get("content-type", "").lower()
        if (
            content_type == "text/plain; charset=utf-8"
            and b"git-lfs" in response.content
        ):
            # Try Git LFS media URL instead
            media_url = f"https://media.githubusercontent.com/media/{repo_path}/{branch}/{filename}"
            logger.info(f"Detected Git LFS file, trying media URL: {media_url}")
            response = requests.get(media_url, timeout=timeout)
            response.raise_for_status()
            content_type = response.headers.get("content-type", "").lower()

        # Validate content-type header (a missing header is tolerated)
        expected_content_types = [
            "application/gzip",
            "application/octet-stream",
            "application/x-gzip",
        ]
        if content_type and not any(
            ct in content_type for ct in expected_content_types
        ):
            # ValueError (not a bare Exception) to match the documented contract.
            raise ValueError(
                f"Unexpected content-type: {content_type}. Expected one of: {expected_content_types}"
            )

        # Validate file size.
        # NOTE: the body has already been fetched into memory by this point, so
        # this check only prevents oversized payloads from being decompressed
        # and written; it does not bound the download itself.
        content = response.content
        content_length = len(content)
        if content_length > max_size_bytes:
            raise ValueError(
                f"Downloaded file too large: {content_length} bytes (max: {max_size_bytes})"
            )

        logger.info(
            f"HTTP request successful, content length: {content_length} bytes"
        )

    except Exception as e:
        logger.error(f"Unexpected HTTP error: {type(e).__name__}: {e}")
        # Bare raise keeps the original traceback intact.
        raise

    # Step 2: Decompress gzip data
    logger.info("Attempting gzip decompression...")

    try:
        with gzip.open(io.BytesIO(content), "rt", encoding="utf-8") as gz_file:
            data = gz_file.read()
        logger.info(f"Decompression successful, data length: {len(data)} chars")

    except Exception as e:
        logger.error(f"Unexpected decompression error: {type(e).__name__}: {e}")
        raise

    # Step 3: Write to disk
    logger.info(f"Attempting to write to: {output_path}")

    # Make sure the parent directory exists before writing
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        output_path.write_text(data, encoding="utf-8")
        logger.info(
            f"File write successful, size: {output_path.stat().st_size} bytes"
        )
    except Exception as e:
        logger.error(f"Unexpected file write error: {type(e).__name__}: {e}")
        raise

    return output_path
|
|
430
|
+
|
|
278
431
|
def clear_cache(self) -> None:
    """Remove the local cache directory and everything inside it.

    When the cache path is missing or is not a directory, nothing is deleted;
    the situation is reported through both the module logger and a Python
    warning.
    """
    cache_dir = self.cache_path

    # Guard clause: nothing to delete.
    if not (cache_dir.exists() and cache_dir.is_dir()):
        msg = f"Cache directory `{cache_dir}` does not exist."
        logger.warning(msg)
        warnings.warn(msg)
        return

    shutil.rmtree(cache_dir)
    logger.info(f"Cache directory {cache_dir} cleared.")
|
|
285
440
|
|
|
286
441
|
def __repr__(self) -> str:
|
|
287
442
|
return f"ResultCache(cache_path={self.cache_path})"
|
|
288
443
|
|
|
289
444
|
def get_cache_paths(
|
|
290
445
|
self,
|
|
291
|
-
models: Sequence[str] |
|
|
292
|
-
tasks: Sequence[str] |
|
|
446
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
447
|
+
tasks: Sequence[str] | Iterable[AbsTask] | None = None,
|
|
293
448
|
require_model_meta: bool = True,
|
|
294
449
|
include_remote: bool = True,
|
|
295
450
|
) -> list[Path]:
|
|
@@ -311,8 +466,8 @@ class ResultCache:
|
|
|
311
466
|
A list of paths in the cache directory.
|
|
312
467
|
|
|
313
468
|
Examples:
|
|
314
|
-
>>>
|
|
315
|
-
>>> cache = ResultCache()
|
|
469
|
+
>>> import mteb
|
|
470
|
+
>>> cache = mteb.ResultCache()
|
|
316
471
|
>>>
|
|
317
472
|
>>> # Get all cache paths
|
|
318
473
|
>>> paths = cache.get_cache_paths()
|
|
@@ -422,7 +577,7 @@ class ResultCache:
|
|
|
422
577
|
@staticmethod
|
|
423
578
|
def _filter_paths_by_model_and_revision(
|
|
424
579
|
paths: list[Path],
|
|
425
|
-
models: Sequence[str] |
|
|
580
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
426
581
|
) -> list[Path]:
|
|
427
582
|
"""Filter a list of paths by model name and optional revision.
|
|
428
583
|
|
|
@@ -432,8 +587,9 @@ class ResultCache:
|
|
|
432
587
|
if not models:
|
|
433
588
|
return paths
|
|
434
589
|
|
|
435
|
-
|
|
436
|
-
|
|
590
|
+
first_model = next(iter(models))
|
|
591
|
+
if isinstance(first_model, ModelMeta):
|
|
592
|
+
models = cast("Iterable[ModelMeta]", models)
|
|
437
593
|
name_and_revision = {
|
|
438
594
|
(m.model_name_as_path(), m.revision or "no_revision_available")
|
|
439
595
|
for m in models
|
|
@@ -444,13 +600,14 @@ class ResultCache:
|
|
|
444
600
|
if (p.parent.parent.name, p.parent.name) in name_and_revision
|
|
445
601
|
]
|
|
446
602
|
|
|
447
|
-
|
|
603
|
+
str_models = cast("Sequence[str]", models)
|
|
604
|
+
model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
|
|
448
605
|
return [p for p in paths if p.parent.parent.name in model_names]
|
|
449
606
|
|
|
450
607
|
@staticmethod
|
|
451
608
|
def _filter_paths_by_task(
|
|
452
609
|
paths: list[Path],
|
|
453
|
-
tasks: Sequence[str] |
|
|
610
|
+
tasks: Sequence[str] | Iterable[AbsTask] | None = None,
|
|
454
611
|
) -> list[Path]:
|
|
455
612
|
if tasks is not None:
|
|
456
613
|
task_names = set()
|
|
@@ -466,8 +623,8 @@ class ResultCache:
|
|
|
466
623
|
|
|
467
624
|
def load_results(
|
|
468
625
|
self,
|
|
469
|
-
models: Sequence[str] |
|
|
470
|
-
tasks: Sequence[str] |
|
|
626
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
627
|
+
tasks: Sequence[str] | Iterable[AbsTask] | Benchmark | str | None = None,
|
|
471
628
|
require_model_meta: bool = True,
|
|
472
629
|
include_remote: bool = True,
|
|
473
630
|
validate_and_filter: bool = False,
|
|
@@ -478,6 +635,7 @@ class ResultCache:
|
|
|
478
635
|
Args:
|
|
479
636
|
models: A list of model names to load the results for. If None it will load the results for all models.
|
|
480
637
|
tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
|
|
638
|
+
If Benchmark is passed, then all tasks in the benchmark will be loaded.
|
|
481
639
|
If None it will load the results for all tasks.
|
|
482
640
|
require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
|
|
483
641
|
extract the model name and revision from the path.
|
|
@@ -490,8 +648,8 @@ class ResultCache:
|
|
|
490
648
|
A BenchmarkResults object containing the results for the specified models and tasks.
|
|
491
649
|
|
|
492
650
|
Examples:
|
|
493
|
-
>>>
|
|
494
|
-
>>> cache = ResultCache()
|
|
651
|
+
>>> import mteb
|
|
652
|
+
>>> cache = mteb.ResultCache()
|
|
495
653
|
>>>
|
|
496
654
|
>>> # Load results for specific models and tasks
|
|
497
655
|
>>> results = cache.load_results(
|
|
@@ -511,7 +669,7 @@ class ResultCache:
|
|
|
511
669
|
)
|
|
512
670
|
models_results = defaultdict(list)
|
|
513
671
|
|
|
514
|
-
task_names = {}
|
|
672
|
+
task_names: dict[str, AbsTask | None] = {}
|
|
515
673
|
if tasks is not None:
|
|
516
674
|
for task in tasks:
|
|
517
675
|
if isinstance(task, AbsTask):
|
|
@@ -529,10 +687,12 @@ class ResultCache:
|
|
|
529
687
|
)
|
|
530
688
|
|
|
531
689
|
if validate_and_filter:
|
|
532
|
-
|
|
690
|
+
task_instance = task_names[task_result.task_name]
|
|
533
691
|
try:
|
|
534
|
-
task_result = task_result.validate_and_filter_scores(
|
|
535
|
-
|
|
692
|
+
task_result = task_result.validate_and_filter_scores(
|
|
693
|
+
task=task_instance
|
|
694
|
+
)
|
|
695
|
+
except ValidationError as e:
|
|
536
696
|
logger.info(
|
|
537
697
|
f"Validation failed for {task_result.task_name} in {model_name} {revision}: {e}"
|
|
538
698
|
)
|
|
@@ -541,7 +701,7 @@ class ResultCache:
|
|
|
541
701
|
models_results[(model_name, revision)].append(task_result)
|
|
542
702
|
|
|
543
703
|
# create BenchmarkResults object
|
|
544
|
-
|
|
704
|
+
models_results_object = [
|
|
545
705
|
ModelResult(
|
|
546
706
|
model_name=model_name,
|
|
547
707
|
model_revision=revision,
|
|
@@ -550,9 +710,7 @@ class ResultCache:
|
|
|
550
710
|
for (model_name, revision), task_results in models_results.items()
|
|
551
711
|
]
|
|
552
712
|
|
|
553
|
-
|
|
554
|
-
model_results=
|
|
713
|
+
return BenchmarkResults(
|
|
714
|
+
model_results=models_results_object,
|
|
555
715
|
benchmark=tasks if isinstance(tasks, Benchmark) else None,
|
|
556
716
|
)
|
|
557
|
-
|
|
558
|
-
return benchmark_results
|
mteb/cli/_display_tasks.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
2
4
|
|
|
3
|
-
from mteb.abstasks import AbsTask
|
|
4
|
-
from mteb.benchmarks import Benchmark
|
|
5
5
|
from mteb.get_tasks import MTEBTasks
|
|
6
6
|
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from collections.abc import Iterable, Sequence
|
|
9
|
+
|
|
10
|
+
from mteb.abstasks import AbsTask
|
|
11
|
+
from mteb.benchmarks import Benchmark
|
|
12
|
+
|
|
7
13
|
|
|
8
14
|
def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
|
|
9
15
|
"""Get all benchmarks available in the MTEB."""
|
|
@@ -31,7 +37,7 @@ def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
|
|
|
31
37
|
_display_tasks(benchmark.tasks, name=name)
|
|
32
38
|
|
|
33
39
|
|
|
34
|
-
def _display_tasks(task_list:
|
|
40
|
+
def _display_tasks(task_list: Iterable[AbsTask], name: str | None = None) -> None:
|
|
35
41
|
from rich.console import Console
|
|
36
42
|
|
|
37
43
|
console = Console()
|
mteb/cli/build_cli.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
+
import warnings
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
5
7
|
|
|
6
8
|
import torch
|
|
7
9
|
from rich.logging import RichHandler
|
|
8
10
|
|
|
9
11
|
import mteb
|
|
10
12
|
from mteb.cache import ResultCache
|
|
13
|
+
from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
|
|
11
14
|
from mteb.cli.generate_model_card import generate_model_card
|
|
12
15
|
from mteb.evaluate import OverwriteStrategy
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from mteb.abstasks.abstask import AbsTask
|
|
19
|
+
from mteb.types import EncodeKwargs
|
|
15
20
|
|
|
16
21
|
logger = logging.getLogger(__name__)
|
|
17
22
|
|
|
@@ -53,7 +58,7 @@ def run(args: argparse.Namespace) -> None:
|
|
|
53
58
|
|
|
54
59
|
if args.benchmarks:
|
|
55
60
|
benchmarks = mteb.get_benchmarks(names=args.benchmarks)
|
|
56
|
-
tasks =
|
|
61
|
+
tasks = tuple(t for b in benchmarks for t in b.tasks)
|
|
57
62
|
else:
|
|
58
63
|
tasks = mteb.get_tasks(
|
|
59
64
|
categories=args.categories,
|
|
@@ -63,21 +68,23 @@ def run(args: argparse.Namespace) -> None:
|
|
|
63
68
|
eval_splits=args.eval_splits,
|
|
64
69
|
)
|
|
65
70
|
|
|
66
|
-
encode_kwargs = {}
|
|
71
|
+
encode_kwargs: EncodeKwargs = {}
|
|
67
72
|
if args.batch_size is not None:
|
|
68
73
|
encode_kwargs["batch_size"] = args.batch_size
|
|
69
74
|
|
|
70
75
|
overwrite_strategy = args.overwrite_strategy
|
|
71
76
|
if args.overwrite:
|
|
72
|
-
|
|
73
|
-
"`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead."
|
|
77
|
+
warnings.warn(
|
|
78
|
+
"`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead.",
|
|
79
|
+
DeprecationWarning,
|
|
74
80
|
)
|
|
75
81
|
overwrite_strategy = OverwriteStrategy.ALWAYS.value
|
|
76
82
|
|
|
77
83
|
prediction_folder = args.prediction_folder
|
|
78
84
|
if args.save_predictions:
|
|
79
|
-
|
|
80
|
-
"`--save_predictions` is deprecated, please use `--prediction-folder` instead."
|
|
85
|
+
warnings.warn(
|
|
86
|
+
"`--save_predictions` is deprecated, please use `--prediction-folder` instead.",
|
|
87
|
+
DeprecationWarning,
|
|
81
88
|
)
|
|
82
89
|
prediction_folder = args.output_folder
|
|
83
90
|
|
|
@@ -279,23 +286,25 @@ def _create_meta(args: argparse.Namespace) -> None:
|
|
|
279
286
|
from_existing = Path(from_existing)
|
|
280
287
|
|
|
281
288
|
if output_path.exists() and overwrite:
|
|
282
|
-
|
|
289
|
+
msg = "Output path already exists, overwriting."
|
|
290
|
+
logger.warning(msg)
|
|
291
|
+
warnings.warn(msg)
|
|
283
292
|
elif output_path.exists():
|
|
284
293
|
raise FileExistsError(
|
|
285
294
|
"Output path already exists, use --overwrite to overwrite."
|
|
286
295
|
)
|
|
287
296
|
|
|
288
|
-
|
|
297
|
+
benchmarks = None
|
|
298
|
+
tasks: list[AbsTask] = []
|
|
289
299
|
if tasks_names is not None:
|
|
290
|
-
tasks = mteb.get_tasks(tasks_names)
|
|
300
|
+
tasks = list(mteb.get_tasks(tasks_names))
|
|
291
301
|
if benchmarks is not None:
|
|
292
302
|
benchmarks = mteb.get_benchmarks(benchmarks)
|
|
293
|
-
for benchmark in benchmarks:
|
|
294
|
-
tasks.extend(benchmark.tasks)
|
|
295
303
|
|
|
296
304
|
generate_model_card(
|
|
297
305
|
model_name,
|
|
298
|
-
tasks
|
|
306
|
+
tasks,
|
|
307
|
+
benchmarks,
|
|
299
308
|
existing_model_card_id_or_path=from_existing,
|
|
300
309
|
results_cache=ResultCache(results_folder),
|
|
301
310
|
output_path=output_path,
|
|
@@ -356,6 +365,95 @@ def _add_create_meta_parser(subparsers) -> None:
|
|
|
356
365
|
parser.set_defaults(func=_create_meta)
|
|
357
366
|
|
|
358
367
|
|
|
368
|
+
def _add_leaderboard_parser(subparsers) -> None:
    """Register the ``leaderboard`` sub-command and its command-line options.

    Args:
        subparsers: The sub-parser collection (as returned by
            ``ArgumentParser.add_subparsers``) that receives the new command.
    """
    leaderboard_parser = subparsers.add_parser(
        "leaderboard", help="Launch the MTEB leaderboard"
    )

    # One entry per option, registered in this order so the help output keeps
    # the same ordering.
    option_specs = (
        (
            "--cache-path",
            dict(
                type=str,
                help="Path to the cache folder containing model results",
                required=False,
                default=None,
            ),
        ),
        (
            "--host",
            dict(
                type=str,
                default="0.0.0.0",
                help="Host to run the leaderboard server on",
            ),
        ),
        (
            "--port",
            dict(
                type=int,
                default=7860,
                help="Port to run the leaderboard server on",
            ),
        ),
        (
            "--share",
            dict(
                action="store_true",
                default=False,
                help="Create a public URL for the leaderboard",
            ),
        ),
    )
    for flag, options in option_specs:
        leaderboard_parser.add_argument(flag, **options)

    # Dispatch target invoked when the sub-command is selected.
    leaderboard_parser.set_defaults(func=_leaderboard)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _leaderboard(args: argparse.Namespace) -> None:
    """Launch the MTEB leaderboard with specified cache path.

    Args:
        args: Parsed CLI arguments. Reads ``cache_path`` (falls back to the
            default ``ResultCache`` location when empty), ``host``, ``port``
            and ``share``.

    Raises:
        ImportError: If the leaderboard dependencies cannot be imported. The
            original import error is attached as the cause.
    """
    # Import leaderboard module only when needed to avoid requiring leaderboard dependencies
    # for other CLI commands
    try:
        import gradio as gr

        from mteb.leaderboard import get_leaderboard_app
    except ImportError as e:
        # Chain explicitly so the underlying import failure stays visible
        # as the direct cause in the traceback.
        raise ImportError(
            "Seems like some dependencies are not installed. "
            + "You can likely install these using: `pip install mteb[leaderboard]`. "
            + f"{e}"
        ) from e

    cache_path = args.cache_path

    if cache_path:
        logger.info(f"Using cache path: {cache_path}")
        cache = ResultCache(cache_path)
    else:
        cache = ResultCache()
        logger.info(f"Using default cache path: {cache.cache_path}")

    app = get_leaderboard_app(cache)

    logger.info(f"Starting leaderboard on {args.host}:{args.port}")
    if args.share:
        logger.info("Creating public URL...")

    # Raise the level of noisy loggers so only errors show while serving.
    logging.getLogger("mteb.load_results.task_results").setLevel(
        logging.ERROR
    )  # Warnings related to task split
    logging.getLogger("mteb.model_meta").setLevel(
        logging.ERROR
    )  # Warning related to model metadata (fetch_from_hf=False)
    logging.getLogger("mteb.load_results.benchmark_results").setLevel(
        logging.ERROR
    )  # Warning related to model metadata (fetch_from_hf=False)
    warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")

    # Head content for Tailwind CSS
    head = """
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
    """

    app.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        theme=gr.themes.Soft(
            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
        ),
        head=head,
    )
|
|
455
|
+
|
|
456
|
+
|
|
359
457
|
def build_cli() -> argparse.ArgumentParser:
|
|
360
458
|
"""Builds the argument parser for the MTEB CLI.
|
|
361
459
|
|
|
@@ -375,6 +473,7 @@ def build_cli() -> argparse.ArgumentParser:
|
|
|
375
473
|
_add_available_tasks_parser(subparsers)
|
|
376
474
|
_add_available_benchmarks_parser(subparsers)
|
|
377
475
|
_add_create_meta_parser(subparsers)
|
|
476
|
+
_add_leaderboard_parser(subparsers)
|
|
378
477
|
|
|
379
478
|
return parser
|
|
380
479
|
|