mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/cache.py
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
|
+
import gzip
|
|
2
|
+
import io
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
import os
|
|
4
6
|
import shutil
|
|
5
7
|
import subprocess
|
|
8
|
+
import warnings
|
|
6
9
|
from collections import defaultdict
|
|
7
|
-
from collections.abc import Sequence
|
|
10
|
+
from collections.abc import Iterable, Sequence
|
|
8
11
|
from pathlib import Path
|
|
9
12
|
from typing import cast
|
|
10
13
|
|
|
14
|
+
import requests
|
|
15
|
+
from pydantic import ValidationError
|
|
16
|
+
|
|
17
|
+
import mteb
|
|
11
18
|
from mteb.abstasks import AbsTask
|
|
19
|
+
from mteb.benchmarks.benchmark import Benchmark
|
|
12
20
|
from mteb.models import ModelMeta
|
|
13
21
|
from mteb.results import BenchmarkResults, ModelResult, TaskResult
|
|
14
22
|
from mteb.types import ModelName, Revision
|
|
@@ -20,8 +28,8 @@ class ResultCache:
|
|
|
20
28
|
"""Class to handle the local cache of MTEB results.
|
|
21
29
|
|
|
22
30
|
Examples:
|
|
23
|
-
>>>
|
|
24
|
-
>>> cache = ResultCache(cache_path="~/.cache/mteb") # default
|
|
31
|
+
>>> import mteb
|
|
32
|
+
>>> cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
|
|
25
33
|
>>> cache.download_from_remote() # download the latest results from the remote repository
|
|
26
34
|
>>> result = cache.load_results("task_name", "model_name")
|
|
27
35
|
"""
|
|
@@ -62,7 +70,11 @@ class ResultCache:
|
|
|
62
70
|
Returns:
|
|
63
71
|
The path to the results of the task.
|
|
64
72
|
"""
|
|
65
|
-
results_folder =
|
|
73
|
+
results_folder = (
|
|
74
|
+
self.cache_path / "results"
|
|
75
|
+
if not remote
|
|
76
|
+
else self.cache_path / "remote" / "results"
|
|
77
|
+
)
|
|
66
78
|
|
|
67
79
|
if isinstance(model_name, ModelMeta):
|
|
68
80
|
if model_revision is not None:
|
|
@@ -74,12 +86,12 @@ class ResultCache:
|
|
|
74
86
|
elif isinstance(model_name, str):
|
|
75
87
|
model_name = model_name.replace("/", "__").replace(" ", "_")
|
|
76
88
|
|
|
77
|
-
model_path =
|
|
89
|
+
model_path = results_folder / model_name
|
|
78
90
|
|
|
79
91
|
if model_revision is None:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
)
|
|
92
|
+
msg = "`model_revision` is not specified, attempting to load the latest revision. To disable this behavior, specify the 'model_revision` explicitly."
|
|
93
|
+
logger.warning(msg)
|
|
94
|
+
warnings.warn(msg)
|
|
83
95
|
# get revs from paths
|
|
84
96
|
revisions = [p for p in model_path.glob("*") if p.is_dir()]
|
|
85
97
|
if not revisions:
|
|
@@ -191,12 +203,14 @@ class ResultCache:
|
|
|
191
203
|
self,
|
|
192
204
|
remote: str = "https://github.com/embeddings-benchmark/results",
|
|
193
205
|
download_latest: bool = True,
|
|
206
|
+
revision: str | None = None,
|
|
194
207
|
) -> Path:
|
|
195
208
|
"""Downloads the latest version of the results repository from GitHub to a local cache directory. Required git to be installed.
|
|
196
209
|
|
|
197
210
|
Args:
|
|
198
211
|
remote: The URL of the results repository on GitHub.
|
|
199
212
|
download_latest: If True it will download the latest version of the repository, otherwise it will only update the existing repository.
|
|
213
|
+
revision: If specified, it will checkout the given revision after cloning or pulling the repository.
|
|
200
214
|
|
|
201
215
|
Returns:
|
|
202
216
|
The path to the local cache directory.
|
|
@@ -224,14 +238,27 @@ class ResultCache:
|
|
|
224
238
|
)
|
|
225
239
|
raise ValueError(msg)
|
|
226
240
|
|
|
227
|
-
if download_latest:
|
|
241
|
+
if revision or download_latest:
|
|
228
242
|
logger.info(
|
|
229
|
-
f"remote repository already exists in {results_directory},
|
|
243
|
+
f"remote repository already exists in {results_directory}, fetching updates"
|
|
244
|
+
)
|
|
245
|
+
subprocess.run(
|
|
246
|
+
["git", "fetch", "--all", "--tags"],
|
|
247
|
+
cwd=results_directory,
|
|
248
|
+
check=True,
|
|
230
249
|
)
|
|
231
|
-
subprocess.run(["git", "pull"], cwd=results_directory)
|
|
232
250
|
else:
|
|
233
251
|
logger.debug(
|
|
234
|
-
f"Results repository already exists in {results_directory}, skipping update,
|
|
252
|
+
f"Results repository already exists in {results_directory}, skipping update, "
|
|
253
|
+
f"set download_latest=True to update it"
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
if revision:
|
|
257
|
+
logger.info(f"Checking out revision '{revision}'")
|
|
258
|
+
subprocess.run(
|
|
259
|
+
["git", "checkout", revision],
|
|
260
|
+
cwd=results_directory,
|
|
261
|
+
check=True,
|
|
235
262
|
)
|
|
236
263
|
return results_directory
|
|
237
264
|
|
|
@@ -239,25 +266,180 @@ class ResultCache:
|
|
|
239
266
|
f"No results repository found in {results_directory}, cloning it from {remote}"
|
|
240
267
|
)
|
|
241
268
|
|
|
242
|
-
|
|
269
|
+
clone_cmd = ["git", "clone", "--depth", "1"]
|
|
270
|
+
|
|
271
|
+
if revision:
|
|
272
|
+
logger.info(f"Cloning repository at revision '{revision}'")
|
|
273
|
+
clone_cmd.append(f"--revision={revision}")
|
|
274
|
+
clone_cmd.extend([remote, "remote"])
|
|
275
|
+
|
|
276
|
+
subprocess.run(
|
|
277
|
+
clone_cmd,
|
|
278
|
+
cwd=self.cache_path,
|
|
279
|
+
check=True,
|
|
280
|
+
)
|
|
243
281
|
|
|
244
282
|
return results_directory
|
|
245
283
|
|
|
284
|
+
def _download_cached_results_from_branch(
|
|
285
|
+
self,
|
|
286
|
+
branch: str = "cached-data",
|
|
287
|
+
filename: str = "__cached_results.json.gz",
|
|
288
|
+
output_path: Path | None = None,
|
|
289
|
+
remote: str = "https://github.com/embeddings-benchmark/results",
|
|
290
|
+
timeout: int = 60,
|
|
291
|
+
max_size_mb: int = 500,
|
|
292
|
+
) -> Path:
|
|
293
|
+
"""Download pre-computed cached results from a specific branch.
|
|
294
|
+
|
|
295
|
+
This is significantly faster than download_from_remote() since it downloads
|
|
296
|
+
only a compressed cache file instead of cloning the entire repository.
|
|
297
|
+
|
|
298
|
+
The method performs the following steps:
|
|
299
|
+
1. Downloads a gzipped JSON file from the specified branch
|
|
300
|
+
2. Validates file size and content type
|
|
301
|
+
3. Decompresses the gzip content
|
|
302
|
+
4. Writes the decompressed JSON to disk
|
|
303
|
+
|
|
304
|
+
Args:
|
|
305
|
+
branch: Branch name to download from (default: "cached-data")
|
|
306
|
+
filename: Name of the cached results file (default: "__cached_results.json.gz")
|
|
307
|
+
output_path: Where to save the file. If None, uses mteb/leaderboard/__cached_results.json
|
|
308
|
+
remote: Base URL of the results repository
|
|
309
|
+
timeout: Request timeout in seconds (default: 60)
|
|
310
|
+
max_size_mb: Maximum allowed file size in megabytes (default: 500)
|
|
311
|
+
|
|
312
|
+
Returns:
|
|
313
|
+
Path to the downloaded and decompressed cache file
|
|
314
|
+
|
|
315
|
+
Raises:
|
|
316
|
+
requests.exceptions.RequestException: On HTTP errors
|
|
317
|
+
ValueError: On validation failures (size, content-type)
|
|
318
|
+
gzip.BadGzipFile: If content is not valid gzip
|
|
319
|
+
UnicodeDecodeError: If content cannot be decoded as UTF-8
|
|
320
|
+
PermissionError: If file cannot be written due to permissions
|
|
321
|
+
OSError: On other file system errors
|
|
322
|
+
|
|
323
|
+
Examples:
|
|
324
|
+
>>> import mteb
|
|
325
|
+
>>> cache = mteb.ResultCache()
|
|
326
|
+
>>> # Download optimized cached results
|
|
327
|
+
>>> cache_file = cache._download_cached_results_from_branch()
|
|
328
|
+
>>> # Use custom output path
|
|
329
|
+
>>> cache_file = cache._download_cached_results_from_branch(
|
|
330
|
+
... output_path=Path("/tmp/my_cache.json")
|
|
331
|
+
... )
|
|
332
|
+
"""
|
|
333
|
+
if output_path is None:
|
|
334
|
+
# Default to saving in mteb/leaderboard/__cached_results.json
|
|
335
|
+
# Get the mteb package directory (parent of this file)
|
|
336
|
+
mteb_package_dir = Path(__file__).parent
|
|
337
|
+
output_path = mteb_package_dir / "leaderboard" / "__cached_results.json"
|
|
338
|
+
|
|
339
|
+
# Extract repository owner and name from the remote URL
|
|
340
|
+
# e.g., "https://github.com/embeddings-benchmark/results" -> "embeddings-benchmark/results"
|
|
341
|
+
repo_path = remote.replace("https://github.com/", "").replace(
|
|
342
|
+
"http://github.com/", ""
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{filename}"
|
|
346
|
+
logger.info(f"Downloading cached results from {url}")
|
|
347
|
+
|
|
348
|
+
# Step 1: Download with validation
|
|
349
|
+
max_size_bytes = max_size_mb * 1024 * 1024
|
|
350
|
+
|
|
351
|
+
try:
|
|
352
|
+
response = requests.get(url, timeout=timeout)
|
|
353
|
+
response.raise_for_status()
|
|
354
|
+
|
|
355
|
+
# Check if this is a Git LFS pointer file
|
|
356
|
+
content_type = response.headers.get("content-type", "").lower()
|
|
357
|
+
if (
|
|
358
|
+
content_type == "text/plain; charset=utf-8"
|
|
359
|
+
and b"git-lfs" in response.content
|
|
360
|
+
):
|
|
361
|
+
# Try Git LFS media URL instead
|
|
362
|
+
media_url = f"https://media.githubusercontent.com/media/{repo_path}/{branch}/{filename}"
|
|
363
|
+
logger.info(f"Detected Git LFS file, trying media URL: {media_url}")
|
|
364
|
+
response = requests.get(media_url, timeout=timeout)
|
|
365
|
+
response.raise_for_status()
|
|
366
|
+
content_type = response.headers.get("content-type", "").lower()
|
|
367
|
+
|
|
368
|
+
# Validate content-type header
|
|
369
|
+
expected_content_types = [
|
|
370
|
+
"application/gzip",
|
|
371
|
+
"application/octet-stream",
|
|
372
|
+
"application/x-gzip",
|
|
373
|
+
]
|
|
374
|
+
if content_type and not any(
|
|
375
|
+
ct in content_type for ct in expected_content_types
|
|
376
|
+
):
|
|
377
|
+
raise Exception(
|
|
378
|
+
f"Unexpected content-type: {content_type}. Expected one of: {expected_content_types}"
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
# Validate file size
|
|
382
|
+
content_length = len(response.content)
|
|
383
|
+
if content_length > max_size_bytes:
|
|
384
|
+
raise ValueError(
|
|
385
|
+
f"Downloaded file too large: {content_length} bytes (max: {max_size_bytes})"
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
logger.info(
|
|
389
|
+
f"HTTP request successful, content length: {content_length} bytes"
|
|
390
|
+
)
|
|
391
|
+
content = response.content
|
|
392
|
+
|
|
393
|
+
except Exception as e:
|
|
394
|
+
logger.error(f"Unexpected HTTP error: {type(e).__name__}: {e}")
|
|
395
|
+
raise e
|
|
396
|
+
|
|
397
|
+
# Step 2: Decompress gzip data
|
|
398
|
+
logger.info("Attempting gzip decompression...")
|
|
399
|
+
|
|
400
|
+
try:
|
|
401
|
+
with gzip.open(io.BytesIO(content), "rt", encoding="utf-8") as gz_file:
|
|
402
|
+
data = gz_file.read()
|
|
403
|
+
logger.info(f"Decompression successful, data length: {len(data)} chars")
|
|
404
|
+
|
|
405
|
+
except Exception as e:
|
|
406
|
+
logger.error(f"Unexpected decompression error: {type(e).__name__}: {e}")
|
|
407
|
+
raise e
|
|
408
|
+
|
|
409
|
+
# Step 3: Write to disk
|
|
410
|
+
logger.info(f"Attempting to write to: {output_path}")
|
|
411
|
+
|
|
412
|
+
# Check parent directory exists and is writable
|
|
413
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
414
|
+
|
|
415
|
+
try:
|
|
416
|
+
output_path.write_text(data, encoding="utf-8")
|
|
417
|
+
logger.info(
|
|
418
|
+
f"File write successful, size: {output_path.stat().st_size} bytes"
|
|
419
|
+
)
|
|
420
|
+
except Exception as e:
|
|
421
|
+
logger.error(f"Unexpected file write error: {type(e).__name__}: {e}")
|
|
422
|
+
raise e
|
|
423
|
+
|
|
424
|
+
return output_path
|
|
425
|
+
|
|
246
426
|
def clear_cache(self) -> None:
|
|
247
427
|
"""Clear the local cache directory."""
|
|
248
428
|
if self.cache_path.exists() and self.cache_path.is_dir():
|
|
249
429
|
shutil.rmtree(self.cache_path)
|
|
250
430
|
logger.info(f"Cache directory {self.cache_path} cleared.")
|
|
251
431
|
else:
|
|
252
|
-
|
|
432
|
+
msg = f"Cache directory `{self.cache_path}` does not exist."
|
|
433
|
+
logger.warning(msg)
|
|
434
|
+
warnings.warn(msg)
|
|
253
435
|
|
|
254
436
|
def __repr__(self) -> str:
|
|
255
437
|
return f"ResultCache(cache_path={self.cache_path})"
|
|
256
438
|
|
|
257
439
|
def get_cache_paths(
|
|
258
440
|
self,
|
|
259
|
-
models: Sequence[str] |
|
|
260
|
-
tasks: Sequence[str] |
|
|
441
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
442
|
+
tasks: Sequence[str] | Iterable[AbsTask] | None = None,
|
|
261
443
|
require_model_meta: bool = True,
|
|
262
444
|
include_remote: bool = True,
|
|
263
445
|
) -> list[Path]:
|
|
@@ -279,8 +461,8 @@ class ResultCache:
|
|
|
279
461
|
A list of paths in the cache directory.
|
|
280
462
|
|
|
281
463
|
Examples:
|
|
282
|
-
>>>
|
|
283
|
-
>>> cache = ResultCache()
|
|
464
|
+
>>> import mteb
|
|
465
|
+
>>> cache = mteb.ResultCache()
|
|
284
466
|
>>>
|
|
285
467
|
>>> # Get all cache paths
|
|
286
468
|
>>> paths = cache.get_cache_paths()
|
|
@@ -390,7 +572,7 @@ class ResultCache:
|
|
|
390
572
|
@staticmethod
|
|
391
573
|
def _filter_paths_by_model_and_revision(
|
|
392
574
|
paths: list[Path],
|
|
393
|
-
models: Sequence[str] |
|
|
575
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
394
576
|
) -> list[Path]:
|
|
395
577
|
"""Filter a list of paths by model name and optional revision.
|
|
396
578
|
|
|
@@ -400,8 +582,9 @@ class ResultCache:
|
|
|
400
582
|
if not models:
|
|
401
583
|
return paths
|
|
402
584
|
|
|
403
|
-
|
|
404
|
-
|
|
585
|
+
first_model = next(iter(models))
|
|
586
|
+
if isinstance(first_model, ModelMeta):
|
|
587
|
+
models = cast(Iterable[ModelMeta], models)
|
|
405
588
|
name_and_revision = {
|
|
406
589
|
(m.model_name_as_path(), m.revision or "no_revision_available")
|
|
407
590
|
for m in models
|
|
@@ -412,13 +595,14 @@ class ResultCache:
|
|
|
412
595
|
if (p.parent.parent.name, p.parent.name) in name_and_revision
|
|
413
596
|
]
|
|
414
597
|
|
|
415
|
-
|
|
598
|
+
str_models = cast(Sequence[str], models)
|
|
599
|
+
model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
|
|
416
600
|
return [p for p in paths if p.parent.parent.name in model_names]
|
|
417
601
|
|
|
418
602
|
@staticmethod
|
|
419
603
|
def _filter_paths_by_task(
|
|
420
604
|
paths: list[Path],
|
|
421
|
-
tasks: Sequence[str] |
|
|
605
|
+
tasks: Sequence[str] | Iterable[AbsTask] | None = None,
|
|
422
606
|
) -> list[Path]:
|
|
423
607
|
if tasks is not None:
|
|
424
608
|
task_names = set()
|
|
@@ -434,8 +618,8 @@ class ResultCache:
|
|
|
434
618
|
|
|
435
619
|
def load_results(
|
|
436
620
|
self,
|
|
437
|
-
models: Sequence[str] |
|
|
438
|
-
tasks: Sequence[str] |
|
|
621
|
+
models: Sequence[str] | Iterable[ModelMeta] | None = None,
|
|
622
|
+
tasks: Sequence[str] | Iterable[AbsTask] | Benchmark | str | None = None,
|
|
439
623
|
require_model_meta: bool = True,
|
|
440
624
|
include_remote: bool = True,
|
|
441
625
|
validate_and_filter: bool = False,
|
|
@@ -445,7 +629,9 @@ class ResultCache:
|
|
|
445
629
|
|
|
446
630
|
Args:
|
|
447
631
|
models: A list of model names to load the results for. If None it will load the results for all models.
|
|
448
|
-
tasks: A list of task names to load the results for. If
|
|
632
|
+
tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
|
|
633
|
+
If Benchmark is passed, then all tasks in the benchmark will be loaded.
|
|
634
|
+
If None it will load the results for all tasks.
|
|
449
635
|
require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
|
|
450
636
|
extract the model name and revision from the path.
|
|
451
637
|
include_remote: If True, it will include results from the remote repository.
|
|
@@ -457,8 +643,8 @@ class ResultCache:
|
|
|
457
643
|
A BenchmarkResults object containing the results for the specified models and tasks.
|
|
458
644
|
|
|
459
645
|
Examples:
|
|
460
|
-
>>>
|
|
461
|
-
>>> cache = ResultCache()
|
|
646
|
+
>>> import mteb
|
|
647
|
+
>>> cache = mteb.ResultCache()
|
|
462
648
|
>>>
|
|
463
649
|
>>> # Load results for specific models and tasks
|
|
464
650
|
>>> results = cache.load_results(
|
|
@@ -467,6 +653,9 @@ class ResultCache:
|
|
|
467
653
|
... require_model_meta=True,
|
|
468
654
|
... )
|
|
469
655
|
"""
|
|
656
|
+
if isinstance(tasks, str):
|
|
657
|
+
tasks = mteb.get_benchmark(tasks)
|
|
658
|
+
|
|
470
659
|
paths = self.get_cache_paths(
|
|
471
660
|
models=models,
|
|
472
661
|
tasks=tasks,
|
|
@@ -475,7 +664,7 @@ class ResultCache:
|
|
|
475
664
|
)
|
|
476
665
|
models_results = defaultdict(list)
|
|
477
666
|
|
|
478
|
-
task_names = {}
|
|
667
|
+
task_names: dict[str, AbsTask | None] = {}
|
|
479
668
|
if tasks is not None:
|
|
480
669
|
for task in tasks:
|
|
481
670
|
if isinstance(task, AbsTask):
|
|
@@ -493,10 +682,12 @@ class ResultCache:
|
|
|
493
682
|
)
|
|
494
683
|
|
|
495
684
|
if validate_and_filter:
|
|
496
|
-
|
|
685
|
+
task_instance = task_names[task_result.task_name]
|
|
497
686
|
try:
|
|
498
|
-
task_result.validate_and_filter_scores(
|
|
499
|
-
|
|
687
|
+
task_result = task_result.validate_and_filter_scores(
|
|
688
|
+
task=task_instance
|
|
689
|
+
)
|
|
690
|
+
except ValidationError as e:
|
|
500
691
|
logger.info(
|
|
501
692
|
f"Validation failed for {task_result.task_name} in {model_name} {revision}: {e}"
|
|
502
693
|
)
|
|
@@ -505,7 +696,7 @@ class ResultCache:
|
|
|
505
696
|
models_results[(model_name, revision)].append(task_result)
|
|
506
697
|
|
|
507
698
|
# create BenchmarkResults object
|
|
508
|
-
|
|
699
|
+
models_results_object = [
|
|
509
700
|
ModelResult(
|
|
510
701
|
model_name=model_name,
|
|
511
702
|
model_revision=revision,
|
|
@@ -514,8 +705,7 @@ class ResultCache:
|
|
|
514
705
|
for (model_name, revision), task_results in models_results.items()
|
|
515
706
|
]
|
|
516
707
|
|
|
517
|
-
|
|
518
|
-
model_results=
|
|
708
|
+
return BenchmarkResults(
|
|
709
|
+
model_results=models_results_object,
|
|
710
|
+
benchmark=tasks if isinstance(tasks, Benchmark) else None,
|
|
519
711
|
)
|
|
520
|
-
|
|
521
|
-
return benchmark_results
|
mteb/cli/_display_tasks.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from collections.abc import Sequence
|
|
1
|
+
from collections.abc import Iterable, Sequence
|
|
2
2
|
|
|
3
3
|
from mteb.abstasks import AbsTask
|
|
4
4
|
from mteb.benchmarks import Benchmark
|
|
@@ -31,7 +31,7 @@ def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
|
|
|
31
31
|
_display_tasks(benchmark.tasks, name=name)
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def _display_tasks(task_list:
|
|
34
|
+
def _display_tasks(task_list: Iterable[AbsTask], name: str | None = None) -> None:
|
|
35
35
|
from rich.console import Console
|
|
36
36
|
|
|
37
37
|
console = Console()
|
mteb/cli/build_cli.py
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
+
import warnings
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
import torch
|
|
7
8
|
from rich.logging import RichHandler
|
|
8
9
|
|
|
9
10
|
import mteb
|
|
11
|
+
from mteb.abstasks.abstask import AbsTask
|
|
10
12
|
from mteb.cache import ResultCache
|
|
13
|
+
from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
|
|
11
14
|
from mteb.cli.generate_model_card import generate_model_card
|
|
12
15
|
from mteb.evaluate import OverwriteStrategy
|
|
13
|
-
|
|
14
|
-
from ._display_tasks import _display_benchmarks, _display_tasks
|
|
16
|
+
from mteb.types._encoder_io import EncodeKwargs
|
|
15
17
|
|
|
16
18
|
logger = logging.getLogger(__name__)
|
|
17
19
|
|
|
@@ -53,7 +55,7 @@ def run(args: argparse.Namespace) -> None:
|
|
|
53
55
|
|
|
54
56
|
if args.benchmarks:
|
|
55
57
|
benchmarks = mteb.get_benchmarks(names=args.benchmarks)
|
|
56
|
-
tasks =
|
|
58
|
+
tasks = tuple(t for b in benchmarks for t in b.tasks)
|
|
57
59
|
else:
|
|
58
60
|
tasks = mteb.get_tasks(
|
|
59
61
|
categories=args.categories,
|
|
@@ -63,21 +65,23 @@ def run(args: argparse.Namespace) -> None:
|
|
|
63
65
|
eval_splits=args.eval_splits,
|
|
64
66
|
)
|
|
65
67
|
|
|
66
|
-
encode_kwargs = {}
|
|
68
|
+
encode_kwargs: EncodeKwargs = {}
|
|
67
69
|
if args.batch_size is not None:
|
|
68
70
|
encode_kwargs["batch_size"] = args.batch_size
|
|
69
71
|
|
|
70
72
|
overwrite_strategy = args.overwrite_strategy
|
|
71
73
|
if args.overwrite:
|
|
72
|
-
|
|
73
|
-
"`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead."
|
|
74
|
+
warnings.warn(
|
|
75
|
+
"`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead.",
|
|
76
|
+
DeprecationWarning,
|
|
74
77
|
)
|
|
75
78
|
overwrite_strategy = OverwriteStrategy.ALWAYS.value
|
|
76
79
|
|
|
77
80
|
prediction_folder = args.prediction_folder
|
|
78
81
|
if args.save_predictions:
|
|
79
|
-
|
|
80
|
-
"`--save_predictions` is deprecated, please use `--prediction-folder` instead."
|
|
82
|
+
warnings.warn(
|
|
83
|
+
"`--save_predictions` is deprecated, please use `--prediction-folder` instead.",
|
|
84
|
+
DeprecationWarning,
|
|
81
85
|
)
|
|
82
86
|
prediction_folder = args.output_folder
|
|
83
87
|
|
|
@@ -279,23 +283,25 @@ def _create_meta(args: argparse.Namespace) -> None:
|
|
|
279
283
|
from_existing = Path(from_existing)
|
|
280
284
|
|
|
281
285
|
if output_path.exists() and overwrite:
|
|
282
|
-
|
|
286
|
+
msg = "Output path already exists, overwriting."
|
|
287
|
+
logger.warning(msg)
|
|
288
|
+
warnings.warn(msg)
|
|
283
289
|
elif output_path.exists():
|
|
284
290
|
raise FileExistsError(
|
|
285
291
|
"Output path already exists, use --overwrite to overwrite."
|
|
286
292
|
)
|
|
287
293
|
|
|
288
|
-
|
|
294
|
+
benchmarks = None
|
|
295
|
+
tasks: list[AbsTask] = []
|
|
289
296
|
if tasks_names is not None:
|
|
290
|
-
tasks = mteb.get_tasks(tasks_names)
|
|
297
|
+
tasks = list(mteb.get_tasks(tasks_names))
|
|
291
298
|
if benchmarks is not None:
|
|
292
299
|
benchmarks = mteb.get_benchmarks(benchmarks)
|
|
293
|
-
for benchmark in benchmarks:
|
|
294
|
-
tasks.extend(benchmark.tasks)
|
|
295
300
|
|
|
296
301
|
generate_model_card(
|
|
297
302
|
model_name,
|
|
298
|
-
tasks
|
|
303
|
+
tasks,
|
|
304
|
+
benchmarks,
|
|
299
305
|
existing_model_card_id_or_path=from_existing,
|
|
300
306
|
results_cache=ResultCache(results_folder),
|
|
301
307
|
output_path=output_path,
|
|
@@ -356,6 +362,95 @@ def _add_create_meta_parser(subparsers) -> None:
|
|
|
356
362
|
parser.set_defaults(func=_create_meta)
|
|
357
363
|
|
|
358
364
|
|
|
365
|
+
def _add_leaderboard_parser(subparsers) -> None:
|
|
366
|
+
parser = subparsers.add_parser("leaderboard", help="Launch the MTEB leaderboard")
|
|
367
|
+
|
|
368
|
+
parser.add_argument(
|
|
369
|
+
"--cache-path",
|
|
370
|
+
type=str,
|
|
371
|
+
help="Path to the cache folder containing model results",
|
|
372
|
+
required=False,
|
|
373
|
+
default=None,
|
|
374
|
+
)
|
|
375
|
+
parser.add_argument(
|
|
376
|
+
"--host",
|
|
377
|
+
type=str,
|
|
378
|
+
default="0.0.0.0",
|
|
379
|
+
help="Host to run the leaderboard server on",
|
|
380
|
+
)
|
|
381
|
+
parser.add_argument(
|
|
382
|
+
"--port",
|
|
383
|
+
type=int,
|
|
384
|
+
default=7860,
|
|
385
|
+
help="Port to run the leaderboard server on",
|
|
386
|
+
)
|
|
387
|
+
parser.add_argument(
|
|
388
|
+
"--share",
|
|
389
|
+
action="store_true",
|
|
390
|
+
default=False,
|
|
391
|
+
help="Create a public URL for the leaderboard",
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
parser.set_defaults(func=_leaderboard)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _leaderboard(args: argparse.Namespace) -> None:
|
|
398
|
+
"""Launch the MTEB leaderboard with specified cache path."""
|
|
399
|
+
# Import leaderboard module only when needed to avoid requiring leaderboard dependencies
|
|
400
|
+
# for other CLI commands
|
|
401
|
+
try:
|
|
402
|
+
import gradio as gr
|
|
403
|
+
|
|
404
|
+
from mteb.leaderboard import get_leaderboard_app
|
|
405
|
+
except ImportError as e:
|
|
406
|
+
raise ImportError(
|
|
407
|
+
"Seems like some dependencies are not installed. "
|
|
408
|
+
+ "You can likely install these using: `pip install mteb[leaderboard]`. "
|
|
409
|
+
+ f"{e}"
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
cache_path = args.cache_path
|
|
413
|
+
|
|
414
|
+
if cache_path:
|
|
415
|
+
logger.info(f"Using cache path: {cache_path}")
|
|
416
|
+
cache = ResultCache(cache_path)
|
|
417
|
+
else:
|
|
418
|
+
cache = ResultCache()
|
|
419
|
+
logger.info(f"Using default cache path: {cache.cache_path}")
|
|
420
|
+
|
|
421
|
+
app = get_leaderboard_app(cache)
|
|
422
|
+
|
|
423
|
+
logger.info(f"Starting leaderboard on {args.host}:{args.port}")
|
|
424
|
+
if args.share:
|
|
425
|
+
logger.info("Creating public URL...")
|
|
426
|
+
|
|
427
|
+
logging.getLogger("mteb.load_results.task_results").setLevel(
|
|
428
|
+
logging.ERROR
|
|
429
|
+
) # Warnings related to task split
|
|
430
|
+
logging.getLogger("mteb.model_meta").setLevel(
|
|
431
|
+
logging.ERROR
|
|
432
|
+
) # Warning related to model metadata (fetch_from_hf=False)
|
|
433
|
+
logging.getLogger("mteb.load_results.benchmark_results").setLevel(
|
|
434
|
+
logging.ERROR
|
|
435
|
+
) # Warning related to model metadata (fetch_from_hf=False)
|
|
436
|
+
warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
|
|
437
|
+
|
|
438
|
+
# Head content for Tailwind CSS
|
|
439
|
+
head = """
|
|
440
|
+
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
|
|
441
|
+
"""
|
|
442
|
+
|
|
443
|
+
app.launch(
|
|
444
|
+
server_name=args.host,
|
|
445
|
+
server_port=args.port,
|
|
446
|
+
share=args.share,
|
|
447
|
+
theme=gr.themes.Soft(
|
|
448
|
+
font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
|
|
449
|
+
),
|
|
450
|
+
head=head,
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
|
|
359
454
|
def build_cli() -> argparse.ArgumentParser:
|
|
360
455
|
"""Builds the argument parser for the MTEB CLI.
|
|
361
456
|
|
|
@@ -375,6 +470,7 @@ def build_cli() -> argparse.ArgumentParser:
|
|
|
375
470
|
_add_available_tasks_parser(subparsers)
|
|
376
471
|
_add_available_benchmarks_parser(subparsers)
|
|
377
472
|
_add_create_meta_parser(subparsers)
|
|
473
|
+
_add_leaderboard_parser(subparsers)
|
|
378
474
|
|
|
379
475
|
return parser
|
|
380
476
|
|