mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +4 -0
- mteb/_create_dataloaders.py +6 -3
- mteb/_evaluators/any_sts_evaluator.py +21 -12
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
- mteb/_evaluators/pair_classification_evaluator.py +30 -38
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +102 -0
- mteb/abstasks/_statistics_calculation.py +6 -2
- mteb/abstasks/classification.py +0 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/clustering_legacy.py +3 -0
- mteb/abstasks/multilabel_classification.py +10 -3
- mteb/abstasks/pair_classification.py +8 -1
- mteb/abstasks/sts.py +7 -0
- mteb/abstasks/task_metadata.py +1 -0
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +74 -15
- mteb/benchmarks/benchmarks/__init__.py +8 -0
- mteb/benchmarks/benchmarks/benchmarks.py +259 -15
- mteb/benchmarks/get_benchmark.py +2 -0
- mteb/cache.py +47 -10
- mteb/deprecated_evaluator.py +8 -13
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/evaluate.py +65 -45
- mteb/leaderboard/app.py +268 -133
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +21 -17
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/get_model_meta.py +3 -114
- mteb/models/instruct_wrapper.py +5 -1
- mteb/models/model_implementations/align_models.py +7 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +8 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +60 -0
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +11 -0
- mteb/models/model_implementations/blip_models.py +27 -0
- mteb/models/model_implementations/bm25.py +1 -0
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +9 -0
- mteb/models/model_implementations/cde_models.py +14 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +162 -0
- mteb/models/model_implementations/codesage_models.py +15 -0
- mteb/models/model_implementations/cohere_models.py +8 -1
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +14 -6
- mteb/models/model_implementations/colqwen_models.py +271 -1
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +171 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +12 -101
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +58 -0
- mteb/models/model_implementations/facebookai.py +193 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +11 -5
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +78 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +255 -2
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +209 -5
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +31 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +3 -2
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +3 -0
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +362 -0
- mteb/models/model_implementations/mme5_models.py +1 -0
- mteb/models/model_implementations/moco_models.py +11 -0
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/mxbai_models.py +9 -0
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +156 -4
- mteb/models/model_implementations/nomic_models_vision.py +7 -2
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
- mteb/models/model_implementations/nvidia_models.py +4 -1
- mteb/models/model_implementations/octen_models.py +195 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +24 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +4 -2
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +8 -0
- mteb/models/model_implementations/promptriever_models.py +8 -4
- mteb/models/model_implementations/pylate_models.py +37 -4
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +6 -3
- mteb/models/model_implementations/qzhou_models.py +3 -1
- mteb/models/model_implementations/random_baseline.py +16 -21
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +1 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +51 -0
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +57 -0
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/ua_sentence_models.py +10 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +2 -0
- mteb/models/model_implementations/vi_vn_models.py +39 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +8 -2
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +442 -22
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
- mteb/models/search_wrappers.py +165 -48
- mteb/models/sentence_transformer_wrapper.py +2 -7
- mteb/results/benchmark_results.py +88 -47
- mteb/results/model_result.py +11 -4
- mteb/results/task_result.py +37 -19
- mteb/similarity_functions.py +49 -0
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +2 -1
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/_encoder_io.py +7 -2
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
mteb/leaderboard/figures.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from typing import get_args
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
@@ -7,6 +8,8 @@ import plotly.graph_objects as go
|
|
|
7
8
|
|
|
8
9
|
from mteb.abstasks.task_metadata import TaskType
|
|
9
10
|
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
def _text_plot(text: str):
|
|
12
15
|
"""Returns empty scatter plot with text added, this can be great for error messages."""
|
|
@@ -29,16 +32,17 @@ def _failsafe_plot(fun):
|
|
|
29
32
|
try:
|
|
30
33
|
return fun(*args, **kwargs)
|
|
31
34
|
except Exception as e:
|
|
35
|
+
logger.error(f"Plot generation failed: {e}")
|
|
32
36
|
return _text_plot(f"Couldn't produce plot. Reason: {e}")
|
|
33
37
|
|
|
34
38
|
return wrapper
|
|
35
39
|
|
|
36
40
|
|
|
37
|
-
def _parse_n_params(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
41
|
+
def _parse_n_params(params: float | None) -> int | float:
|
|
42
|
+
"""Specified in billions."""
|
|
43
|
+
if params is None or np.isnan(params):
|
|
44
|
+
return None
|
|
45
|
+
return int(params * 1e9)
|
|
42
46
|
|
|
43
47
|
|
|
44
48
|
def _parse_model_name(name: str) -> str:
|
|
@@ -51,20 +55,14 @@ def _parse_model_name(name: str) -> str:
|
|
|
51
55
|
|
|
52
56
|
|
|
53
57
|
def _parse_float(value) -> float:
|
|
54
|
-
|
|
55
|
-
if value == "Infinite":
|
|
56
|
-
return np.inf
|
|
57
|
-
else:
|
|
58
|
-
return float(value)
|
|
59
|
-
except ValueError:
|
|
58
|
+
if value is None or np.isnan(value):
|
|
60
59
|
return np.nan
|
|
60
|
+
return float(value)
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def _process_max_tokens(x):
|
|
64
|
-
if pd.isna(x):
|
|
64
|
+
if pd.isna(x) or x is None or np.isinf(x):
|
|
65
65
|
return "Unknown"
|
|
66
|
-
if np.isinf(x):
|
|
67
|
-
return "Infinite"
|
|
68
66
|
return str(int(x))
|
|
69
67
|
|
|
70
68
|
|
|
@@ -112,7 +110,7 @@ def _add_size_guide(fig: go.Figure):
|
|
|
112
110
|
@_failsafe_plot
|
|
113
111
|
def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
|
|
114
112
|
df = df.copy()
|
|
115
|
-
df["Number of Parameters"] = df["Number of Parameters"].map(_parse_n_params)
|
|
113
|
+
df["Number of Parameters"] = df["Number of Parameters (B)"].map(_parse_n_params)
|
|
116
114
|
df["Model"] = df["Model"].map(_parse_model_name)
|
|
117
115
|
df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "")
|
|
118
116
|
df["Embedding Dimensions"] = df["Embedding Dimensions"].map(_parse_float)
|
mteb/leaderboard/table.py
CHANGED
|
@@ -26,16 +26,6 @@ def _format_scores(score: float) -> float:
|
|
|
26
26
|
return round(score * 100, 2)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def _get_column_types(df: pd.DataFrame) -> list[str]:
|
|
30
|
-
types = []
|
|
31
|
-
for column_name in df.columns:
|
|
32
|
-
if is_numeric_dtype(df[column_name]):
|
|
33
|
-
types.append("number")
|
|
34
|
-
else:
|
|
35
|
-
types.append("str")
|
|
36
|
-
return types
|
|
37
|
-
|
|
38
|
-
|
|
39
29
|
def _get_column_widths(df: pd.DataFrame) -> list[str]:
|
|
40
30
|
# Please do not remove this function when refactoring.
|
|
41
31
|
# Column width calculation seemingly changes regularly with Gradio releases,
|
|
@@ -120,6 +110,39 @@ def apply_per_task_styling_from_benchmark(
|
|
|
120
110
|
return _apply_per_task_table_styling(per_task_df)
|
|
121
111
|
|
|
122
112
|
|
|
113
|
+
def apply_per_language_styling_from_benchmark(
    benchmark_instance: Benchmark, benchmark_results: BenchmarkResults
) -> gr.DataFrame:
    """Build the benchmark's per-language table and style it for display.

    The table is produced by the benchmark instance's own
    ``_create_per_language_table`` method, so benchmark subclasses can
    supply their own table-generation logic.

    Args:
        benchmark_instance: The benchmark instance
        benchmark_results: BenchmarkResults object containing model results (may be pre-filtered)

    Returns:
        Styled gr.DataFrame ready for display in the leaderboard
    """
    table = benchmark_instance._create_per_language_table(benchmark_results)

    # A table carrying a "No results" column is a placeholder: show it unstyled.
    has_results = "No results" not in table.columns
    if has_results:
        return _apply_per_language_table_styling(table)
    return gr.DataFrame(table)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _style_number_of_parameters(num_params: float) -> str:
|
|
139
|
+
"""Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
|
|
140
|
+
if num_params >= 1:
|
|
141
|
+
return f"{num_params:.1f}"
|
|
142
|
+
else:
|
|
143
|
+
return f"{num_params:.3f}"
|
|
144
|
+
|
|
145
|
+
|
|
123
146
|
def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
124
147
|
"""Apply styling to a raw summary DataFrame
|
|
125
148
|
|
|
@@ -130,7 +153,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
|
130
153
|
"Rank (Borda)",
|
|
131
154
|
"Rank",
|
|
132
155
|
"Model",
|
|
133
|
-
"Number of Parameters",
|
|
156
|
+
"Number of Parameters (B)",
|
|
134
157
|
"Embedding Dimensions",
|
|
135
158
|
"Max Tokens",
|
|
136
159
|
"Memory Usage (MB)",
|
|
@@ -156,7 +179,14 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
|
156
179
|
joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
|
|
157
180
|
|
|
158
181
|
joint_table_style = joint_table.style.format(
|
|
159
|
-
{
|
|
182
|
+
{
|
|
183
|
+
**dict.fromkeys(score_columns, "{:.2f}"),
|
|
184
|
+
"Rank (Borda)": "{:.0f}",
|
|
185
|
+
"Memory Usage (MB)": "{:.0f}",
|
|
186
|
+
"Embedding Dimensions": "{:.0f}",
|
|
187
|
+
"Max Tokens": "{:.0f}",
|
|
188
|
+
"Number of Parameters (B)": lambda x: _style_number_of_parameters(x),
|
|
189
|
+
},
|
|
160
190
|
na_rep="",
|
|
161
191
|
)
|
|
162
192
|
joint_table_style = joint_table_style.highlight_min(
|
|
@@ -186,7 +216,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
|
186
216
|
gmap=gmap_values.loc[mask],
|
|
187
217
|
)
|
|
188
218
|
|
|
189
|
-
column_types =
|
|
219
|
+
column_types = ["auto" for _ in joint_table_style.data.columns]
|
|
190
220
|
# setting model name column to markdown
|
|
191
221
|
if len(column_types) > 1:
|
|
192
222
|
column_types[1] = "markdown"
|
|
@@ -204,8 +234,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
|
204
234
|
pinned_columns=2,
|
|
205
235
|
column_widths=column_widths,
|
|
206
236
|
wrap=True,
|
|
207
|
-
|
|
208
|
-
show_copy_button=True,
|
|
237
|
+
buttons=["copy", "fullscreen"],
|
|
209
238
|
show_search="filter",
|
|
210
239
|
)
|
|
211
240
|
|
|
@@ -223,11 +252,47 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
|
|
|
223
252
|
"{:.2f}", subset=task_score_columns, na_rep=""
|
|
224
253
|
).highlight_max(subset=task_score_columns, props="font-weight: bold")
|
|
225
254
|
|
|
255
|
+
# setting task name column width to 250px
|
|
256
|
+
column_widths = _get_column_widths(per_task_style.data)
|
|
257
|
+
if len(column_widths) > 0:
|
|
258
|
+
column_widths[0] = "250px"
|
|
259
|
+
|
|
226
260
|
return gr.DataFrame(
|
|
227
261
|
per_task_style,
|
|
228
262
|
interactive=False,
|
|
229
263
|
pinned_columns=1,
|
|
230
|
-
|
|
231
|
-
|
|
264
|
+
column_widths=column_widths,
|
|
265
|
+
buttons=["copy", "fullscreen"],
|
|
266
|
+
show_search="filter",
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _apply_per_language_table_styling(per_language: pd.DataFrame) -> gr.DataFrame:
    """Apply styling to a raw per-language DataFrame.

    Numeric columns are treated as scores: they are multiplied by 100 and
    shown with two decimals. Tables with at most 100 columns additionally get
    the per-column maximum highlighted in bold.

    Args:
        per_language: Raw per-language table. It is not modified; the styling
            is applied to a copy.

    Returns:
        Styled gr.DataFrame ready for display in the leaderboard
    """
    # Work on a copy so the caller's table is not rescaled in place (calling
    # this twice on the same frame would otherwise multiply by 100 twice).
    per_language = per_language.copy()
    language_score_columns = per_language.select_dtypes("number").columns
    per_language[language_score_columns] *= 100

    if len(per_language.columns) > 100:  # Avoid gradio error on very wide tables
        # Plain DataFrame, not a Styler: it has no `.data` attribute, so the
        # frame itself is the source for the width calculation.
        per_language_style = per_language.round(2)
        width_source = per_language_style
    else:
        per_language_style = per_language.style.format(
            "{:.2f}", subset=language_score_columns, na_rep=""
        ).highlight_max(subset=language_score_columns, props="font-weight: bold")
        width_source = per_language_style.data

    # setting the first (pinned) column width to 250px
    column_widths = _get_column_widths(width_source)
    if len(column_widths) > 0:
        column_widths[0] = "250px"

    return gr.DataFrame(
        per_language_style,
        interactive=False,
        pinned_columns=1,
        column_widths=column_widths,
        buttons=["copy", "fullscreen"],
        show_search="filter",
    )
|
mteb/models/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .cache_wrappers import CachedEmbeddingWrapper
|
|
1
|
+
from .cache_wrappers import CacheBackendProtocol, CachedEmbeddingWrapper
|
|
2
2
|
from .model_meta import ModelMeta
|
|
3
3
|
from .models_protocols import (
|
|
4
4
|
CrossEncoderProtocol,
|
|
@@ -6,6 +6,7 @@ from .models_protocols import (
|
|
|
6
6
|
MTEBModels,
|
|
7
7
|
SearchProtocol,
|
|
8
8
|
)
|
|
9
|
+
from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
|
|
9
10
|
from .search_wrappers import SearchCrossEncoderWrapper, SearchEncoderWrapper
|
|
10
11
|
from .sentence_transformer_wrapper import (
|
|
11
12
|
CrossEncoderWrapper,
|
|
@@ -14,10 +15,12 @@ from .sentence_transformer_wrapper import (
|
|
|
14
15
|
)
|
|
15
16
|
|
|
16
17
|
__all__ = [
|
|
18
|
+
"CacheBackendProtocol",
|
|
17
19
|
"CachedEmbeddingWrapper",
|
|
18
20
|
"CrossEncoderProtocol",
|
|
19
21
|
"CrossEncoderWrapper",
|
|
20
22
|
"EncoderProtocol",
|
|
23
|
+
"IndexEncoderSearchProtocol",
|
|
21
24
|
"MTEBModels",
|
|
22
25
|
"ModelMeta",
|
|
23
26
|
"SearchCrossEncoderWrapper",
|
mteb/models/abs_encoder.py
CHANGED
|
@@ -54,11 +54,11 @@ class AbsEncoder(ABC):
|
|
|
54
54
|
"""A wrapper function around the model.encode method that handles the prompt_name argument and standardizes the output to a numpy array.
|
|
55
55
|
|
|
56
56
|
The order of priorities for prompt selection are:
|
|
57
|
-
1. Composed prompt of task name + prompt type
|
|
57
|
+
1. Composed prompt of task name + prompt type
|
|
58
58
|
2. Specific task prompt
|
|
59
|
-
3. Composed prompt of task type + prompt type
|
|
59
|
+
3. Composed prompt of task type + prompt type
|
|
60
60
|
4. Specific task type prompt
|
|
61
|
-
5. Specific prompt type
|
|
61
|
+
5. Specific prompt type
|
|
62
62
|
|
|
63
63
|
Args:
|
|
64
64
|
task_metadata: The task name to use for building the encoding prompt
|
|
@@ -105,7 +105,7 @@ class AbsEncoder(ABC):
|
|
|
105
105
|
|
|
106
106
|
Args:
|
|
107
107
|
task_metadata: The metadata of the task.
|
|
108
|
-
prompt_type: The name type of prompt.
|
|
108
|
+
prompt_type: The name type of prompt.
|
|
109
109
|
"""
|
|
110
110
|
if not self.model_prompts:
|
|
111
111
|
return None
|
|
@@ -210,13 +210,11 @@ class AbsEncoder(ABC):
|
|
|
210
210
|
task_metadata: The metadata of the task. Sentence-transformers uses this to
|
|
211
211
|
determine which prompt to use from a specified dictionary.
|
|
212
212
|
The order of priorities for prompt selection are:
|
|
213
|
-
1.
|
|
214
|
-
2. Specific task prompt
|
|
215
|
-
3.
|
|
216
|
-
4.
|
|
217
|
-
|
|
218
|
-
6. Default prompt from the task definition
|
|
219
|
-
prompt_type: The name type of prompt. (query or passage)
|
|
213
|
+
1. Specific task prompt
|
|
214
|
+
2. Specific task type prompt
|
|
215
|
+
3. Specific prompt type
|
|
216
|
+
4. Default prompt from the task definition
|
|
217
|
+
prompt_type: The name type of prompt.
|
|
220
218
|
|
|
221
219
|
Returns:
|
|
222
220
|
The instruction/prompt to be used for encoding sentences.
|
|
@@ -224,6 +222,12 @@ class AbsEncoder(ABC):
|
|
|
224
222
|
prompt = task_metadata.prompt
|
|
225
223
|
if self.prompts_dict and task_metadata.name in self.prompts_dict:
|
|
226
224
|
prompt = self.prompts_dict[task_metadata.name]
|
|
225
|
+
elif self.prompts_dict and task_metadata.type in self.prompts_dict:
|
|
226
|
+
prompt = self.prompts_dict[task_metadata.type]
|
|
227
|
+
elif (
|
|
228
|
+
self.prompts_dict and prompt_type and prompt_type.value in self.prompts_dict
|
|
229
|
+
):
|
|
230
|
+
prompt = self.prompts_dict[prompt_type.value]
|
|
227
231
|
|
|
228
232
|
if isinstance(prompt, dict) and prompt_type:
|
|
229
233
|
if prompt.get(prompt_type.value):
|
|
@@ -246,7 +250,7 @@ class AbsEncoder(ABC):
|
|
|
246
250
|
|
|
247
251
|
Args:
|
|
248
252
|
instruction: The instruction to be formatted.
|
|
249
|
-
prompt_type: The name type of prompt.
|
|
253
|
+
prompt_type: The name type of prompt.
|
|
250
254
|
"""
|
|
251
255
|
if self.instruction_template is None:
|
|
252
256
|
raise ValueError(
|
|
@@ -269,7 +273,7 @@ class AbsEncoder(ABC):
|
|
|
269
273
|
|
|
270
274
|
Args:
|
|
271
275
|
task_metadata: The metadata of the task
|
|
272
|
-
prompt_type: The name type of prompt.
|
|
276
|
+
prompt_type: The name type of prompt.
|
|
273
277
|
|
|
274
278
|
Returns:
|
|
275
279
|
The instruction to be used for encoding sentences.
|
|
@@ -373,14 +377,14 @@ class AbsEncoder(ABC):
|
|
|
373
377
|
task_metadata: The metadata of the task. Sentence-transformers uses this to
|
|
374
378
|
determine which prompt to use from a specified dictionary.
|
|
375
379
|
The order of priorities for prompt selection are:
|
|
376
|
-
1. Composed prompt of task name + prompt type
|
|
380
|
+
1. Composed prompt of task name + prompt type
|
|
377
381
|
2. Specific task prompt
|
|
378
|
-
3. Composed prompt of task type + prompt type
|
|
382
|
+
3. Composed prompt of task type + prompt type
|
|
379
383
|
4. Specific task type prompt
|
|
380
|
-
5. Specific prompt type
|
|
384
|
+
5. Specific prompt type
|
|
381
385
|
hf_split: Split of current task
|
|
382
386
|
hf_subset: Subset of current task
|
|
383
|
-
prompt_type: The name type of prompt.
|
|
387
|
+
prompt_type: The name type of prompt.
|
|
384
388
|
**kwargs: Additional arguments to pass to the encoder.
|
|
385
389
|
|
|
386
390
|
Returns:
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
|
|
3
|
-
from PIL import Image
|
|
4
|
-
|
|
5
3
|
from mteb.types import BatchedInput
|
|
6
4
|
|
|
7
5
|
|
|
@@ -11,6 +9,8 @@ def _hash_item(item: BatchedInput) -> str:
|
|
|
11
9
|
item_hash = hashlib.sha256(item["text"].encode()).hexdigest()
|
|
12
10
|
|
|
13
11
|
if "image" in item:
|
|
12
|
+
from PIL import Image
|
|
13
|
+
|
|
14
14
|
image: Image.Image = item["image"]
|
|
15
15
|
item_hash += hashlib.sha256(image.tobytes()).hexdigest()
|
|
16
16
|
|
mteb/models/get_model_meta.py
CHANGED
|
@@ -1,25 +1,15 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
import difflib
|
|
4
2
|
import logging
|
|
5
3
|
from collections.abc import Iterable
|
|
6
|
-
from typing import
|
|
7
|
-
|
|
8
|
-
from huggingface_hub import ModelCard
|
|
9
|
-
from huggingface_hub.errors import RepositoryNotFoundError
|
|
4
|
+
from typing import Any
|
|
10
5
|
|
|
11
6
|
from mteb.abstasks import AbsTask
|
|
12
7
|
from mteb.models import (
|
|
13
|
-
CrossEncoderWrapper,
|
|
14
8
|
ModelMeta,
|
|
15
9
|
MTEBModels,
|
|
16
|
-
sentence_transformers_loader,
|
|
17
10
|
)
|
|
18
11
|
from mteb.models.model_implementations import MODEL_REGISTRY
|
|
19
12
|
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
22
|
-
|
|
23
13
|
logger = logging.getLogger(__name__)
|
|
24
14
|
|
|
25
15
|
|
|
@@ -100,24 +90,9 @@ def get_model(
|
|
|
100
90
|
Returns:
|
|
101
91
|
A model object
|
|
102
92
|
"""
|
|
103
|
-
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
104
|
-
|
|
105
93
|
meta = get_model_meta(model_name, revision)
|
|
106
94
|
model = meta.load_model(**kwargs)
|
|
107
95
|
|
|
108
|
-
# If revision not available in the modelmeta, try to extract it from sentence-transformers
|
|
109
|
-
if hasattr(model, "model") and isinstance(model.model, SentenceTransformer): # type: ignore
|
|
110
|
-
_meta = _model_meta_from_sentence_transformers(model.model) # type: ignore
|
|
111
|
-
if meta.revision is None:
|
|
112
|
-
meta.revision = _meta.revision if _meta.revision else meta.revision
|
|
113
|
-
if not meta.similarity_fn_name:
|
|
114
|
-
meta.similarity_fn_name = _meta.similarity_fn_name
|
|
115
|
-
|
|
116
|
-
elif isinstance(model, CrossEncoder):
|
|
117
|
-
_meta = _model_meta_from_cross_encoder(model.model)
|
|
118
|
-
if meta.revision is None:
|
|
119
|
-
meta.revision = _meta.revision if _meta.revision else meta.revision
|
|
120
|
-
|
|
121
96
|
model.mteb_model_meta = meta # type: ignore
|
|
122
97
|
return model
|
|
123
98
|
|
|
@@ -147,12 +122,8 @@ def get_model_meta(
|
|
|
147
122
|
# Pass model_name as a logging argument: the original literal was not an
# f-string, so "{model_name}" was logged verbatim instead of the model name.
logger.info(
    "Model not found in model registry. Attempting to extract metadata by loading the model (%s) using HuggingFace.",
    model_name,
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
meta.revision = revision
|
|
153
|
-
return meta
|
|
154
|
-
except RepositoryNotFoundError:
|
|
155
|
-
pass
|
|
125
|
+
meta = ModelMeta.from_hub(model_name, revision)
|
|
126
|
+
return meta
|
|
156
127
|
|
|
157
128
|
not_found_msg = f"Model '{model_name}' not found in MTEB registry"
|
|
158
129
|
not_found_msg += " nor on the Huggingface Hub." if fetch_from_hf else "."
|
|
@@ -170,85 +141,3 @@ def get_model_meta(
|
|
|
170
141
|
suggestion = f" Did you mean: '{close_matches[0]}'?"
|
|
171
142
|
|
|
172
143
|
raise KeyError(not_found_msg + suggestion)
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def _model_meta_from_hf_hub(model_name: str) -> ModelMeta:
|
|
176
|
-
card = ModelCard.load(model_name)
|
|
177
|
-
card_data = card.data.to_dict()
|
|
178
|
-
frameworks = ["PyTorch"]
|
|
179
|
-
loader = None
|
|
180
|
-
if card_data.get("library_name", None) == "sentence-transformers":
|
|
181
|
-
frameworks.append("Sentence Transformers")
|
|
182
|
-
loader = sentence_transformers_loader
|
|
183
|
-
revision = card_data.get("base_model_revision", None)
|
|
184
|
-
license = card_data.get("license", None)
|
|
185
|
-
return ModelMeta(
|
|
186
|
-
loader=loader,
|
|
187
|
-
name=model_name,
|
|
188
|
-
revision=revision,
|
|
189
|
-
release_date=None,
|
|
190
|
-
languages=None,
|
|
191
|
-
license=license,
|
|
192
|
-
framework=frameworks, # type: ignore
|
|
193
|
-
training_datasets=None,
|
|
194
|
-
similarity_fn_name=None,
|
|
195
|
-
n_parameters=None,
|
|
196
|
-
memory_usage_mb=None,
|
|
197
|
-
max_tokens=None,
|
|
198
|
-
embed_dim=None,
|
|
199
|
-
open_weights=True,
|
|
200
|
-
public_training_code=None,
|
|
201
|
-
public_training_data=None,
|
|
202
|
-
use_instructions=None,
|
|
203
|
-
)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta:
|
|
207
|
-
return ModelMeta(
|
|
208
|
-
loader=CrossEncoderWrapper,
|
|
209
|
-
name=model.model.name_or_path,
|
|
210
|
-
revision=model.config._commit_hash,
|
|
211
|
-
release_date=None,
|
|
212
|
-
languages=None,
|
|
213
|
-
framework=["Sentence Transformers"],
|
|
214
|
-
similarity_fn_name=None,
|
|
215
|
-
n_parameters=None,
|
|
216
|
-
memory_usage_mb=None,
|
|
217
|
-
max_tokens=None,
|
|
218
|
-
embed_dim=None,
|
|
219
|
-
license=None,
|
|
220
|
-
open_weights=True,
|
|
221
|
-
public_training_code=None,
|
|
222
|
-
public_training_data=None,
|
|
223
|
-
use_instructions=None,
|
|
224
|
-
training_datasets=None,
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def _model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta:
|
|
229
|
-
name: str | None = (
|
|
230
|
-
model.model_card_data.model_name
|
|
231
|
-
if model.model_card_data.model_name
|
|
232
|
-
else model.model_card_data.base_model
|
|
233
|
-
)
|
|
234
|
-
embeddings_dim = model.get_sentence_embedding_dimension()
|
|
235
|
-
meta = ModelMeta(
|
|
236
|
-
loader=sentence_transformers_loader,
|
|
237
|
-
name=name,
|
|
238
|
-
revision=model.model_card_data.base_model_revision,
|
|
239
|
-
release_date=None,
|
|
240
|
-
languages=None,
|
|
241
|
-
framework=["Sentence Transformers"],
|
|
242
|
-
similarity_fn_name=None,
|
|
243
|
-
n_parameters=None,
|
|
244
|
-
memory_usage_mb=None,
|
|
245
|
-
max_tokens=None,
|
|
246
|
-
embed_dim=embeddings_dim,
|
|
247
|
-
license=None,
|
|
248
|
-
open_weights=True,
|
|
249
|
-
public_training_code=None,
|
|
250
|
-
public_training_data=None,
|
|
251
|
-
use_instructions=None,
|
|
252
|
-
training_datasets=None,
|
|
253
|
-
)
|
|
254
|
-
return meta
|
mteb/models/instruct_wrapper.py
CHANGED
|
@@ -122,7 +122,8 @@ class InstructSentenceTransformerModel(AbsEncoder):
|
|
|
122
122
|
apply_instruction_to_passages: Whether to apply the instruction template to the passages.
|
|
123
123
|
padding_side: Padding side. If None, the padding side will be read from the model config.
|
|
124
124
|
add_eos_token: Whether to add the eos token to each input example.
|
|
125
|
-
prompts_dict: Dictionary of task names to prompt names. If
|
|
125
|
+
prompts_dict: Dictionary of task names to prompt names. If task name is missing in the dict or prompts dict is None, prompt from task metadata or
|
|
126
|
+
AbsTask.abstask_prompt will be used.
|
|
126
127
|
**kwargs: Kwargs for Sentence Transformer model.
|
|
127
128
|
"""
|
|
128
129
|
from sentence_transformers import SentenceTransformer
|
|
@@ -153,6 +154,9 @@ class InstructSentenceTransformerModel(AbsEncoder):
|
|
|
153
154
|
|
|
154
155
|
self.model_name = model_name
|
|
155
156
|
self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
|
|
157
|
+
if max_seq_length:
|
|
158
|
+
# https://github.com/huggingface/sentence-transformers/issues/3575
|
|
159
|
+
self.model.max_seq_length = max_seq_length
|
|
156
160
|
self.apply_instruction_to_passages = apply_instruction_to_passages
|
|
157
161
|
self.prompts_dict = prompts_dict
|
|
158
162
|
|
|
@@ -105,6 +105,7 @@ class ALIGNModel(AbsEncoder):
|
|
|
105
105
|
align_base = ModelMeta(
|
|
106
106
|
loader=ALIGNModel,
|
|
107
107
|
name="kakaobrain/align-base",
|
|
108
|
+
model_type=["dense"],
|
|
108
109
|
languages=["eng-Latn"],
|
|
109
110
|
revision="e96a37facc7b1f59090ece82293226b817afd6ba",
|
|
110
111
|
release_date="2023-02-24",
|
|
@@ -124,4 +125,10 @@ align_base = ModelMeta(
|
|
|
124
125
|
training_datasets=set(
|
|
125
126
|
# COYO-700M
|
|
126
127
|
),
|
|
128
|
+
citation="""@misc{kakaobrain2022coyo-align,
|
|
129
|
+
title = {COYO-ALIGN},
|
|
130
|
+
author = {Yoon, Boogeo and Lee, Youhan and Baek, Woonhyuk},
|
|
131
|
+
year = {2022},
|
|
132
|
+
howpublished = {https://github.com/kakaobrain/coyo-align},
|
|
133
|
+
}""",
|
|
127
134
|
)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from mteb.models.model_implementations.model2vec_models import Model2VecModel
|
|
4
|
+
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
5
|
+
|
|
6
|
+
model2vecdk = ModelMeta(
|
|
7
|
+
loader=Model2VecModel, # type: ignore
|
|
8
|
+
name="andersborges/model2vecdk",
|
|
9
|
+
model_type=["dense"],
|
|
10
|
+
languages=["dan-Latn"],
|
|
11
|
+
open_weights=True,
|
|
12
|
+
revision="cb576c78dcc1b729e4612645f61db59929d69e61",
|
|
13
|
+
release_date="2025-11-21",
|
|
14
|
+
n_parameters=48042496,
|
|
15
|
+
memory_usage_mb=183,
|
|
16
|
+
max_tokens=np.inf,
|
|
17
|
+
embed_dim=256,
|
|
18
|
+
license="mit",
|
|
19
|
+
similarity_fn_name=ScoringFunction.COSINE,
|
|
20
|
+
framework=["NumPy", "Sentence Transformers"],
|
|
21
|
+
reference="https://huggingface.co/andersborges/model2vecdk",
|
|
22
|
+
use_instructions=False,
|
|
23
|
+
adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
|
|
24
|
+
superseded_by=None,
|
|
25
|
+
training_datasets=set(), # distilled
|
|
26
|
+
public_training_code="https://github.com/andersborges/dkmodel2vec",
|
|
27
|
+
public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
|
|
28
|
+
citation="""@article{minishlab2024model2vec,
|
|
29
|
+
author = {Tulkens, Stephan and {van Dongen}, Thomas},
|
|
30
|
+
title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
|
|
31
|
+
year = {2024},
|
|
32
|
+
url = {https://github.com/MinishLab/model2vec}
|
|
33
|
+
}""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
model2vecdk_stem = ModelMeta(
|
|
38
|
+
loader=Model2VecModel, # type: ignore
|
|
39
|
+
name="andersborges/model2vecdk-stem",
|
|
40
|
+
model_type=["dense"],
|
|
41
|
+
languages=["dan-Latn"],
|
|
42
|
+
open_weights=True,
|
|
43
|
+
revision="cb576c78dcc1b729e4612645f61db59929d69e61",
|
|
44
|
+
release_date="2025-11-21",
|
|
45
|
+
n_parameters=48578560,
|
|
46
|
+
memory_usage_mb=185,
|
|
47
|
+
max_tokens=np.inf,
|
|
48
|
+
embed_dim=256,
|
|
49
|
+
license="mit",
|
|
50
|
+
similarity_fn_name=ScoringFunction.COSINE,
|
|
51
|
+
framework=["NumPy", "Sentence Transformers"],
|
|
52
|
+
reference="https://huggingface.co/andersborges/model2vecdk",
|
|
53
|
+
use_instructions=False,
|
|
54
|
+
adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
|
|
55
|
+
superseded_by=None,
|
|
56
|
+
training_datasets=set(), # distilled
|
|
57
|
+
public_training_code="https://github.com/andersborges/dkmodel2vec",
|
|
58
|
+
public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
|
|
59
|
+
citation="""@article{minishlab2024model2vec,
|
|
60
|
+
author = {Tulkens, Stephan and {van Dongen}, Thomas},
|
|
61
|
+
title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
|
|
62
|
+
year = {2024},
|
|
63
|
+
url = {https://github.com/MinishLab/model2vec}
|
|
64
|
+
}""",
|
|
65
|
+
)
|
|
@@ -4,6 +4,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
|
|
|
4
4
|
arabic_triplet_matryoshka = ModelMeta(
|
|
5
5
|
loader=sentence_transformers_loader,
|
|
6
6
|
name="Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2",
|
|
7
|
+
model_type=["dense"],
|
|
7
8
|
languages=["ara-Arab"],
|
|
8
9
|
open_weights=True,
|
|
9
10
|
revision="ed357f222f0b6ea6670d2c9b5a1cb93950d34200",
|
|
@@ -23,4 +24,11 @@ arabic_triplet_matryoshka = ModelMeta(
|
|
|
23
24
|
training_datasets=set(
|
|
24
25
|
# "akhooli/arabic-triplets-1m-curated-sims-len"
|
|
25
26
|
),
|
|
27
|
+
citation="""
|
|
28
|
+
@article{nacar2025gate,
|
|
29
|
+
title={GATE: General Arabic Text Embedding for Enhanced Semantic Textual Similarity with Matryoshka Representation Learning and Hybrid Loss Training},
|
|
30
|
+
author={Nacar, Omer and Koubaa, Anis and Sibaee, Serry and Al-Habashi, Yasser and Ammar, Adel and Boulila, Wadii},
|
|
31
|
+
journal={arXiv preprint arXiv:2505.24581},
|
|
32
|
+
year={2025}
|
|
33
|
+
}""",
|
|
26
34
|
)
|