mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
|
|
2
2
|
from mteb.models.model_meta import ModelMeta
|
|
3
|
-
from mteb.
|
|
3
|
+
from mteb.types import PromptType
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def instruction_template(
|
|
@@ -43,6 +43,7 @@ yuan_embedding_2_en = ModelMeta(
|
|
|
43
43
|
revision="b2fd15da3bcae3473c8529593825c15068f09fce",
|
|
44
44
|
release_date="2025-11-27",
|
|
45
45
|
n_parameters=595776512,
|
|
46
|
+
n_embedding_parameters=None,
|
|
46
47
|
memory_usage_mb=2272,
|
|
47
48
|
embed_dim=1024,
|
|
48
49
|
max_tokens=2048,
|
mteb/models/model_meta.py
CHANGED
|
@@ -3,17 +3,16 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
5
5
|
import warnings
|
|
6
|
-
from collections.abc import Callable
|
|
6
|
+
from collections.abc import Callable
|
|
7
7
|
from dataclasses import field
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from functools import partial
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
from huggingface_hub import (
|
|
14
|
-
GitCommitInfo,
|
|
15
15
|
ModelCard,
|
|
16
|
-
ModelCardData,
|
|
17
16
|
get_safetensors_metadata,
|
|
18
17
|
hf_hub_download,
|
|
19
18
|
list_repo_commits,
|
|
@@ -29,18 +28,27 @@ from huggingface_hub.errors import (
|
|
|
29
28
|
SafetensorsParsingError,
|
|
30
29
|
)
|
|
31
30
|
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
|
|
31
|
+
from sentence_transformers.models import Transformer
|
|
32
|
+
from torch import nn
|
|
32
33
|
from transformers import AutoConfig
|
|
33
|
-
from typing_extensions import Self
|
|
34
34
|
|
|
35
35
|
from mteb._helpful_enum import HelpfulStrEnum
|
|
36
36
|
from mteb.languages import check_language_code
|
|
37
|
-
from mteb.models.models_protocols import
|
|
37
|
+
from mteb.models.models_protocols import MTEBModels
|
|
38
38
|
from mteb.types import ISOLanguageScript, Licenses, Modalities, StrDate, StrURL
|
|
39
39
|
|
|
40
40
|
if TYPE_CHECKING:
|
|
41
|
+
from collections.abc import Sequence
|
|
42
|
+
|
|
43
|
+
from huggingface_hub import (
|
|
44
|
+
GitCommitInfo,
|
|
45
|
+
ModelCardData,
|
|
46
|
+
)
|
|
41
47
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
48
|
+
from typing_extensions import Self
|
|
42
49
|
|
|
43
50
|
from mteb.abstasks import AbsTask
|
|
51
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
44
52
|
|
|
45
53
|
|
|
46
54
|
logger = logging.getLogger(__name__)
|
|
@@ -94,8 +102,9 @@ class ModelMeta(BaseModel):
|
|
|
94
102
|
loader: The function that loads the model. If None it assumes that the model is not implemented.
|
|
95
103
|
loader_kwargs: The keyword arguments to pass to the loader function.
|
|
96
104
|
name: The name of the model, ideally the name on huggingface. It should be in the format "organization/model_name".
|
|
97
|
-
n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be
|
|
98
|
-
|
|
105
|
+
n_parameters: The total number of parameters in the model, e.g. `7_000_000` for a 7M parameter model. Can be None if the number of parameters is unknown.
|
|
106
|
+
n_embedding_parameters: The number of parameters used for the embedding layer. Can be None if the number of embedding parameters is not known (e.g. for proprietary models).
|
|
107
|
+
n_active_parameters_override: The number of active parameters used by the model. Should be used **only** for Mixture of Experts models.
|
|
99
108
|
memory_usage_mb: The memory usage of the model in MB. Can be None if the memory usage is not known (e.g. for proprietary models). To calculate it use the `calculate_memory_usage_mb` method.
|
|
100
109
|
max_tokens: The maximum number of tokens the model can handle. Can be None if the maximum number of tokens is not known (e.g. for proprietary
|
|
101
110
|
models).
|
|
@@ -134,6 +143,8 @@ class ModelMeta(BaseModel):
|
|
|
134
143
|
release_date: StrDate | None
|
|
135
144
|
languages: list[ISOLanguageScript] | None
|
|
136
145
|
n_parameters: int | None
|
|
146
|
+
n_active_parameters_override: int | None = None
|
|
147
|
+
n_embedding_parameters: int | None = None
|
|
137
148
|
memory_usage_mb: float | None
|
|
138
149
|
max_tokens: float | None
|
|
139
150
|
embed_dim: int | None
|
|
@@ -192,6 +203,16 @@ class ModelMeta(BaseModel):
|
|
|
192
203
|
"""
|
|
193
204
|
return "cross-encoder" in self.model_type
|
|
194
205
|
|
|
206
|
+
@property
|
|
207
|
+
def n_active_parameters(self):
|
|
208
|
+
"""Number of active parameters. Assumed to be `n_parameters - n_embedding_parameters`. Can be overwritten using `n_active_parameters_override` e.g. for MoE models."""
|
|
209
|
+
if self.n_active_parameters_override is not None:
|
|
210
|
+
return self.n_active_parameters_override
|
|
211
|
+
|
|
212
|
+
if self.n_parameters is not None and self.n_embedding_parameters is not None:
|
|
213
|
+
return self.n_parameters - self.n_embedding_parameters
|
|
214
|
+
return None
|
|
215
|
+
|
|
195
216
|
@field_validator("similarity_fn_name", mode="before")
|
|
196
217
|
@classmethod
|
|
197
218
|
def _validate_similarity_fn_name(cls, value: str) -> ScoringFunction | None:
|
|
@@ -384,6 +405,14 @@ class ModelMeta(BaseModel):
|
|
|
384
405
|
else model.model_card_data.base_model
|
|
385
406
|
)
|
|
386
407
|
meta = cls._from_hub(name, revision, compute_metadata)
|
|
408
|
+
try:
|
|
409
|
+
first = model[0]
|
|
410
|
+
|
|
411
|
+
if isinstance(first, Transformer):
|
|
412
|
+
emb = first.auto_model.get_input_embeddings()
|
|
413
|
+
meta.n_embedding_parameters = int(np.prod(emb.weight.shape))
|
|
414
|
+
except Exception as e:
|
|
415
|
+
logger.warning(f"Could not calculate embedding parameters for {name}: {e}")
|
|
387
416
|
meta.revision = model.model_card_data.base_model_revision or meta.revision
|
|
388
417
|
meta.max_tokens = model.max_seq_length
|
|
389
418
|
meta.embed_dim = model.get_sentence_embedding_dimension()
|
|
@@ -455,6 +484,15 @@ class ModelMeta(BaseModel):
|
|
|
455
484
|
from mteb.models import CrossEncoderWrapper
|
|
456
485
|
|
|
457
486
|
meta = cls._from_hub(model.model.name_or_path, revision, compute_metadata)
|
|
487
|
+
try:
|
|
488
|
+
emb = model.model.get_input_embeddings()
|
|
489
|
+
|
|
490
|
+
if isinstance(emb, nn.Embedding):
|
|
491
|
+
meta.n_embedding_parameters = int(np.prod(emb.weight.shape))
|
|
492
|
+
except Exception as e:
|
|
493
|
+
logger.warning(
|
|
494
|
+
f"Could not calculate embedding parameters for {model.model.name_or_path}: {e}"
|
|
495
|
+
)
|
|
458
496
|
meta.revision = model.config._commit_hash or meta.revision
|
|
459
497
|
meta.loader = CrossEncoderWrapper
|
|
460
498
|
meta.embed_dim = None
|
|
@@ -479,7 +517,7 @@ class ModelMeta(BaseModel):
|
|
|
479
517
|
if isinstance(tasks[0], str):
|
|
480
518
|
benchmark_datasets = set(tasks)
|
|
481
519
|
else:
|
|
482
|
-
tasks = cast(Sequence[
|
|
520
|
+
tasks = cast("Sequence[AbsTask]", tasks)
|
|
483
521
|
benchmark_datasets = set()
|
|
484
522
|
for task in tasks:
|
|
485
523
|
benchmark_datasets.add(task.metadata.name)
|
|
@@ -534,7 +572,7 @@ class ModelMeta(BaseModel):
|
|
|
534
572
|
if isinstance(tasks[0], str):
|
|
535
573
|
benchmark_datasets = set(tasks)
|
|
536
574
|
else:
|
|
537
|
-
tasks = cast(Sequence[
|
|
575
|
+
tasks = cast("Sequence[AbsTask]", tasks)
|
|
538
576
|
benchmark_datasets = {task.metadata.name for task in tasks}
|
|
539
577
|
overlap = training_datasets & benchmark_datasets
|
|
540
578
|
perc_overlap = 100 * (len(overlap) / len(benchmark_datasets))
|
mteb/models/models_protocols.py
CHANGED
|
@@ -1,22 +1,23 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from typing_extensions import Unpack
|
|
5
|
-
|
|
6
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
7
|
-
from mteb.types import (
|
|
8
|
-
Array,
|
|
9
|
-
BatchedInput,
|
|
10
|
-
CorpusDatasetType,
|
|
11
|
-
EncodeKwargs,
|
|
12
|
-
PromptType,
|
|
13
|
-
QueryDatasetType,
|
|
14
|
-
RetrievalOutputType,
|
|
15
|
-
TopRankedDocumentsType,
|
|
16
|
-
)
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
17
4
|
|
|
18
5
|
if TYPE_CHECKING:
|
|
6
|
+
from torch.utils.data import DataLoader
|
|
7
|
+
from typing_extensions import Unpack
|
|
8
|
+
|
|
9
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
10
|
from mteb.models.model_meta import ModelMeta
|
|
11
|
+
from mteb.types import (
|
|
12
|
+
Array,
|
|
13
|
+
BatchedInput,
|
|
14
|
+
CorpusDatasetType,
|
|
15
|
+
EncodeKwargs,
|
|
16
|
+
PromptType,
|
|
17
|
+
QueryDatasetType,
|
|
18
|
+
RetrievalOutputType,
|
|
19
|
+
TopRankedDocumentsType,
|
|
20
|
+
)
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
@runtime_checkable
|
|
@@ -31,6 +32,7 @@ class SearchProtocol(Protocol):
|
|
|
31
32
|
hf_split: str,
|
|
32
33
|
hf_subset: str,
|
|
33
34
|
encode_kwargs: EncodeKwargs,
|
|
35
|
+
num_proc: int,
|
|
34
36
|
) -> None:
|
|
35
37
|
"""Index the corpus for retrieval.
|
|
36
38
|
|
|
@@ -40,6 +42,7 @@ class SearchProtocol(Protocol):
|
|
|
40
42
|
hf_split: Split of current task, allows to know some additional information about current split.
|
|
41
43
|
hf_subset: Subset of current task. Similar to `hf_split` to get more information
|
|
42
44
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
45
|
+
num_proc: Number of processes to use for dataloading.
|
|
43
46
|
"""
|
|
44
47
|
...
|
|
45
48
|
|
|
@@ -53,6 +56,7 @@ class SearchProtocol(Protocol):
|
|
|
53
56
|
top_k: int,
|
|
54
57
|
encode_kwargs: EncodeKwargs,
|
|
55
58
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
59
|
+
num_proc: int,
|
|
56
60
|
) -> RetrievalOutputType:
|
|
57
61
|
"""Search the corpus using the given queries.
|
|
58
62
|
|
|
@@ -65,6 +69,7 @@ class SearchProtocol(Protocol):
|
|
|
65
69
|
Passed only from Reranking tasks.
|
|
66
70
|
top_k: Number of top documents to return for each query.
|
|
67
71
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
72
|
+
num_proc: Number of processes to use for dataloading.
|
|
68
73
|
|
|
69
74
|
Returns:
|
|
70
75
|
Dictionary with query IDs as keys with dict as values, where each value is a mapping of document IDs to their relevance scores.
|
|
@@ -72,7 +77,7 @@ class SearchProtocol(Protocol):
|
|
|
72
77
|
...
|
|
73
78
|
|
|
74
79
|
@property
|
|
75
|
-
def mteb_model_meta(self) ->
|
|
80
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
76
81
|
"""Metadata of the model"""
|
|
77
82
|
...
|
|
78
83
|
|
|
@@ -177,7 +182,7 @@ class EncoderProtocol(Protocol):
|
|
|
177
182
|
...
|
|
178
183
|
|
|
179
184
|
@property
|
|
180
|
-
def mteb_model_meta(self) ->
|
|
185
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
181
186
|
"""Metadata of the model"""
|
|
182
187
|
...
|
|
183
188
|
|
|
@@ -236,7 +241,7 @@ class CrossEncoderProtocol(Protocol):
|
|
|
236
241
|
...
|
|
237
242
|
|
|
238
243
|
@property
|
|
239
|
-
def mteb_model_meta(self) ->
|
|
244
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
240
245
|
"""Metadata of the model"""
|
|
241
246
|
...
|
|
242
247
|
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import Protocol
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
from
|
|
3
|
+
from typing import TYPE_CHECKING, Protocol
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
|
|
8
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
class IndexEncoderSearchProtocol(Protocol):
|
|
@@ -1,14 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import warnings
|
|
3
|
-
from
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
8
|
import torch
|
|
7
9
|
|
|
8
10
|
from mteb._requires_package import requires_package
|
|
9
11
|
from mteb.models.model_meta import ScoringFunction
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
|
|
16
|
+
import faiss
|
|
17
|
+
|
|
18
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
19
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
20
|
+
|
|
12
21
|
|
|
13
22
|
logger = logging.getLogger(__name__)
|
|
14
23
|
|
|
@@ -33,7 +42,6 @@ class FaissSearchIndex:
|
|
|
33
42
|
install_instruction="pip install mteb[faiss-cpu]",
|
|
34
43
|
)
|
|
35
44
|
|
|
36
|
-
import faiss
|
|
37
45
|
from faiss import IndexFlatIP, IndexFlatL2
|
|
38
46
|
|
|
39
47
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -1,28 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import heapq
|
|
2
4
|
import logging
|
|
3
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
4
6
|
|
|
5
7
|
import torch
|
|
6
8
|
from datasets import Dataset
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
9
|
|
|
9
10
|
from mteb._create_dataloaders import (
|
|
10
11
|
create_dataloader,
|
|
11
12
|
)
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.types import (
|
|
14
|
-
Array,
|
|
15
|
-
BatchedInput,
|
|
16
|
-
CorpusDatasetType,
|
|
17
|
-
EncodeKwargs,
|
|
18
14
|
PromptType,
|
|
19
|
-
QueryDatasetType,
|
|
20
|
-
RetrievalOutputType,
|
|
21
|
-
TopRankedDocumentsType,
|
|
22
15
|
)
|
|
23
16
|
|
|
24
|
-
|
|
25
|
-
from .
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from torch.utils.data import DataLoader
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.types import (
|
|
22
|
+
Array,
|
|
23
|
+
BatchedInput,
|
|
24
|
+
CorpusDatasetType,
|
|
25
|
+
EncodeKwargs,
|
|
26
|
+
QueryDatasetType,
|
|
27
|
+
RetrievalOutputType,
|
|
28
|
+
TopRankedDocumentsType,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from .models_protocols import CrossEncoderProtocol, EncoderProtocol
|
|
32
|
+
from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
|
|
26
33
|
|
|
27
34
|
logger = logging.getLogger(__name__)
|
|
28
35
|
|
|
@@ -52,6 +59,7 @@ class SearchEncoderWrapper:
|
|
|
52
59
|
hf_split: str,
|
|
53
60
|
hf_subset: str,
|
|
54
61
|
encode_kwargs: EncodeKwargs,
|
|
62
|
+
num_proc: int = 1,
|
|
55
63
|
) -> None:
|
|
56
64
|
"""Index the corpus for retrieval.
|
|
57
65
|
|
|
@@ -61,6 +69,7 @@ class SearchEncoderWrapper:
|
|
|
61
69
|
hf_split: Split of current task, allows to know some additional information about current split.
|
|
62
70
|
hf_subset: Subset of current task. Similar to `hf_split` to get more information
|
|
63
71
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
72
|
+
num_proc: Number of processes to use for dataloading.
|
|
64
73
|
"""
|
|
65
74
|
# Always retain corpus for potential reranking or fallback flows
|
|
66
75
|
self.task_corpus = corpus
|
|
@@ -70,6 +79,7 @@ class SearchEncoderWrapper:
|
|
|
70
79
|
corpus,
|
|
71
80
|
task_metadata,
|
|
72
81
|
prompt_type=PromptType.document,
|
|
82
|
+
num_proc=num_proc,
|
|
73
83
|
**encode_kwargs,
|
|
74
84
|
),
|
|
75
85
|
task_metadata=task_metadata,
|
|
@@ -91,6 +101,7 @@ class SearchEncoderWrapper:
|
|
|
91
101
|
top_k: int,
|
|
92
102
|
encode_kwargs: EncodeKwargs,
|
|
93
103
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
104
|
+
num_proc: int = 1,
|
|
94
105
|
) -> RetrievalOutputType:
|
|
95
106
|
"""Search the corpus for the given queries.
|
|
96
107
|
|
|
@@ -103,6 +114,7 @@ class SearchEncoderWrapper:
|
|
|
103
114
|
Passed only from Reranking tasks.
|
|
104
115
|
top_k: Number of top documents to return for each query.
|
|
105
116
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
117
|
+
num_proc: Number of processes to use for dataloading.
|
|
106
118
|
|
|
107
119
|
Returns:
|
|
108
120
|
Dictionary with query IDs as keys with dict as values, where each value is a mapping of document IDs to their relevance scores.
|
|
@@ -114,6 +126,7 @@ class SearchEncoderWrapper:
|
|
|
114
126
|
queries,
|
|
115
127
|
task_metadata,
|
|
116
128
|
prompt_type=PromptType.query,
|
|
129
|
+
num_proc=num_proc,
|
|
117
130
|
**encode_kwargs,
|
|
118
131
|
)
|
|
119
132
|
|
|
@@ -472,6 +485,7 @@ class SearchCrossEncoderWrapper:
|
|
|
472
485
|
hf_split: str,
|
|
473
486
|
hf_subset: str,
|
|
474
487
|
encode_kwargs: EncodeKwargs,
|
|
488
|
+
num_proc: int = 1,
|
|
475
489
|
) -> None:
|
|
476
490
|
"""Index the corpus for retrieval.
|
|
477
491
|
|
|
@@ -481,6 +495,7 @@ class SearchCrossEncoderWrapper:
|
|
|
481
495
|
hf_split: Split of current task, allows to know some additional information about current split.
|
|
482
496
|
hf_subset: Subset of current task. Similar to `hf_split` to get more information
|
|
483
497
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
498
|
+
num_proc: Number of processes to use.
|
|
484
499
|
"""
|
|
485
500
|
self.task_corpus = corpus
|
|
486
501
|
|
|
@@ -494,6 +509,7 @@ class SearchCrossEncoderWrapper:
|
|
|
494
509
|
top_k: int,
|
|
495
510
|
encode_kwargs: EncodeKwargs,
|
|
496
511
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
512
|
+
num_proc: int = 1,
|
|
497
513
|
) -> RetrievalOutputType:
|
|
498
514
|
"""Search the corpus using the given queries.
|
|
499
515
|
|
|
@@ -506,6 +522,7 @@ class SearchCrossEncoderWrapper:
|
|
|
506
522
|
Passed only from Reranking tasks.
|
|
507
523
|
top_k: Number of top documents to return for each query.
|
|
508
524
|
encode_kwargs: Additional arguments to pass to the encoder during indexing.
|
|
525
|
+
num_proc: Number of processes to use.
|
|
509
526
|
|
|
510
527
|
Returns:
|
|
511
528
|
Dictionary with query IDs as keys with dict as values, where each value is a mapping of document IDs to their relevance scores.
|
|
@@ -539,12 +556,14 @@ class SearchCrossEncoderWrapper:
|
|
|
539
556
|
Dataset.from_list(total_queries),
|
|
540
557
|
task_metadata,
|
|
541
558
|
prompt_type=PromptType.document,
|
|
559
|
+
num_proc=num_proc,
|
|
542
560
|
**encode_kwargs,
|
|
543
561
|
)
|
|
544
562
|
corpus_loader = create_dataloader(
|
|
545
563
|
Dataset.from_list(total_docs),
|
|
546
564
|
task_metadata,
|
|
547
565
|
prompt_type=PromptType.document,
|
|
566
|
+
num_proc=num_proc,
|
|
548
567
|
**encode_kwargs,
|
|
549
568
|
)
|
|
550
569
|
predictions = self.model.predict(
|
|
@@ -7,19 +7,20 @@ from typing import TYPE_CHECKING, Any
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import torch
|
|
9
9
|
from packaging.version import Version
|
|
10
|
-
from torch.utils.data import DataLoader
|
|
11
|
-
from typing_extensions import Unpack
|
|
12
10
|
|
|
13
11
|
from mteb._log_once import LogOnce
|
|
14
12
|
from mteb.models import ModelMeta
|
|
15
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
from .abs_encoder import AbsEncoder
|
|
18
16
|
|
|
19
17
|
if TYPE_CHECKING:
|
|
20
18
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
19
|
+
from torch.utils.data import DataLoader
|
|
20
|
+
from typing_extensions import Unpack
|
|
21
21
|
|
|
22
22
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
23
|
+
from mteb.types import Array, BatchedInput, EncodeKwargs
|
|
23
24
|
|
|
24
25
|
logger = logging.getLogger(__name__)
|
|
25
26
|
|
mteb/models/vllm_wrapper.py
CHANGED
|
@@ -4,23 +4,25 @@ import atexit
|
|
|
4
4
|
import gc
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
|
-
from collections.abc import Callable
|
|
8
7
|
from typing import TYPE_CHECKING, Any, Literal
|
|
9
8
|
|
|
10
9
|
import numpy as np
|
|
11
10
|
import torch
|
|
12
|
-
from torch.utils.data import DataLoader
|
|
13
11
|
|
|
14
12
|
from mteb._requires_package import requires_package
|
|
15
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
13
|
from mteb.models import ModelMeta
|
|
17
14
|
from mteb.models.abs_encoder import AbsEncoder
|
|
18
|
-
from mteb.types import
|
|
15
|
+
from mteb.types import PromptType
|
|
19
16
|
|
|
20
17
|
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
|
+
from torch.utils.data import DataLoader
|
|
21
21
|
from vllm.config import PoolerConfig # type: ignore[import-not-found]
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
|
|
23
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
24
|
+
from mteb.types import Array, BatchedInput
|
|
25
|
+
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
26
28
|
|
|
@@ -4,34 +4,39 @@ import functools
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
import warnings
|
|
7
|
-
from collections.abc import Callable, Iterable, Iterator
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Literal, cast
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
10
9
|
|
|
11
10
|
import pandas as pd
|
|
12
11
|
from packaging.version import InvalidVersion, Version
|
|
13
12
|
from pydantic import BaseModel, ConfigDict
|
|
14
|
-
from typing_extensions import Self
|
|
15
13
|
|
|
16
|
-
from mteb.abstasks.abstask import AbsTask
|
|
17
|
-
from mteb.abstasks.task_metadata import (
|
|
18
|
-
TaskDomain,
|
|
19
|
-
TaskType,
|
|
20
|
-
)
|
|
21
14
|
from mteb.benchmarks.benchmark import Benchmark
|
|
22
15
|
from mteb.models import ModelMeta
|
|
23
16
|
from mteb.models.get_model_meta import get_model_metas
|
|
24
|
-
from mteb.types import (
|
|
25
|
-
ISOLanguage,
|
|
26
|
-
ISOLanguageScript,
|
|
27
|
-
Modalities,
|
|
28
|
-
Score,
|
|
29
|
-
ScoresDict,
|
|
30
|
-
SplitName,
|
|
31
|
-
)
|
|
32
17
|
|
|
33
18
|
from .model_result import ModelResult, _aggregate_and_pivot
|
|
34
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
22
|
+
|
|
23
|
+
from typing_extensions import Self
|
|
24
|
+
|
|
25
|
+
from mteb.abstasks.abstask import AbsTask
|
|
26
|
+
from mteb.abstasks.task_metadata import (
|
|
27
|
+
TaskDomain,
|
|
28
|
+
TaskType,
|
|
29
|
+
)
|
|
30
|
+
from mteb.types import (
|
|
31
|
+
ISOLanguage,
|
|
32
|
+
ISOLanguageScript,
|
|
33
|
+
Modalities,
|
|
34
|
+
Score,
|
|
35
|
+
ScoresDict,
|
|
36
|
+
SplitName,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
35
40
|
logger = logging.getLogger(__name__)
|
|
36
41
|
|
|
37
42
|
|
|
@@ -144,7 +149,7 @@ class BenchmarkResults(BaseModel):
|
|
|
144
149
|
raise ValueError("name in ModelMeta is None. It must be a string.")
|
|
145
150
|
name_rev[name.name] = name.revision
|
|
146
151
|
else:
|
|
147
|
-
name_ = cast(str, name)
|
|
152
|
+
name_ = cast("str", name)
|
|
148
153
|
name_rev[name_] = revision
|
|
149
154
|
|
|
150
155
|
for model_res in self.model_results:
|
mteb/results/model_result.py
CHANGED
|
@@ -2,30 +2,36 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
|
-
from
|
|
6
|
-
from typing import Any, Literal, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
7
6
|
|
|
8
7
|
import numpy as np
|
|
9
8
|
import pandas as pd
|
|
10
9
|
from pydantic import BaseModel, ConfigDict, Field
|
|
11
10
|
from typing_extensions import overload
|
|
12
11
|
|
|
13
|
-
from mteb.abstasks.abstask import AbsTask
|
|
14
|
-
from mteb.abstasks.task_metadata import (
|
|
15
|
-
TaskDomain,
|
|
16
|
-
TaskType,
|
|
17
|
-
)
|
|
18
12
|
from mteb.types import (
|
|
19
|
-
ISOLanguage,
|
|
20
|
-
ISOLanguageScript,
|
|
21
13
|
Modalities,
|
|
22
|
-
Score,
|
|
23
|
-
ScoresDict,
|
|
24
|
-
SplitName,
|
|
25
14
|
)
|
|
26
15
|
|
|
27
16
|
from .task_result import TaskError, TaskResult
|
|
28
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Callable, Iterable
|
|
20
|
+
|
|
21
|
+
from mteb.abstasks.abstask import AbsTask
|
|
22
|
+
from mteb.abstasks.task_metadata import (
|
|
23
|
+
TaskDomain,
|
|
24
|
+
TaskType,
|
|
25
|
+
)
|
|
26
|
+
from mteb.types import (
|
|
27
|
+
ISOLanguage,
|
|
28
|
+
ISOLanguageScript,
|
|
29
|
+
Score,
|
|
30
|
+
ScoresDict,
|
|
31
|
+
SplitName,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
29
35
|
logger = logging.getLogger(__name__)
|
|
30
36
|
|
|
31
37
|
|
|
@@ -83,7 +89,7 @@ class ModelResult(BaseModel):
|
|
|
83
89
|
model_revision: str | None
|
|
84
90
|
task_results: list[TaskResult]
|
|
85
91
|
default_modalities: list[Modalities] = Field(
|
|
86
|
-
default_factory=lambda: [cast(Modalities, "text")], alias="modalities"
|
|
92
|
+
default_factory=lambda: [cast("Modalities", "text")], alias="modalities"
|
|
87
93
|
)
|
|
88
94
|
model_config = (
|
|
89
95
|
ConfigDict( # to free up the name model_* which is otherwise protected
|
|
@@ -202,8 +208,8 @@ class ModelResult(BaseModel):
|
|
|
202
208
|
aggregation = aggregation if aggregation is not None else np.mean
|
|
203
209
|
else:
|
|
204
210
|
use_fast = True
|
|
205
|
-
aggregation = cast(Callable[[list[Score]], Any], aggregation)
|
|
206
|
-
getter = cast(Callable[[ScoresDict], Score], getter)
|
|
211
|
+
aggregation = cast("Callable[[list[Score]], Any]", aggregation)
|
|
212
|
+
getter = cast("Callable[[ScoresDict], Score]", getter)
|
|
207
213
|
|
|
208
214
|
if format == "wide":
|
|
209
215
|
scores = {}
|