mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +47 -5
- mteb/_evaluators/any_sts_evaluator.py +2 -0
- mteb/_evaluators/clustering_evaluator.py +2 -0
- mteb/_evaluators/evaluator.py +2 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
- mteb/_evaluators/pair_classification_evaluator.py +3 -0
- mteb/_evaluators/retrieval_evaluator.py +3 -0
- mteb/_evaluators/sklearn_evaluator.py +6 -1
- mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
- mteb/_evaluators/text/summarization_evaluator.py +2 -0
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
- mteb/abstasks/abstask.py +31 -12
- mteb/abstasks/classification.py +10 -3
- mteb/abstasks/clustering.py +6 -2
- mteb/abstasks/clustering_legacy.py +8 -2
- mteb/abstasks/image/image_text_pair_classification.py +6 -2
- mteb/abstasks/multilabel_classification.py +2 -0
- mteb/abstasks/pair_classification.py +8 -2
- mteb/abstasks/retrieval.py +27 -12
- mteb/abstasks/retrieval_dataset_loaders.py +29 -19
- mteb/abstasks/sts.py +10 -3
- mteb/abstasks/text/bitext_mining.py +9 -5
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +2 -1
- mteb/abstasks/zeroshot_classification.py +8 -2
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +10 -2
- mteb/models/model_implementations/align_models.py +1 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +2 -0
- mteb/models/model_implementations/blip_models.py +8 -0
- mteb/models/model_implementations/bm25.py +10 -5
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +2 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +4 -0
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +3 -0
- mteb/models/model_implementations/colqwen_models.py +7 -0
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +19 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +1 -0
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +2 -0
- mteb/models/model_implementations/google_models.py +5 -0
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +2 -0
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +7 -0
- mteb/models/model_implementations/kalm_models.py +6 -0
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +1 -0
- mteb/models/model_implementations/listconranker.py +1 -0
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +3 -0
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +2 -0
- mteb/models/model_implementations/mod_models.py +1 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +6 -0
- mteb/models/model_implementations/nomic_models_vision.py +1 -0
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
- mteb/models/model_implementations/nvidia_models.py +3 -0
- mteb/models/model_implementations/octen_models.py +2 -0
- mteb/models/model_implementations/openai_models.py +5 -0
- mteb/models/model_implementations/openclip_models.py +8 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +2 -0
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +4 -0
- mteb/models/model_implementations/pylate_models.py +13 -0
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +3 -0
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -0
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +1 -0
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +1 -0
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +1 -0
- mteb/models/model_meta.py +35 -2
- mteb/models/models_protocols.py +4 -0
- mteb/models/search_wrappers.py +12 -0
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +1 -1
- mteb/tasks/clustering/nob/vg_clustering.py +1 -1
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 107198,
|
|
4
|
+
"number_of_characters": 183652816,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 183501537,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 1713.6703710275399,
|
|
9
|
+
"max_text_length": 4000,
|
|
10
|
+
"unique_texts": 66270
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 151279,
|
|
15
|
+
"min_text_length": 185,
|
|
16
|
+
"average_text_length": 1292.982905982906,
|
|
17
|
+
"max_text_length": 12432,
|
|
18
|
+
"unique_texts": 117
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 819,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 7.0,
|
|
25
|
+
"max_relevant_docs_per_query": 59,
|
|
26
|
+
"unique_relevant_docs": 816
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 12528477,
|
|
30
|
+
"min_top_ranked_per_query": 107081,
|
|
31
|
+
"average_top_ranked_per_query": 107081.0,
|
|
32
|
+
"max_top_ranked_per_query": 107081
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 662,
|
|
4
|
+
"number_of_characters": 21154322,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 21080575,
|
|
7
|
+
"min_text_length": 30,
|
|
8
|
+
"average_text_length": 38051.579422382674,
|
|
9
|
+
"max_text_length": 5732344,
|
|
10
|
+
"unique_texts": 551
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 73747,
|
|
15
|
+
"min_text_length": 158,
|
|
16
|
+
"average_text_length": 682.8425925925926,
|
|
17
|
+
"max_text_length": 2843,
|
|
18
|
+
"unique_texts": 108
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 129,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.1944444444444444,
|
|
25
|
+
"max_relevant_docs_per_query": 5,
|
|
26
|
+
"unique_relevant_docs": 129
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 59832,
|
|
30
|
+
"min_top_ranked_per_query": 554,
|
|
31
|
+
"average_top_ranked_per_query": 554.0,
|
|
32
|
+
"max_top_ranked_per_query": 554
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 60900,
|
|
4
|
+
"number_of_characters": 20971763,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20898016,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 343.7626003421503,
|
|
9
|
+
"max_text_length": 158296,
|
|
10
|
+
"unique_texts": 50142
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 73747,
|
|
15
|
+
"min_text_length": 158,
|
|
16
|
+
"average_text_length": 682.8425925925926,
|
|
17
|
+
"max_text_length": 2843,
|
|
18
|
+
"unique_texts": 108
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 604,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 5.592592592592593,
|
|
25
|
+
"max_relevant_docs_per_query": 59,
|
|
26
|
+
"unique_relevant_docs": 604
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 6565536,
|
|
30
|
+
"min_top_ranked_per_query": 60792,
|
|
31
|
+
"average_top_ranked_per_query": 60792.0,
|
|
32
|
+
"max_top_ranked_per_query": 60792
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 188207,
|
|
4
|
+
"number_of_characters": 141817604,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 141734227,
|
|
7
|
+
"min_text_length": 58,
|
|
8
|
+
"average_text_length": 753.8974425803981,
|
|
9
|
+
"max_text_length": 7334,
|
|
10
|
+
"unique_texts": 176508
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 83377,
|
|
15
|
+
"min_text_length": 12,
|
|
16
|
+
"average_text_length": 406.7170731707317,
|
|
17
|
+
"max_text_length": 1255,
|
|
18
|
+
"unique_texts": 201
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 469,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 2.299019607843137,
|
|
25
|
+
"max_relevant_docs_per_query": 7,
|
|
26
|
+
"unique_relevant_docs": 234
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 37946536,
|
|
30
|
+
"min_top_ranked_per_query": 176970,
|
|
31
|
+
"average_top_ranked_per_query": 185105.05365853658,
|
|
32
|
+
"max_top_ranked_per_query": 188176
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 23904,
|
|
4
|
+
"number_of_characters": 20825122,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20797224,
|
|
7
|
+
"min_text_length": 74,
|
|
8
|
+
"average_text_length": 872.4033726246906,
|
|
9
|
+
"max_text_length": 19104,
|
|
10
|
+
"unique_texts": 23839
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 27898,
|
|
15
|
+
"min_text_length": 13,
|
|
16
|
+
"average_text_length": 429.2,
|
|
17
|
+
"max_text_length": 1255,
|
|
18
|
+
"unique_texts": 65
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 126,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.9384615384615385,
|
|
25
|
+
"max_relevant_docs_per_query": 6,
|
|
26
|
+
"unique_relevant_docs": 95
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 1549535,
|
|
30
|
+
"min_top_ranked_per_query": 23839,
|
|
31
|
+
"average_top_ranked_per_query": 23839.0,
|
|
32
|
+
"max_top_ranked_per_query": 23839
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
mteb/evaluate.py
CHANGED
|
@@ -125,6 +125,7 @@ def _evaluate_task(
|
|
|
125
125
|
co2_tracker=False,
|
|
126
126
|
prediction_folder=prediction_folder,
|
|
127
127
|
public_only=public_only,
|
|
128
|
+
num_proc=num_proc,
|
|
128
129
|
)
|
|
129
130
|
if isinstance(result, TaskResult):
|
|
130
131
|
result.kg_co2_emissions = tracker.final_emissions
|
|
@@ -137,7 +138,7 @@ def _evaluate_task(
|
|
|
137
138
|
data_preloaded = task.data_loaded
|
|
138
139
|
if not data_preloaded:
|
|
139
140
|
try:
|
|
140
|
-
task.load_data()
|
|
141
|
+
task.load_data(num_proc=num_proc)
|
|
141
142
|
except DatasetNotFoundError as e:
|
|
142
143
|
if not task.metadata.is_public and public_only is None:
|
|
143
144
|
msg = (
|
|
@@ -163,6 +164,7 @@ def _evaluate_task(
|
|
|
163
164
|
subsets_to_run=hf_subsets,
|
|
164
165
|
encode_kwargs=encode_kwargs,
|
|
165
166
|
prediction_folder=prediction_folder,
|
|
167
|
+
num_proc=num_proc,
|
|
166
168
|
)
|
|
167
169
|
tock = time()
|
|
168
170
|
|
|
@@ -280,6 +282,7 @@ def evaluate(
|
|
|
280
282
|
prediction_folder: Path | str | None = None,
|
|
281
283
|
show_progress_bar: bool = True,
|
|
282
284
|
public_only: bool | None = None,
|
|
285
|
+
num_proc: int = 1,
|
|
283
286
|
) -> ModelResult:
|
|
284
287
|
"""This function runs a model on a given task and returns the results.
|
|
285
288
|
|
|
@@ -288,7 +291,7 @@ def evaluate(
|
|
|
288
291
|
tasks: A task to run.
|
|
289
292
|
co2_tracker: If True, track the CO₂ emissions of the evaluation, required codecarbon to be installed, which can be installed using
|
|
290
293
|
`pip install mteb[codecarbon]`. If none is passed co2 tracking will only be run if codecarbon is installed.
|
|
291
|
-
encode_kwargs: Additional keyword arguments passed to the models `encode`
|
|
294
|
+
encode_kwargs: Additional keyword arguments passed to the models `encode` and `load_data` methods;
|
|
292
295
|
raise_error: If True, raise an error if the task fails. If False, return an empty list.
|
|
293
296
|
cache: The cache to use for loading the results. If None, then no cache will be used. The default cache saved the cache in the
|
|
294
297
|
`~/.cache/mteb` directory. It can be overridden by setting the `MTEB_CACHE` environment variable to a different directory or by directly
|
|
@@ -304,6 +307,7 @@ def evaluate(
|
|
|
304
307
|
show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
|
|
305
308
|
`encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
|
|
306
309
|
public_only: Run only public tasks. If None, it will attempt to run the private task.
|
|
310
|
+
num_proc: Number of processes to use during data loading and transformation. Defaults to 1.
|
|
307
311
|
|
|
308
312
|
Returns:
|
|
309
313
|
The results of the evaluation.
|
|
@@ -356,6 +360,7 @@ def evaluate(
|
|
|
356
360
|
prediction_folder=prediction_folder,
|
|
357
361
|
show_progress_bar=show_progress_bar,
|
|
358
362
|
public_only=public_only,
|
|
363
|
+
num_proc=num_proc,
|
|
359
364
|
)
|
|
360
365
|
combined_results = aggregated_task.combine_task_results(results.task_results)
|
|
361
366
|
return ModelResult(
|
|
@@ -388,6 +393,7 @@ def evaluate(
|
|
|
388
393
|
prediction_folder=prediction_folder,
|
|
389
394
|
show_progress_bar=False,
|
|
390
395
|
public_only=public_only,
|
|
396
|
+
num_proc=num_proc,
|
|
391
397
|
)
|
|
392
398
|
evaluate_results.extend(_res.task_results)
|
|
393
399
|
if _res.exceptions:
|
|
@@ -467,6 +473,7 @@ def evaluate(
|
|
|
467
473
|
encode_kwargs=encode_kwargs,
|
|
468
474
|
prediction_folder=prediction_folder,
|
|
469
475
|
public_only=public_only,
|
|
476
|
+
num_proc=num_proc,
|
|
470
477
|
)
|
|
471
478
|
except Exception as e:
|
|
472
479
|
logger.error(
|
|
@@ -482,6 +489,7 @@ def evaluate(
|
|
|
482
489
|
encode_kwargs=encode_kwargs,
|
|
483
490
|
prediction_folder=prediction_folder,
|
|
484
491
|
public_only=public_only,
|
|
492
|
+
num_proc=num_proc,
|
|
485
493
|
)
|
|
486
494
|
logger.info(f"✓ Finished evaluation for {task.metadata.name}")
|
|
487
495
|
|
|
@@ -12,6 +12,7 @@ model2vecdk = ModelMeta(
|
|
|
12
12
|
revision="cb576c78dcc1b729e4612645f61db59929d69e61",
|
|
13
13
|
release_date="2025-11-21",
|
|
14
14
|
n_parameters=48042496,
|
|
15
|
+
n_embedding_parameters=None,
|
|
15
16
|
memory_usage_mb=183,
|
|
16
17
|
max_tokens=np.inf,
|
|
17
18
|
embed_dim=256,
|
|
@@ -43,6 +44,7 @@ model2vecdk_stem = ModelMeta(
|
|
|
43
44
|
revision="cb576c78dcc1b729e4612645f61db59929d69e61",
|
|
44
45
|
release_date="2025-11-21",
|
|
45
46
|
n_parameters=48578560,
|
|
47
|
+
n_embedding_parameters=None,
|
|
46
48
|
memory_usage_mb=185,
|
|
47
49
|
max_tokens=np.inf,
|
|
48
50
|
embed_dim=256,
|
|
@@ -147,6 +147,7 @@ arctic_embed_xs = ModelMeta(
|
|
|
147
147
|
open_weights=True,
|
|
148
148
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
149
149
|
n_parameters=22_600_000,
|
|
150
|
+
n_embedding_parameters=11_720_448,
|
|
150
151
|
memory_usage_mb=86,
|
|
151
152
|
max_tokens=512,
|
|
152
153
|
embed_dim=384,
|
|
@@ -173,6 +174,7 @@ arctic_embed_s = ModelMeta(
|
|
|
173
174
|
open_weights=True,
|
|
174
175
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
175
176
|
n_parameters=32_200_000,
|
|
177
|
+
n_embedding_parameters=11_720_448,
|
|
176
178
|
memory_usage_mb=127,
|
|
177
179
|
max_tokens=512,
|
|
178
180
|
embed_dim=384,
|
|
@@ -199,6 +201,7 @@ arctic_embed_m = ModelMeta(
|
|
|
199
201
|
open_weights=True,
|
|
200
202
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
201
203
|
n_parameters=109_000_000,
|
|
204
|
+
n_embedding_parameters=23_440_896,
|
|
202
205
|
memory_usage_mb=415,
|
|
203
206
|
max_tokens=512,
|
|
204
207
|
embed_dim=768,
|
|
@@ -225,6 +228,7 @@ arctic_embed_m_long = ModelMeta(
|
|
|
225
228
|
open_weights=True,
|
|
226
229
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
227
230
|
n_parameters=137_000_000,
|
|
231
|
+
n_embedding_parameters=None,
|
|
228
232
|
memory_usage_mb=522,
|
|
229
233
|
max_tokens=2048,
|
|
230
234
|
embed_dim=768,
|
|
@@ -250,6 +254,7 @@ arctic_embed_l = ModelMeta(
|
|
|
250
254
|
open_weights=True,
|
|
251
255
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
252
256
|
n_parameters=335_000_000,
|
|
257
|
+
n_embedding_parameters=31_254_528,
|
|
253
258
|
memory_usage_mb=1274,
|
|
254
259
|
max_tokens=512,
|
|
255
260
|
embed_dim=1024,
|
|
@@ -280,6 +285,7 @@ arctic_embed_m_v1_5 = ModelMeta(
|
|
|
280
285
|
open_weights=True,
|
|
281
286
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
|
|
282
287
|
n_parameters=109_000_000,
|
|
288
|
+
n_embedding_parameters=23_440_896,
|
|
283
289
|
memory_usage_mb=415,
|
|
284
290
|
max_tokens=512,
|
|
285
291
|
embed_dim=768,
|
|
@@ -306,6 +312,7 @@ arctic_embed_m_v2_0 = ModelMeta(
|
|
|
306
312
|
open_weights=True,
|
|
307
313
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
308
314
|
n_parameters=305_000_000,
|
|
315
|
+
n_embedding_parameters=None,
|
|
309
316
|
memory_usage_mb=1165,
|
|
310
317
|
max_tokens=8192,
|
|
311
318
|
embed_dim=768,
|
|
@@ -331,6 +338,7 @@ arctic_embed_l_v2_0 = ModelMeta(
|
|
|
331
338
|
open_weights=True,
|
|
332
339
|
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
333
340
|
n_parameters=568_000_000,
|
|
341
|
+
n_embedding_parameters=256_002_048,
|
|
334
342
|
memory_usage_mb=2166,
|
|
335
343
|
max_tokens=8192,
|
|
336
344
|
embed_dim=1024,
|
|
@@ -179,6 +179,7 @@ amazon_titan_embed_text_v1 = ModelMeta(
|
|
|
179
179
|
embed_dim=1536,
|
|
180
180
|
open_weights=False,
|
|
181
181
|
n_parameters=None,
|
|
182
|
+
n_embedding_parameters=None,
|
|
182
183
|
memory_usage_mb=None,
|
|
183
184
|
public_training_code=None,
|
|
184
185
|
public_training_data=None, # assumed
|
|
@@ -206,6 +207,7 @@ amazon_titan_embed_text_v2 = ModelMeta(
|
|
|
206
207
|
embed_dim=1024,
|
|
207
208
|
open_weights=False,
|
|
208
209
|
n_parameters=None,
|
|
210
|
+
n_embedding_parameters=None,
|
|
209
211
|
memory_usage_mb=None,
|
|
210
212
|
public_training_code=None,
|
|
211
213
|
public_training_data=None, # assumed
|
|
@@ -235,6 +237,7 @@ cohere_embed_english_v3 = ModelMeta(
|
|
|
235
237
|
revision="1",
|
|
236
238
|
release_date="2023-11-02",
|
|
237
239
|
n_parameters=None,
|
|
240
|
+
n_embedding_parameters=None,
|
|
238
241
|
memory_usage_mb=None,
|
|
239
242
|
public_training_code=None,
|
|
240
243
|
public_training_data=None, # assumed
|
|
@@ -263,6 +266,7 @@ cohere_embed_multilingual_v3 = ModelMeta(
|
|
|
263
266
|
revision="1",
|
|
264
267
|
release_date="2023-11-02",
|
|
265
268
|
n_parameters=None,
|
|
269
|
+
n_embedding_parameters=None,
|
|
266
270
|
memory_usage_mb=None,
|
|
267
271
|
public_training_code=None,
|
|
268
272
|
public_training_data=None, # assumed
|
|
@@ -6,7 +6,29 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
|
|
|
6
6
|
|
|
7
7
|
from .e5_instruct import E5_MISTRAL_TRAINING_DATA
|
|
8
8
|
|
|
9
|
-
model_prompts = {
|
|
9
|
+
model_prompts = {
|
|
10
|
+
"query": "Represent this sentence for searching relevant passages: ",
|
|
11
|
+
"BrightBiologyRetrieval-query": "Represent this biology post for searching relevant passages: ",
|
|
12
|
+
"BrightEarthScienceRetrieval-query": "Represent this earth_science post for searching relevant passages: ",
|
|
13
|
+
"BrightEconomicsRetrieval-query": "Represent this economics post for searching relevant passages: ",
|
|
14
|
+
"BrightPsychologyRetrieval-query": "Represent this psychology post for searching relevant passages: ",
|
|
15
|
+
"BrightRoboticsRetrieval-query": "Represent this robotics post for searching relevant passages: ",
|
|
16
|
+
"BrightStackoverflowRetrieval-query": "Represent this stackoverflow post for searching relevant passages: ",
|
|
17
|
+
"BrightSustainableLivingRetrieval-query": "Represent this sustainable_living post for searching relevant passages: ",
|
|
18
|
+
"BrightPonyRetrieval-query": "Represent this Pony question for searching relevant passages: ",
|
|
19
|
+
"BrightLeetcodeRetrieval-query": "Represent this Coding problem for searching relevant examples: ",
|
|
20
|
+
"BrightAopsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
|
|
21
|
+
"BrightTheoremQATheoremsRetrieval-query": "Represent this Math problem for searching relevant theorems: ",
|
|
22
|
+
"BrightTheoremQAQuestionsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
|
|
23
|
+
"BrightBiologyLongRetrieval-query": "Represent this biology post for searching relevant documents: ",
|
|
24
|
+
"BrightEarthScienceLongRetrieval-query": "Represent this earth_science post for searching relevant documents: ",
|
|
25
|
+
"BrightEconomicsLongRetrieval-query": "Represent this economics post for searching relevant documents: ",
|
|
26
|
+
"BrightPsychologyLongRetrieval-query": "Represent this psychology post for searching relevant documents: ",
|
|
27
|
+
"BrightRoboticsLongRetrieval-query": "Represent this robotics post for searching relevant document: ",
|
|
28
|
+
"BrightStackoverflowLongRetrieval-query": "Represent this stackoverflow post for searching relevant document: ",
|
|
29
|
+
"BrightSustainableLivingLongRetrieval-query": "Represent this sustainable_living post for searching relevant documents: ",
|
|
30
|
+
"BrightPonyLongRetrieval-query": "Represent this Pony question for searching relevant documents: ",
|
|
31
|
+
}
|
|
10
32
|
BGE_15_CITATION = """@misc{bge_embedding,
|
|
11
33
|
title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
|
|
12
34
|
author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
|
|
@@ -325,6 +347,7 @@ bge_small_en_v1_5 = ModelMeta(
|
|
|
325
347
|
revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
|
|
326
348
|
release_date="2023-09-12", # initial commit of hf model.
|
|
327
349
|
n_parameters=33_400_000,
|
|
350
|
+
n_embedding_parameters=11_720_448,
|
|
328
351
|
memory_usage_mb=127,
|
|
329
352
|
embed_dim=512,
|
|
330
353
|
license="mit",
|
|
@@ -357,6 +380,7 @@ bge_base_en_v1_5 = ModelMeta(
|
|
|
357
380
|
revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
|
|
358
381
|
release_date="2023-09-11", # initial commit of hf model.
|
|
359
382
|
n_parameters=109_000_000,
|
|
383
|
+
n_embedding_parameters=23_440_896,
|
|
360
384
|
memory_usage_mb=390,
|
|
361
385
|
embed_dim=768,
|
|
362
386
|
license="mit",
|
|
@@ -389,6 +413,7 @@ bge_large_en_v1_5 = ModelMeta(
|
|
|
389
413
|
revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
|
|
390
414
|
release_date="2023-09-12", # initial commit of hf model.
|
|
391
415
|
n_parameters=335_000_000,
|
|
416
|
+
n_embedding_parameters=31_254_528,
|
|
392
417
|
memory_usage_mb=1242,
|
|
393
418
|
embed_dim=1024,
|
|
394
419
|
license="mit",
|
|
@@ -421,6 +446,7 @@ bge_small_zh = ModelMeta(
|
|
|
421
446
|
revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8",
|
|
422
447
|
release_date="2023-08-05", # initial commit of hf model.
|
|
423
448
|
n_parameters=33_400_000,
|
|
449
|
+
n_embedding_parameters=10_817_536,
|
|
424
450
|
memory_usage_mb=127,
|
|
425
451
|
embed_dim=512,
|
|
426
452
|
license="mit",
|
|
@@ -448,6 +474,7 @@ bge_base_zh = ModelMeta(
|
|
|
448
474
|
revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6",
|
|
449
475
|
release_date="2023-08-05", # initial commit of hf model.
|
|
450
476
|
n_parameters=109_000_000,
|
|
477
|
+
n_embedding_parameters=16_226_304,
|
|
451
478
|
memory_usage_mb=390,
|
|
452
479
|
embed_dim=768,
|
|
453
480
|
license="mit",
|
|
@@ -475,6 +502,7 @@ bge_large_zh = ModelMeta(
|
|
|
475
502
|
revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8",
|
|
476
503
|
release_date="2023-08-02", # initial commit of hf model.
|
|
477
504
|
n_parameters=335_000_000,
|
|
505
|
+
n_embedding_parameters=21_635_072,
|
|
478
506
|
memory_usage_mb=1242,
|
|
479
507
|
embed_dim=1024,
|
|
480
508
|
license="mit",
|
|
@@ -502,6 +530,7 @@ bge_small_en = ModelMeta(
|
|
|
502
530
|
revision="4778d71a06863076696b03fd2777eb118712cad8",
|
|
503
531
|
release_date="2023-08-05", # initial commit of hf model.
|
|
504
532
|
n_parameters=33_400_000,
|
|
533
|
+
n_embedding_parameters=11_720_448,
|
|
505
534
|
memory_usage_mb=127,
|
|
506
535
|
embed_dim=512,
|
|
507
536
|
license="mit",
|
|
@@ -529,6 +558,7 @@ bge_base_en = ModelMeta(
|
|
|
529
558
|
revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8",
|
|
530
559
|
release_date="2023-08-05", # initial commit of hf model.
|
|
531
560
|
n_parameters=109_000_000,
|
|
561
|
+
n_embedding_parameters=23_440_896,
|
|
532
562
|
memory_usage_mb=390,
|
|
533
563
|
embed_dim=768,
|
|
534
564
|
license="mit",
|
|
@@ -562,6 +592,7 @@ bge_large_en = ModelMeta(
|
|
|
562
592
|
revision="abe7d9d814b775ca171121fb03f394dc42974275",
|
|
563
593
|
release_date="2023-08-05", # initial commit of hf model.
|
|
564
594
|
n_parameters=335_000_000,
|
|
595
|
+
n_embedding_parameters=31_254_528,
|
|
565
596
|
memory_usage_mb=1242,
|
|
566
597
|
embed_dim=1024,
|
|
567
598
|
license="mit",
|
|
@@ -590,6 +621,7 @@ bge_small_zh_v1_5 = ModelMeta(
|
|
|
590
621
|
revision="7999e1d3359715c523056ef9478215996d62a620",
|
|
591
622
|
release_date="2023-09-12", # initial commit of hf model.
|
|
592
623
|
n_parameters=33_400_000,
|
|
624
|
+
n_embedding_parameters=10_817_536,
|
|
593
625
|
memory_usage_mb=91,
|
|
594
626
|
embed_dim=512,
|
|
595
627
|
license="mit",
|
|
@@ -616,6 +648,7 @@ bge_base_zh_v1_5 = ModelMeta(
|
|
|
616
648
|
revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65",
|
|
617
649
|
release_date="2023-09-11", # initial commit of hf model.
|
|
618
650
|
n_parameters=109_000_000,
|
|
651
|
+
n_embedding_parameters=16_226_304,
|
|
619
652
|
memory_usage_mb=416,
|
|
620
653
|
embed_dim=768,
|
|
621
654
|
license="mit",
|
|
@@ -642,6 +675,7 @@ bge_large_zh_v1_5 = ModelMeta(
|
|
|
642
675
|
revision="79e7739b6ab944e86d6171e44d24c997fc1e0116",
|
|
643
676
|
release_date="2023-09-12", # initial commit of hf model.
|
|
644
677
|
n_parameters=335_000_000,
|
|
678
|
+
n_embedding_parameters=21_635_072,
|
|
645
679
|
memory_usage_mb=1278,
|
|
646
680
|
embed_dim=1024,
|
|
647
681
|
license="mit",
|
|
@@ -665,6 +699,7 @@ bge_m3 = ModelMeta(
|
|
|
665
699
|
revision="5617a9f61b028005a4858fdac845db406aefb181",
|
|
666
700
|
release_date="2024-06-28",
|
|
667
701
|
n_parameters=568_000_000,
|
|
702
|
+
n_embedding_parameters=256_002_048,
|
|
668
703
|
memory_usage_mb=2167,
|
|
669
704
|
embed_dim=1024,
|
|
670
705
|
license="mit",
|
|
@@ -761,6 +796,7 @@ bge_multilingual_gemma2 = ModelMeta(
|
|
|
761
796
|
revision="992e13d8984fde2c31ef8a3cb2c038aeec513b8a",
|
|
762
797
|
release_date="2024-07-25", # initial commit of hf model.
|
|
763
798
|
n_parameters=int(9.24 * 1e9),
|
|
799
|
+
n_embedding_parameters=917_511_168,
|
|
764
800
|
memory_usage_mb=35254,
|
|
765
801
|
embed_dim=3584, # from old C-MTEB leaderboard
|
|
766
802
|
license="https://ai.google.dev/gemma/terms",
|
|
@@ -808,6 +844,7 @@ bge_en_icl = ModelMeta(
|
|
|
808
844
|
revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5",
|
|
809
845
|
release_date="2024-07-25", # initial commit of hf model.
|
|
810
846
|
n_parameters=int(7.11 * 1e9),
|
|
847
|
+
n_embedding_parameters=131_084_288,
|
|
811
848
|
memory_usage_mb=27125,
|
|
812
849
|
embed_dim=4096,
|
|
813
850
|
license="apache-2.0",
|
|
@@ -842,6 +879,7 @@ bge_m3_unsupervised = ModelMeta(
|
|
|
842
879
|
revision="46f03bc86361cf88102b0b517b36c8259f2946b1",
|
|
843
880
|
release_date="2024-01-30", # January 30, 2024 - BGE-M3 release date
|
|
844
881
|
n_parameters=568_000_000,
|
|
882
|
+
n_embedding_parameters=256_002_048,
|
|
845
883
|
memory_usage_mb=2167,
|
|
846
884
|
embed_dim=1024,
|
|
847
885
|
license="mit",
|
|
@@ -871,6 +909,7 @@ manu__bge_m3_custom_fr = ModelMeta(
|
|
|
871
909
|
languages=None,
|
|
872
910
|
loader=sentence_transformers_loader,
|
|
873
911
|
n_parameters=567754752,
|
|
912
|
+
n_embedding_parameters=256_002_048,
|
|
874
913
|
memory_usage_mb=2166,
|
|
875
914
|
max_tokens=8194.0,
|
|
876
915
|
embed_dim=1024,
|
|
@@ -177,6 +177,7 @@ blip2_opt_2_7b = ModelMeta(
|
|
|
177
177
|
release_date="2024-03-22",
|
|
178
178
|
modalities=["image", "text"],
|
|
179
179
|
n_parameters=3_740_000_000,
|
|
180
|
+
n_embedding_parameters=None,
|
|
180
181
|
memory_usage_mb=14285,
|
|
181
182
|
max_tokens=None,
|
|
182
183
|
embed_dim=768,
|
|
@@ -201,6 +202,7 @@ blip2_opt_6_7b_coco = ModelMeta(
|
|
|
201
202
|
release_date="2024-03-31",
|
|
202
203
|
modalities=["image", "text"],
|
|
203
204
|
n_parameters=7_750_000_000,
|
|
205
|
+
n_embedding_parameters=None,
|
|
204
206
|
memory_usage_mb=29577,
|
|
205
207
|
max_tokens=None,
|
|
206
208
|
embed_dim=768,
|
|
@@ -141,6 +141,7 @@ blip_image_captioning_large = ModelMeta(
|
|
|
141
141
|
release_date="2023-12-07",
|
|
142
142
|
modalities=["image", "text"],
|
|
143
143
|
n_parameters=470_000_000,
|
|
144
|
+
n_embedding_parameters=23_442_432,
|
|
144
145
|
memory_usage_mb=1792,
|
|
145
146
|
max_tokens=512,
|
|
146
147
|
embed_dim=768,
|
|
@@ -169,6 +170,7 @@ blip_image_captioning_base = ModelMeta(
|
|
|
169
170
|
release_date="2023-08-01",
|
|
170
171
|
modalities=["image", "text"],
|
|
171
172
|
n_parameters=247_000_000,
|
|
173
|
+
n_embedding_parameters=23_442_432,
|
|
172
174
|
memory_usage_mb=942,
|
|
173
175
|
max_tokens=512,
|
|
174
176
|
embed_dim=768,
|
|
@@ -198,6 +200,7 @@ blip_vqa_base = ModelMeta(
|
|
|
198
200
|
release_date="2023-12-07",
|
|
199
201
|
modalities=["image", "text"],
|
|
200
202
|
n_parameters=247_000_000,
|
|
203
|
+
n_embedding_parameters=23_442_432,
|
|
201
204
|
memory_usage_mb=1467,
|
|
202
205
|
max_tokens=512,
|
|
203
206
|
embed_dim=768,
|
|
@@ -225,6 +228,7 @@ blip_vqa_capfilt_large = ModelMeta(
|
|
|
225
228
|
release_date="2023-01-22",
|
|
226
229
|
modalities=["image", "text"],
|
|
227
230
|
n_parameters=247_000_000,
|
|
231
|
+
n_embedding_parameters=23_442_432,
|
|
228
232
|
memory_usage_mb=942,
|
|
229
233
|
max_tokens=512,
|
|
230
234
|
embed_dim=768,
|
|
@@ -252,6 +256,7 @@ blip_itm_base_coco = ModelMeta(
|
|
|
252
256
|
release_date="2023-08-01",
|
|
253
257
|
modalities=["image", "text"],
|
|
254
258
|
n_parameters=247_000_000,
|
|
259
|
+
n_embedding_parameters=23_442_432,
|
|
255
260
|
memory_usage_mb=942,
|
|
256
261
|
max_tokens=512,
|
|
257
262
|
embed_dim=768,
|
|
@@ -279,6 +284,7 @@ blip_itm_large_coco = ModelMeta(
|
|
|
279
284
|
release_date="2023-08-01",
|
|
280
285
|
modalities=["image", "text"],
|
|
281
286
|
n_parameters=470_000_000,
|
|
287
|
+
n_embedding_parameters=23_442_432,
|
|
282
288
|
memory_usage_mb=1793,
|
|
283
289
|
max_tokens=512,
|
|
284
290
|
embed_dim=768,
|
|
@@ -307,6 +313,7 @@ blip_itm_base_flickr = ModelMeta(
|
|
|
307
313
|
release_date="2023-08-01",
|
|
308
314
|
modalities=["image", "text"],
|
|
309
315
|
n_parameters=247_000_000,
|
|
316
|
+
n_embedding_parameters=23_442_432,
|
|
310
317
|
memory_usage_mb=942,
|
|
311
318
|
max_tokens=512,
|
|
312
319
|
embed_dim=768,
|
|
@@ -335,6 +342,7 @@ blip_itm_large_flickr = ModelMeta(
|
|
|
335
342
|
release_date="2023-08-01",
|
|
336
343
|
modalities=["image", "text"],
|
|
337
344
|
n_parameters=470_000_000,
|
|
345
|
+
n_embedding_parameters=23_442_432,
|
|
338
346
|
memory_usage_mb=1793,
|
|
339
347
|
max_tokens=512,
|
|
340
348
|
embed_dim=768,
|