mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,18 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
8
8
|
from mteb.models.abs_encoder import AbsEncoder
|
|
9
9
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from torch.utils.data import DataLoader
|
|
13
|
+
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
class DINOModel(AbsEncoder):
|
|
@@ -112,6 +117,7 @@ dinov2_small = ModelMeta(
|
|
|
112
117
|
release_date="2023-07-18",
|
|
113
118
|
modalities=["image"],
|
|
114
119
|
n_parameters=22_100_000,
|
|
120
|
+
n_embedding_parameters=None,
|
|
115
121
|
memory_usage_mb=84,
|
|
116
122
|
max_tokens=None,
|
|
117
123
|
embed_dim=384,
|
|
@@ -143,6 +149,7 @@ dinov2_base = ModelMeta(
|
|
|
143
149
|
release_date="2023-07-18",
|
|
144
150
|
modalities=["image"],
|
|
145
151
|
n_parameters=86_600_000,
|
|
152
|
+
n_embedding_parameters=None,
|
|
146
153
|
memory_usage_mb=330,
|
|
147
154
|
max_tokens=None,
|
|
148
155
|
embed_dim=768,
|
|
@@ -174,6 +181,7 @@ dinov2_large = ModelMeta(
|
|
|
174
181
|
release_date="2023-07-18",
|
|
175
182
|
modalities=["image"],
|
|
176
183
|
n_parameters=304_000_000,
|
|
184
|
+
n_embedding_parameters=None,
|
|
177
185
|
memory_usage_mb=1161,
|
|
178
186
|
max_tokens=None,
|
|
179
187
|
embed_dim=1024,
|
|
@@ -205,6 +213,7 @@ dinov2_giant = ModelMeta(
|
|
|
205
213
|
release_date="2023-07-18",
|
|
206
214
|
modalities=["image"],
|
|
207
215
|
n_parameters=1_140_000_000,
|
|
216
|
+
n_embedding_parameters=None,
|
|
208
217
|
memory_usage_mb=4335,
|
|
209
218
|
max_tokens=None,
|
|
210
219
|
embed_dim=1536,
|
|
@@ -240,6 +249,7 @@ webssl_dino300m_full2b = ModelMeta(
|
|
|
240
249
|
release_date="2025-04-24",
|
|
241
250
|
modalities=["image"],
|
|
242
251
|
n_parameters=304_000_000,
|
|
252
|
+
n_embedding_parameters=None,
|
|
243
253
|
memory_usage_mb=1158,
|
|
244
254
|
max_tokens=None,
|
|
245
255
|
embed_dim=1024,
|
|
@@ -271,6 +281,7 @@ webssl_dino1b_full2b = ModelMeta(
|
|
|
271
281
|
release_date="2025-04-24",
|
|
272
282
|
modalities=["image"],
|
|
273
283
|
n_parameters=1_130_000_000,
|
|
284
|
+
n_embedding_parameters=None,
|
|
274
285
|
memory_usage_mb=4329,
|
|
275
286
|
max_tokens=None,
|
|
276
287
|
embed_dim=1536,
|
|
@@ -302,6 +313,7 @@ webssl_dino2b_full2b = ModelMeta(
|
|
|
302
313
|
release_date="2025-04-24",
|
|
303
314
|
modalities=["image"],
|
|
304
315
|
n_parameters=2_080_000_000,
|
|
316
|
+
n_embedding_parameters=None,
|
|
305
317
|
memory_usage_mb=7951,
|
|
306
318
|
max_tokens=None,
|
|
307
319
|
embed_dim=2688,
|
|
@@ -333,6 +345,7 @@ webssl_dino3b_full2b = ModelMeta(
|
|
|
333
345
|
release_date="2025-04-24",
|
|
334
346
|
modalities=["image"],
|
|
335
347
|
n_parameters=3_000_000_000,
|
|
348
|
+
n_embedding_parameters=None,
|
|
336
349
|
memory_usage_mb=11247,
|
|
337
350
|
max_tokens=None,
|
|
338
351
|
embed_dim=3072,
|
|
@@ -364,6 +377,7 @@ webssl_dino5b_full2b = ModelMeta(
|
|
|
364
377
|
release_date="2025-04-24",
|
|
365
378
|
modalities=["image"],
|
|
366
379
|
n_parameters=5_000_000_000,
|
|
380
|
+
n_embedding_parameters=None,
|
|
367
381
|
memory_usage_mb=18838,
|
|
368
382
|
max_tokens=None,
|
|
369
383
|
embed_dim=3584,
|
|
@@ -395,6 +409,7 @@ webssl_dino7b_full8b_224 = ModelMeta(
|
|
|
395
409
|
release_date="2025-04-24",
|
|
396
410
|
modalities=["image"],
|
|
397
411
|
n_parameters=7_000_000_000,
|
|
412
|
+
n_embedding_parameters=None,
|
|
398
413
|
memory_usage_mb=24605,
|
|
399
414
|
max_tokens=None,
|
|
400
415
|
embed_dim=4096,
|
|
@@ -426,6 +441,7 @@ webssl_dino7b_full8b_378 = ModelMeta(
|
|
|
426
441
|
release_date="2025-04-24",
|
|
427
442
|
modalities=["image"],
|
|
428
443
|
n_parameters=7_000_000_000,
|
|
444
|
+
n_embedding_parameters=None,
|
|
429
445
|
memory_usage_mb=24613,
|
|
430
446
|
max_tokens=None,
|
|
431
447
|
embed_dim=4096,
|
|
@@ -457,6 +473,7 @@ webssl_dino7b_full8b_518 = ModelMeta(
|
|
|
457
473
|
release_date="2025-04-24",
|
|
458
474
|
modalities=["image"],
|
|
459
475
|
n_parameters=7_000_000_000,
|
|
476
|
+
n_embedding_parameters=None,
|
|
460
477
|
memory_usage_mb=24623,
|
|
461
478
|
max_tokens=None,
|
|
462
479
|
embed_dim=4096,
|
|
@@ -489,6 +506,7 @@ webssl_dino2b_light2b = ModelMeta(
|
|
|
489
506
|
release_date="2025-04-24",
|
|
490
507
|
modalities=["image"],
|
|
491
508
|
n_parameters=2_000_000_000,
|
|
509
|
+
n_embedding_parameters=None,
|
|
492
510
|
memory_usage_mb=7951,
|
|
493
511
|
max_tokens=None,
|
|
494
512
|
embed_dim=2688,
|
|
@@ -520,6 +538,7 @@ webssl_dino2b_heavy2b = ModelMeta(
|
|
|
520
538
|
release_date="2025-04-24",
|
|
521
539
|
modalities=["image"],
|
|
522
540
|
n_parameters=2_000_000_000,
|
|
541
|
+
n_embedding_parameters=None,
|
|
523
542
|
memory_usage_mb=7951,
|
|
524
543
|
max_tokens=None,
|
|
525
544
|
embed_dim=2688,
|
|
@@ -551,6 +570,7 @@ webssl_dino3b_light2b = ModelMeta(
|
|
|
551
570
|
release_date="2025-04-24",
|
|
552
571
|
modalities=["image"],
|
|
553
572
|
n_parameters=3_000_000_000,
|
|
573
|
+
n_embedding_parameters=None,
|
|
554
574
|
memory_usage_mb=11247,
|
|
555
575
|
max_tokens=None,
|
|
556
576
|
embed_dim=3072,
|
|
@@ -582,6 +602,7 @@ webssl_dino3b_heavy2b = ModelMeta(
|
|
|
582
602
|
release_date="2025-04-24",
|
|
583
603
|
modalities=["image"],
|
|
584
604
|
n_parameters=3_000_000_000,
|
|
605
|
+
n_embedding_parameters=None,
|
|
585
606
|
memory_usage_mb=11247,
|
|
586
607
|
max_tokens=None,
|
|
587
608
|
embed_dim=3072,
|
|
@@ -613,6 +634,7 @@ webssl_mae300m_full2b = ModelMeta(
|
|
|
613
634
|
release_date="2025-04-24",
|
|
614
635
|
modalities=["image"],
|
|
615
636
|
n_parameters=304_000_000,
|
|
637
|
+
n_embedding_parameters=None,
|
|
616
638
|
memory_usage_mb=1161,
|
|
617
639
|
max_tokens=None,
|
|
618
640
|
embed_dim=1024,
|
|
@@ -644,6 +666,7 @@ webssl_mae700m_full2b = ModelMeta(
|
|
|
644
666
|
release_date="2025-04-24",
|
|
645
667
|
modalities=["image"],
|
|
646
668
|
n_parameters=700_000_000,
|
|
669
|
+
n_embedding_parameters=None,
|
|
647
670
|
memory_usage_mb=2412,
|
|
648
671
|
max_tokens=None,
|
|
649
672
|
embed_dim=1280,
|
|
@@ -675,6 +698,7 @@ webssl_mae1b_full2b = ModelMeta(
|
|
|
675
698
|
release_date="2025-04-24",
|
|
676
699
|
modalities=["image"],
|
|
677
700
|
n_parameters=1_000_000_000,
|
|
701
|
+
n_embedding_parameters=None,
|
|
678
702
|
memory_usage_mb=4337,
|
|
679
703
|
max_tokens=None,
|
|
680
704
|
embed_dim=1536,
|
|
@@ -57,6 +57,7 @@ e5_instruct = ModelMeta(
|
|
|
57
57
|
use_instructions=True,
|
|
58
58
|
reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct",
|
|
59
59
|
n_parameters=560_000_000,
|
|
60
|
+
n_embedding_parameters=256_002_048,
|
|
60
61
|
memory_usage_mb=1068,
|
|
61
62
|
embed_dim=1024,
|
|
62
63
|
license="mit",
|
|
@@ -102,6 +103,7 @@ e5_mistral = ModelMeta(
|
|
|
102
103
|
use_instructions=True,
|
|
103
104
|
reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct",
|
|
104
105
|
n_parameters=7_111_000_000,
|
|
106
|
+
n_embedding_parameters=131_072_000,
|
|
105
107
|
memory_usage_mb=13563,
|
|
106
108
|
embed_dim=4096,
|
|
107
109
|
license="mit",
|
|
@@ -145,6 +147,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
|
|
|
145
147
|
release_date="2024-08-30",
|
|
146
148
|
languages=["eng-Latn"],
|
|
147
149
|
n_parameters=7110660096,
|
|
150
|
+
n_embedding_parameters=None,
|
|
148
151
|
memory_usage_mb=13563,
|
|
149
152
|
max_tokens=32768.0,
|
|
150
153
|
embed_dim=4096,
|
|
@@ -228,6 +231,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
|
|
|
228
231
|
release_date="2024-06-28",
|
|
229
232
|
languages=["eng-Latn"],
|
|
230
233
|
n_parameters=7241732096,
|
|
234
|
+
n_embedding_parameters=131_072_000,
|
|
231
235
|
memory_usage_mb=27625,
|
|
232
236
|
max_tokens=32768.0,
|
|
233
237
|
embed_dim=4096,
|
|
@@ -76,6 +76,7 @@ e5_mult_small = ModelMeta(
|
|
|
76
76
|
revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
|
|
77
77
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
78
78
|
n_parameters=118_000_000,
|
|
79
|
+
n_embedding_parameters=96_014_208,
|
|
79
80
|
memory_usage_mb=449,
|
|
80
81
|
embed_dim=384,
|
|
81
82
|
license="mit",
|
|
@@ -103,6 +104,7 @@ e5_mult_base = ModelMeta(
|
|
|
103
104
|
revision="d13f1b27baf31030b7fd040960d60d909913633f",
|
|
104
105
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
105
106
|
n_parameters=278_000_000,
|
|
107
|
+
n_embedding_parameters=192_001_536,
|
|
106
108
|
memory_usage_mb=1061,
|
|
107
109
|
embed_dim=768,
|
|
108
110
|
license="mit",
|
|
@@ -130,6 +132,7 @@ e5_mult_large = ModelMeta(
|
|
|
130
132
|
revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
|
|
131
133
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
132
134
|
n_parameters=560_000_000,
|
|
135
|
+
n_embedding_parameters=256_002_048,
|
|
133
136
|
memory_usage_mb=2136,
|
|
134
137
|
embed_dim=1024,
|
|
135
138
|
license="mit",
|
|
@@ -157,6 +160,7 @@ e5_eng_small_v2 = ModelMeta(
|
|
|
157
160
|
revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
|
|
158
161
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
159
162
|
n_parameters=33_000_000,
|
|
163
|
+
n_embedding_parameters=11_720_448,
|
|
160
164
|
memory_usage_mb=127,
|
|
161
165
|
embed_dim=384,
|
|
162
166
|
license="mit",
|
|
@@ -184,6 +188,7 @@ e5_eng_small = ModelMeta(
|
|
|
184
188
|
revision="e272f3049e853b47cb5ca3952268c6662abda68f",
|
|
185
189
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
186
190
|
n_parameters=33_000_000,
|
|
191
|
+
n_embedding_parameters=11_720_448,
|
|
187
192
|
memory_usage_mb=127,
|
|
188
193
|
embed_dim=384,
|
|
189
194
|
license="mit",
|
|
@@ -211,6 +216,7 @@ e5_eng_base_v2 = ModelMeta(
|
|
|
211
216
|
revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
|
|
212
217
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
213
218
|
n_parameters=109_000_000,
|
|
219
|
+
n_embedding_parameters=23_440_896,
|
|
214
220
|
memory_usage_mb=418,
|
|
215
221
|
embed_dim=768,
|
|
216
222
|
license="mit",
|
|
@@ -239,6 +245,7 @@ e5_eng_large_v2 = ModelMeta(
|
|
|
239
245
|
revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
|
|
240
246
|
release_date=E5_PAPER_RELEASE_DATE,
|
|
241
247
|
n_parameters=335_000_000,
|
|
248
|
+
n_embedding_parameters=31_254_528,
|
|
242
249
|
memory_usage_mb=1278,
|
|
243
250
|
embed_dim=1024,
|
|
244
251
|
license="mit",
|
|
@@ -267,6 +274,7 @@ e5_large = ModelMeta(
|
|
|
267
274
|
revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
|
|
268
275
|
release_date="2022-12-26",
|
|
269
276
|
n_parameters=335_000_000,
|
|
277
|
+
n_embedding_parameters=31_254_528,
|
|
270
278
|
memory_usage_mb=1278,
|
|
271
279
|
embed_dim=1024,
|
|
272
280
|
license="apache-2.0",
|
|
@@ -295,6 +303,7 @@ e5_base = ModelMeta(
|
|
|
295
303
|
revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
|
|
296
304
|
release_date="2022-12-26",
|
|
297
305
|
n_parameters=109_000_000,
|
|
306
|
+
n_embedding_parameters=23_440_896,
|
|
298
307
|
memory_usage_mb=418,
|
|
299
308
|
embed_dim=768,
|
|
300
309
|
license="apache-2.0",
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
6
|
from packaging import version
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
10
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
12
17
|
|
|
13
18
|
E5_V_TRANSFORMERS_VERSION = (
|
|
14
19
|
"4.44.2" # Issue 1647: Only works with transformers==4.44.2.
|
|
@@ -166,6 +171,7 @@ e5_v = ModelMeta(
|
|
|
166
171
|
release_date="2024-07-17",
|
|
167
172
|
modalities=["image", "text"],
|
|
168
173
|
n_parameters=8_360_000_000,
|
|
174
|
+
n_embedding_parameters=None,
|
|
169
175
|
memory_usage_mb=15936,
|
|
170
176
|
max_tokens=8192,
|
|
171
177
|
embed_dim=4096,
|
|
@@ -1,17 +1,23 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
8
|
from mteb._requires_package import (
|
|
8
9
|
requires_image_dependencies,
|
|
9
10
|
requires_package,
|
|
10
11
|
)
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
12
|
from mteb.models.abs_encoder import AbsEncoder
|
|
13
13
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
-
from mteb.types import
|
|
14
|
+
from mteb.types import PromptType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput
|
|
15
21
|
|
|
16
22
|
|
|
17
23
|
class EagerEmbedV1Wrapper(AbsEncoder):
|
|
@@ -147,6 +153,7 @@ Eager_Embed_V1 = ModelMeta(
|
|
|
147
153
|
release_date="2025-11-20",
|
|
148
154
|
modalities=["image", "text"],
|
|
149
155
|
n_parameters=4_000_000_000,
|
|
156
|
+
n_embedding_parameters=None,
|
|
150
157
|
memory_usage_mb=16929,
|
|
151
158
|
max_tokens=262144,
|
|
152
159
|
embed_dim=2560,
|
|
@@ -10,6 +10,7 @@ embedding_gemma_300m_scandi = ModelMeta(
|
|
|
10
10
|
revision="9f3307b9f601db564a9190cb475324d128dcfe86",
|
|
11
11
|
release_date="2025-10-17",
|
|
12
12
|
n_parameters=307_581_696,
|
|
13
|
+
n_embedding_parameters=None,
|
|
13
14
|
embed_dim=768,
|
|
14
15
|
max_tokens=2048,
|
|
15
16
|
license="apache-2.0",
|
|
@@ -43,6 +44,7 @@ qwen_scandi = ModelMeta(
|
|
|
43
44
|
revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
|
|
44
45
|
release_date="2025-10-17",
|
|
45
46
|
n_parameters=595776512,
|
|
47
|
+
n_embedding_parameters=None,
|
|
46
48
|
memory_usage_mb=2272,
|
|
47
49
|
embed_dim=1024,
|
|
48
50
|
max_tokens=32768,
|
|
@@ -67,6 +69,7 @@ mmbert_scandi = ModelMeta(
|
|
|
67
69
|
revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
|
|
68
70
|
release_date="2025-10-17",
|
|
69
71
|
n_parameters=306939648,
|
|
72
|
+
n_embedding_parameters=None,
|
|
70
73
|
memory_usage_mb=1171,
|
|
71
74
|
embed_dim=768,
|
|
72
75
|
max_tokens=8192,
|
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from pathlib import Path
|
|
2
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
3
5
|
|
|
4
6
|
import torch
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
9
|
from mteb._requires_package import requires_image_dependencies
|
|
9
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
10
|
from mteb.models.abs_encoder import AbsEncoder
|
|
11
11
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
12
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from torch.utils.data import DataLoader
|
|
15
|
+
|
|
16
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
17
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
13
18
|
|
|
14
19
|
EVA_CLIP_CITATION = """@article{EVA-CLIP,
|
|
15
20
|
title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
|
|
@@ -144,6 +149,7 @@ EVA02_CLIP_B_16 = ModelMeta(
|
|
|
144
149
|
release_date="2023-04-26",
|
|
145
150
|
modalities=["image", "text"],
|
|
146
151
|
n_parameters=149_000_000,
|
|
152
|
+
n_embedding_parameters=None,
|
|
147
153
|
memory_usage_mb=568,
|
|
148
154
|
max_tokens=77,
|
|
149
155
|
embed_dim=512,
|
|
@@ -168,6 +174,7 @@ EVA02_CLIP_L_14 = ModelMeta(
|
|
|
168
174
|
release_date="2023-04-26",
|
|
169
175
|
modalities=["image", "text"],
|
|
170
176
|
n_parameters=428_000_000,
|
|
177
|
+
n_embedding_parameters=None,
|
|
171
178
|
memory_usage_mb=1633,
|
|
172
179
|
max_tokens=77,
|
|
173
180
|
embed_dim=768,
|
|
@@ -192,6 +199,7 @@ EVA02_CLIP_bigE_14 = ModelMeta(
|
|
|
192
199
|
release_date="2023-04-26",
|
|
193
200
|
modalities=["image", "text"],
|
|
194
201
|
n_parameters=4_700_000_000,
|
|
202
|
+
n_embedding_parameters=None,
|
|
195
203
|
memory_usage_mb=17929,
|
|
196
204
|
max_tokens=77,
|
|
197
205
|
embed_dim=1024,
|
|
@@ -217,6 +225,7 @@ EVA02_CLIP_bigE_14_plus = ModelMeta(
|
|
|
217
225
|
release_date="2023-04-26",
|
|
218
226
|
modalities=["image", "text"],
|
|
219
227
|
n_parameters=5_000_000_000,
|
|
228
|
+
n_embedding_parameters=None,
|
|
220
229
|
memory_usage_mb=19073,
|
|
221
230
|
max_tokens=77,
|
|
222
231
|
embed_dim=1024,
|
|
@@ -12,6 +12,7 @@ parsbert = ModelMeta(
|
|
|
12
12
|
revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
|
|
13
13
|
release_date="2021-05-19",
|
|
14
14
|
n_parameters=162_841_344,
|
|
15
|
+
n_embedding_parameters=76_800_000,
|
|
15
16
|
memory_usage_mb=621,
|
|
16
17
|
embed_dim=768,
|
|
17
18
|
license="not specified",
|
|
@@ -48,6 +49,7 @@ bert_zwnj = ModelMeta(
|
|
|
48
49
|
revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
|
|
49
50
|
release_date="2021-06-28",
|
|
50
51
|
n_parameters=118_297_344,
|
|
52
|
+
n_embedding_parameters=32_256_000,
|
|
51
53
|
memory_usage_mb=451,
|
|
52
54
|
embed_dim=768,
|
|
53
55
|
license="not specified",
|
|
@@ -74,6 +76,7 @@ roberta_zwnj = ModelMeta(
|
|
|
74
76
|
revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
|
|
75
77
|
release_date="2021-06-28",
|
|
76
78
|
n_parameters=118_298_112,
|
|
79
|
+
n_embedding_parameters=32_256_000,
|
|
77
80
|
memory_usage_mb=451,
|
|
78
81
|
embed_dim=768,
|
|
79
82
|
license="not specified",
|
|
@@ -99,6 +102,7 @@ sentence_transformer_parsbert = ModelMeta(
|
|
|
99
102
|
revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
|
|
100
103
|
release_date="2024-12-10",
|
|
101
104
|
n_parameters=162_841_344,
|
|
105
|
+
n_embedding_parameters=76_800_000,
|
|
102
106
|
memory_usage_mb=621,
|
|
103
107
|
embed_dim=768,
|
|
104
108
|
license="apache-2.0",
|
|
@@ -123,6 +127,7 @@ tooka_bert_base = ModelMeta(
|
|
|
123
127
|
revision="fa5ca89df5670700d9325b8872ac65c17cb24582",
|
|
124
128
|
release_date="2024-12-08",
|
|
125
129
|
n_parameters=122_905_344,
|
|
130
|
+
n_embedding_parameters=36_864_000,
|
|
126
131
|
memory_usage_mb=469,
|
|
127
132
|
embed_dim=768,
|
|
128
133
|
license="apache-2.0",
|
|
@@ -150,6 +155,7 @@ tooka_sbert = ModelMeta(
|
|
|
150
155
|
revision="5d07f0c543aca654373b931ae07cd197769110fd",
|
|
151
156
|
release_date="2024-12-07",
|
|
152
157
|
n_parameters=353_039_360,
|
|
158
|
+
n_embedding_parameters=49_152_000,
|
|
153
159
|
memory_usage_mb=1347,
|
|
154
160
|
embed_dim=1024,
|
|
155
161
|
license="apache-2.0",
|
|
@@ -181,6 +187,7 @@ fa_bert = ModelMeta(
|
|
|
181
187
|
revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
|
|
182
188
|
release_date="2024-10-07",
|
|
183
189
|
n_parameters=124_441_344,
|
|
190
|
+
n_embedding_parameters=38_400_000,
|
|
184
191
|
memory_usage_mb=475,
|
|
185
192
|
embed_dim=768,
|
|
186
193
|
license="not specified",
|
|
@@ -229,6 +236,7 @@ tooka_sbert_v2_small = ModelMeta(
|
|
|
229
236
|
revision="8bbed87e36669387f71437c061430ba56d1b496f",
|
|
230
237
|
release_date="2025-05-01",
|
|
231
238
|
n_parameters=122_905_344,
|
|
239
|
+
n_embedding_parameters=36_864_000,
|
|
232
240
|
memory_usage_mb=496,
|
|
233
241
|
embed_dim=768,
|
|
234
242
|
license="not specified",
|
|
@@ -260,6 +268,7 @@ tooka_sbert_v2_large = ModelMeta(
|
|
|
260
268
|
revision="b59682efa961122cc0e4408296d5852870c82eae",
|
|
261
269
|
release_date="2025-05-01",
|
|
262
270
|
n_parameters=353_039_360,
|
|
271
|
+
n_embedding_parameters=49_152_000,
|
|
263
272
|
memory_usage_mb=1347,
|
|
264
273
|
embed_dim=1024,
|
|
265
274
|
license="not specified",
|
|
@@ -113,6 +113,7 @@ xlmr_base = ModelMeta(
|
|
|
113
113
|
revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
|
|
114
114
|
release_date="2019-11-05", # arxiv paper release
|
|
115
115
|
n_parameters=278043648,
|
|
116
|
+
n_embedding_parameters=192_001_536,
|
|
116
117
|
memory_usage_mb=1064,
|
|
117
118
|
embed_dim=768,
|
|
118
119
|
license="mit",
|
|
@@ -163,6 +164,7 @@ xlmr_large = ModelMeta(
|
|
|
163
164
|
revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
|
|
164
165
|
release_date="2019-11-05", # arxiv paper release
|
|
165
166
|
n_parameters=559890432,
|
|
167
|
+
n_embedding_parameters=256_002_048,
|
|
166
168
|
memory_usage_mb=2141,
|
|
167
169
|
embed_dim=1024,
|
|
168
170
|
license="mit",
|
|
@@ -6,16 +6,18 @@ import warnings
|
|
|
6
6
|
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
8
|
import torch
|
|
9
|
-
from torch.utils.data import DataLoader
|
|
10
9
|
from tqdm.autonotebook import tqdm
|
|
11
10
|
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
11
|
from mteb.models.abs_encoder import AbsEncoder
|
|
14
12
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
15
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
if TYPE_CHECKING:
|
|
18
16
|
from PIL import Image
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
@@ -354,6 +356,7 @@ gme_qwen2vl_2b = ModelMeta(
|
|
|
354
356
|
release_date="2024-12-24",
|
|
355
357
|
modalities=["image", "text"],
|
|
356
358
|
n_parameters=2_210_000_000,
|
|
359
|
+
n_embedding_parameters=233_373_696,
|
|
357
360
|
memory_usage_mb=8427,
|
|
358
361
|
embed_dim=1536,
|
|
359
362
|
license="apache-2.0",
|
|
@@ -378,6 +381,7 @@ gme_qwen2vl_7b = ModelMeta(
|
|
|
378
381
|
release_date="2024-12-24",
|
|
379
382
|
modalities=["image", "text"],
|
|
380
383
|
n_parameters=8_290_000_000,
|
|
384
|
+
n_embedding_parameters=544_997_376,
|
|
381
385
|
memory_usage_mb=31629,
|
|
382
386
|
embed_dim=3584,
|
|
383
387
|
license="apache-2.0",
|
|
@@ -1,17 +1,23 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import numpy as np
|
|
4
6
|
from packaging.version import Version
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
from transformers import __version__ as transformers_version
|
|
8
9
|
|
|
9
10
|
from mteb._requires_package import requires_package
|
|
10
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
11
|
from mteb.models import sentence_transformers_loader
|
|
12
12
|
from mteb.models.abs_encoder import AbsEncoder
|
|
13
13
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
-
from mteb.types import
|
|
14
|
+
from mteb.types import PromptType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput
|
|
15
21
|
|
|
16
22
|
MULTILINGUAL_EVALUATED_LANGUAGES = [
|
|
17
23
|
"arb-Arab",
|
|
@@ -156,6 +162,7 @@ google_text_emb_004 = ModelMeta(
|
|
|
156
162
|
revision="1", # revision is intended for implementation
|
|
157
163
|
release_date="2024-05-14",
|
|
158
164
|
n_parameters=None,
|
|
165
|
+
n_embedding_parameters=None,
|
|
159
166
|
memory_usage_mb=None,
|
|
160
167
|
max_tokens=2048,
|
|
161
168
|
embed_dim=768,
|
|
@@ -181,6 +188,7 @@ google_text_emb_005 = ModelMeta(
|
|
|
181
188
|
revision="1", # revision is intended for implementation
|
|
182
189
|
release_date="2024-11-18",
|
|
183
190
|
n_parameters=None,
|
|
191
|
+
n_embedding_parameters=None,
|
|
184
192
|
memory_usage_mb=None,
|
|
185
193
|
max_tokens=2048,
|
|
186
194
|
embed_dim=768,
|
|
@@ -206,6 +214,7 @@ google_text_multilingual_emb_002 = ModelMeta(
|
|
|
206
214
|
revision="1",
|
|
207
215
|
release_date="2024-05-14",
|
|
208
216
|
n_parameters=None,
|
|
217
|
+
n_embedding_parameters=None,
|
|
209
218
|
memory_usage_mb=None,
|
|
210
219
|
max_tokens=2048,
|
|
211
220
|
embed_dim=768,
|
|
@@ -231,6 +240,7 @@ google_gemini_embedding_001 = ModelMeta(
|
|
|
231
240
|
revision="1",
|
|
232
241
|
release_date="2025-03-07",
|
|
233
242
|
n_parameters=None,
|
|
243
|
+
n_embedding_parameters=None,
|
|
234
244
|
memory_usage_mb=None,
|
|
235
245
|
max_tokens=2048,
|
|
236
246
|
embed_dim=3072,
|
|
@@ -266,6 +276,7 @@ embedding_gemma_300m = ModelMeta(
|
|
|
266
276
|
revision="64614b0b8b64f0c6c1e52b07e4e9a4e8fe4d2da2",
|
|
267
277
|
release_date="2025-09-04",
|
|
268
278
|
n_parameters=307_581_696,
|
|
279
|
+
n_embedding_parameters=201_326_592,
|
|
269
280
|
embed_dim=768,
|
|
270
281
|
max_tokens=2048,
|
|
271
282
|
license="gemma",
|
|
@@ -4,20 +4,21 @@ import logging
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
7
|
from tqdm.auto import tqdm
|
|
9
8
|
|
|
10
9
|
from mteb._requires_package import (
|
|
11
10
|
requires_image_dependencies,
|
|
12
11
|
)
|
|
13
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
14
12
|
from mteb.models.model_meta import ModelMeta
|
|
15
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
13
|
|
|
19
14
|
if TYPE_CHECKING:
|
|
20
15
|
from PIL import Image
|
|
16
|
+
from torch.utils.data import DataLoader
|
|
17
|
+
|
|
18
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class GraniteVisionEmbeddingWrapper:
|
|
@@ -172,6 +173,7 @@ granite_vision_embedding = ModelMeta(
|
|
|
172
173
|
release_date="2025-06-11",
|
|
173
174
|
modalities=["image", "text"],
|
|
174
175
|
n_parameters=2_980_000_000,
|
|
176
|
+
n_embedding_parameters=None,
|
|
175
177
|
memory_usage_mb=11351,
|
|
176
178
|
max_tokens=128000,
|
|
177
179
|
embed_dim=128,
|
|
@@ -44,6 +44,7 @@ gritlm7b = ModelMeta(
|
|
|
44
44
|
revision="13f00a0e36500c80ce12870ea513846a066004af",
|
|
45
45
|
release_date="2024-02-15",
|
|
46
46
|
n_parameters=7_240_000_000,
|
|
47
|
+
n_embedding_parameters=131_072_000,
|
|
47
48
|
memory_usage_mb=13813,
|
|
48
49
|
embed_dim=4096,
|
|
49
50
|
license="apache-2.0",
|
|
@@ -73,6 +74,8 @@ gritlm8x7b = ModelMeta(
|
|
|
73
74
|
revision="7f089b13e3345510281733ca1e6ff871b5b4bc76",
|
|
74
75
|
release_date="2024-02-15",
|
|
75
76
|
n_parameters=57_920_000_000,
|
|
77
|
+
n_embedding_parameters=None,
|
|
78
|
+
n_active_parameters_override=13_000_000_000,
|
|
76
79
|
memory_usage_mb=89079,
|
|
77
80
|
embed_dim=32768,
|
|
78
81
|
license="apache-2.0",
|