mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +4 -0
- mteb/_create_dataloaders.py +6 -3
- mteb/_evaluators/any_sts_evaluator.py +21 -12
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
- mteb/_evaluators/pair_classification_evaluator.py +30 -38
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +102 -0
- mteb/abstasks/_statistics_calculation.py +6 -2
- mteb/abstasks/classification.py +0 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/clustering_legacy.py +3 -0
- mteb/abstasks/multilabel_classification.py +10 -3
- mteb/abstasks/pair_classification.py +8 -1
- mteb/abstasks/sts.py +7 -0
- mteb/abstasks/task_metadata.py +1 -0
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +74 -15
- mteb/benchmarks/benchmarks/__init__.py +8 -0
- mteb/benchmarks/benchmarks/benchmarks.py +259 -15
- mteb/benchmarks/get_benchmark.py +2 -0
- mteb/cache.py +47 -10
- mteb/deprecated_evaluator.py +8 -13
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/evaluate.py +65 -45
- mteb/leaderboard/app.py +268 -133
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +21 -17
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/get_model_meta.py +3 -114
- mteb/models/instruct_wrapper.py +5 -1
- mteb/models/model_implementations/align_models.py +7 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +8 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +60 -0
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +11 -0
- mteb/models/model_implementations/blip_models.py +27 -0
- mteb/models/model_implementations/bm25.py +1 -0
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +9 -0
- mteb/models/model_implementations/cde_models.py +14 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +162 -0
- mteb/models/model_implementations/codesage_models.py +15 -0
- mteb/models/model_implementations/cohere_models.py +8 -1
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +14 -6
- mteb/models/model_implementations/colqwen_models.py +271 -1
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +171 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +12 -101
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +58 -0
- mteb/models/model_implementations/facebookai.py +193 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +11 -5
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +78 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +255 -2
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +209 -5
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +31 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +3 -2
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +3 -0
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +362 -0
- mteb/models/model_implementations/mme5_models.py +1 -0
- mteb/models/model_implementations/moco_models.py +11 -0
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/mxbai_models.py +9 -0
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +156 -4
- mteb/models/model_implementations/nomic_models_vision.py +7 -2
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
- mteb/models/model_implementations/nvidia_models.py +4 -1
- mteb/models/model_implementations/octen_models.py +195 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +24 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +4 -2
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +8 -0
- mteb/models/model_implementations/promptriever_models.py +8 -4
- mteb/models/model_implementations/pylate_models.py +37 -4
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +6 -3
- mteb/models/model_implementations/qzhou_models.py +3 -1
- mteb/models/model_implementations/random_baseline.py +16 -21
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +1 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +51 -0
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +57 -0
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/ua_sentence_models.py +10 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +2 -0
- mteb/models/model_implementations/vi_vn_models.py +39 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +8 -2
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +442 -22
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
- mteb/models/search_wrappers.py +165 -48
- mteb/models/sentence_transformer_wrapper.py +2 -7
- mteb/results/benchmark_results.py +88 -47
- mteb/results/model_result.py +11 -4
- mteb/results/task_result.py +37 -19
- mteb/similarity_functions.py +49 -0
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +2 -1
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/_encoder_io.py +7 -2
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
|
@@ -106,6 +106,7 @@ dinov2_training_datasets = set(
|
|
|
106
106
|
dinov2_small = ModelMeta(
|
|
107
107
|
loader=DINOModel, # type: ignore
|
|
108
108
|
name="facebook/dinov2-small",
|
|
109
|
+
model_type=["dense"],
|
|
109
110
|
languages=["eng-Latn"],
|
|
110
111
|
revision="ed25f3a31f01632728cabb09d1542f84ab7b0056",
|
|
111
112
|
release_date="2023-07-18",
|
|
@@ -123,11 +124,20 @@ dinov2_small = ModelMeta(
|
|
|
123
124
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
124
125
|
use_instructions=False,
|
|
125
126
|
training_datasets=dinov2_training_datasets,
|
|
127
|
+
citation="""@misc{oquab2023dinov2,
|
|
128
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
129
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
130
|
+
year={2023},
|
|
131
|
+
eprint={2304.07193},
|
|
132
|
+
archivePrefix={arXiv},
|
|
133
|
+
primaryClass={cs.CV}
|
|
134
|
+
}""",
|
|
126
135
|
)
|
|
127
136
|
|
|
128
137
|
dinov2_base = ModelMeta(
|
|
129
138
|
loader=DINOModel, # type: ignore
|
|
130
139
|
name="facebook/dinov2-base",
|
|
140
|
+
model_type=["dense"],
|
|
131
141
|
languages=["eng-Latn"],
|
|
132
142
|
revision="f9e44c814b77203eaa57a6bdbbd535f21ede1415",
|
|
133
143
|
release_date="2023-07-18",
|
|
@@ -145,11 +155,20 @@ dinov2_base = ModelMeta(
|
|
|
145
155
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
146
156
|
use_instructions=False,
|
|
147
157
|
training_datasets=dinov2_training_datasets,
|
|
158
|
+
citation="""@misc{oquab2023dinov2,
|
|
159
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
160
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
161
|
+
year={2023},
|
|
162
|
+
eprint={2304.07193},
|
|
163
|
+
archivePrefix={arXiv},
|
|
164
|
+
primaryClass={cs.CV}
|
|
165
|
+
}""",
|
|
148
166
|
)
|
|
149
167
|
|
|
150
168
|
dinov2_large = ModelMeta(
|
|
151
169
|
loader=DINOModel, # type: ignore
|
|
152
170
|
name="facebook/dinov2-large",
|
|
171
|
+
model_type=["dense"],
|
|
153
172
|
languages=["eng-Latn"],
|
|
154
173
|
revision="47b73eefe95e8d44ec3623f8890bd894b6ea2d6c",
|
|
155
174
|
release_date="2023-07-18",
|
|
@@ -167,11 +186,20 @@ dinov2_large = ModelMeta(
|
|
|
167
186
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
168
187
|
use_instructions=False,
|
|
169
188
|
training_datasets=dinov2_training_datasets,
|
|
189
|
+
citation="""@misc{oquab2023dinov2,
|
|
190
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
191
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
192
|
+
year={2023},
|
|
193
|
+
eprint={2304.07193},
|
|
194
|
+
archivePrefix={arXiv},
|
|
195
|
+
primaryClass={cs.CV}
|
|
196
|
+
}""",
|
|
170
197
|
)
|
|
171
198
|
|
|
172
199
|
dinov2_giant = ModelMeta(
|
|
173
200
|
loader=DINOModel, # type: ignore
|
|
174
201
|
name="facebook/dinov2-giant",
|
|
202
|
+
model_type=["dense"],
|
|
175
203
|
languages=["eng-Latn"],
|
|
176
204
|
revision="611a9d42f2335e0f921f1e313ad3c1b7178d206d",
|
|
177
205
|
release_date="2023-07-18",
|
|
@@ -189,6 +217,14 @@ dinov2_giant = ModelMeta(
|
|
|
189
217
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
190
218
|
use_instructions=False,
|
|
191
219
|
training_datasets=dinov2_training_datasets,
|
|
220
|
+
citation="""@misc{oquab2023dinov2,
|
|
221
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
222
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
223
|
+
year={2023},
|
|
224
|
+
eprint={2304.07193},
|
|
225
|
+
archivePrefix={arXiv},
|
|
226
|
+
primaryClass={cs.CV}
|
|
227
|
+
}""",
|
|
192
228
|
)
|
|
193
229
|
|
|
194
230
|
webssl_dino_training_datasets = set(
|
|
@@ -198,6 +234,7 @@ webssl_dino_training_datasets = set(
|
|
|
198
234
|
webssl_dino300m_full2b = ModelMeta(
|
|
199
235
|
loader=DINOModel,
|
|
200
236
|
name="facebook/webssl-dino300m-full2b-224",
|
|
237
|
+
model_type=["dense"],
|
|
201
238
|
languages=["eng-Latn"],
|
|
202
239
|
revision="8529cdb3fb75014932af3b896455fc21c386168e",
|
|
203
240
|
release_date="2025-04-24",
|
|
@@ -215,11 +252,20 @@ webssl_dino300m_full2b = ModelMeta(
|
|
|
215
252
|
similarity_fn_name=None,
|
|
216
253
|
use_instructions=False,
|
|
217
254
|
training_datasets=webssl_dino_training_datasets,
|
|
255
|
+
citation="""@article{fan2025scaling,
|
|
256
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
257
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
258
|
+
year={2025},
|
|
259
|
+
eprint={2504.01017},
|
|
260
|
+
archivePrefix={arXiv},
|
|
261
|
+
primaryClass={cs.CV}
|
|
262
|
+
}""",
|
|
218
263
|
)
|
|
219
264
|
|
|
220
265
|
webssl_dino1b_full2b = ModelMeta(
|
|
221
266
|
loader=DINOModel,
|
|
222
267
|
name="facebook/webssl-dino1b-full2b-224",
|
|
268
|
+
model_type=["dense"],
|
|
223
269
|
languages=["eng-Latn"],
|
|
224
270
|
revision="d3bf033d9c8cc62ea9e73c40956642cad2ec568a",
|
|
225
271
|
release_date="2025-04-24",
|
|
@@ -237,11 +283,20 @@ webssl_dino1b_full2b = ModelMeta(
|
|
|
237
283
|
similarity_fn_name=None,
|
|
238
284
|
use_instructions=False,
|
|
239
285
|
training_datasets=webssl_dino_training_datasets,
|
|
286
|
+
citation="""@article{fan2025scaling,
|
|
287
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
288
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
289
|
+
year={2025},
|
|
290
|
+
eprint={2504.01017},
|
|
291
|
+
archivePrefix={arXiv},
|
|
292
|
+
primaryClass={cs.CV}
|
|
293
|
+
}""",
|
|
240
294
|
)
|
|
241
295
|
|
|
242
296
|
webssl_dino2b_full2b = ModelMeta(
|
|
243
297
|
loader=DINOModel,
|
|
244
298
|
name="facebook/webssl-dino2b-full2b-224",
|
|
299
|
+
model_type=["dense"],
|
|
245
300
|
languages=["eng-Latn"],
|
|
246
301
|
revision="cd5893e3fd2e988eb716792049b3dd53b3f1b68b",
|
|
247
302
|
release_date="2025-04-24",
|
|
@@ -259,11 +314,20 @@ webssl_dino2b_full2b = ModelMeta(
|
|
|
259
314
|
similarity_fn_name=None,
|
|
260
315
|
use_instructions=False,
|
|
261
316
|
training_datasets=webssl_dino_training_datasets,
|
|
317
|
+
citation="""@article{fan2025scaling,
|
|
318
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
319
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
320
|
+
year={2025},
|
|
321
|
+
eprint={2504.01017},
|
|
322
|
+
archivePrefix={arXiv},
|
|
323
|
+
primaryClass={cs.CV}
|
|
324
|
+
}""",
|
|
262
325
|
)
|
|
263
326
|
|
|
264
327
|
webssl_dino3b_full2b = ModelMeta(
|
|
265
328
|
loader=DINOModel,
|
|
266
329
|
name="facebook/webssl-dino3b-full2b-224",
|
|
330
|
+
model_type=["dense"],
|
|
267
331
|
languages=["eng-Latn"],
|
|
268
332
|
revision="2d015c340b16bc47bc6557fcb4e6c83a9d4aa1d3",
|
|
269
333
|
release_date="2025-04-24",
|
|
@@ -281,11 +345,20 @@ webssl_dino3b_full2b = ModelMeta(
|
|
|
281
345
|
similarity_fn_name=None,
|
|
282
346
|
use_instructions=False,
|
|
283
347
|
training_datasets=webssl_dino_training_datasets,
|
|
348
|
+
citation="""@article{fan2025scaling,
|
|
349
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
350
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
351
|
+
year={2025},
|
|
352
|
+
eprint={2504.01017},
|
|
353
|
+
archivePrefix={arXiv},
|
|
354
|
+
primaryClass={cs.CV}
|
|
355
|
+
}""",
|
|
284
356
|
)
|
|
285
357
|
|
|
286
358
|
webssl_dino5b_full2b = ModelMeta(
|
|
287
359
|
loader=DINOModel,
|
|
288
360
|
name="facebook/webssl-dino5b-full2b-224",
|
|
361
|
+
model_type=["dense"],
|
|
289
362
|
languages=["eng-Latn"],
|
|
290
363
|
revision="88006b18b9af369f6c611db7a64d908bde3714e0",
|
|
291
364
|
release_date="2025-04-24",
|
|
@@ -303,11 +376,20 @@ webssl_dino5b_full2b = ModelMeta(
|
|
|
303
376
|
similarity_fn_name=None,
|
|
304
377
|
use_instructions=False,
|
|
305
378
|
training_datasets=webssl_dino_training_datasets,
|
|
379
|
+
citation="""@article{fan2025scaling,
|
|
380
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
381
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
382
|
+
year={2025},
|
|
383
|
+
eprint={2504.01017},
|
|
384
|
+
archivePrefix={arXiv},
|
|
385
|
+
primaryClass={cs.CV}
|
|
386
|
+
}""",
|
|
306
387
|
)
|
|
307
388
|
|
|
308
389
|
webssl_dino7b_full8b_224 = ModelMeta(
|
|
309
390
|
loader=DINOModel,
|
|
310
391
|
name="facebook/webssl-dino7b-full8b-224",
|
|
392
|
+
model_type=["dense"],
|
|
311
393
|
languages=["eng-Latn"],
|
|
312
394
|
revision="c6085463ea680043042a80c6d41db2c65e85f466",
|
|
313
395
|
release_date="2025-04-24",
|
|
@@ -325,11 +407,20 @@ webssl_dino7b_full8b_224 = ModelMeta(
|
|
|
325
407
|
similarity_fn_name=None,
|
|
326
408
|
use_instructions=False,
|
|
327
409
|
training_datasets=webssl_dino_training_datasets,
|
|
410
|
+
citation="""@article{fan2025scaling,
|
|
411
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
412
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
413
|
+
year={2025},
|
|
414
|
+
eprint={2504.01017},
|
|
415
|
+
archivePrefix={arXiv},
|
|
416
|
+
primaryClass={cs.CV}
|
|
417
|
+
}""",
|
|
328
418
|
)
|
|
329
419
|
|
|
330
420
|
webssl_dino7b_full8b_378 = ModelMeta(
|
|
331
421
|
loader=DINOModel,
|
|
332
422
|
name="facebook/webssl-dino7b-full8b-378",
|
|
423
|
+
model_type=["dense"],
|
|
333
424
|
languages=["eng-Latn"],
|
|
334
425
|
revision="53c8c5b43070bd2ddb3f66161140408ce832301f",
|
|
335
426
|
release_date="2025-04-24",
|
|
@@ -347,11 +438,20 @@ webssl_dino7b_full8b_378 = ModelMeta(
|
|
|
347
438
|
similarity_fn_name=None,
|
|
348
439
|
use_instructions=False,
|
|
349
440
|
training_datasets=webssl_dino_training_datasets,
|
|
441
|
+
citation="""@article{fan2025scaling,
|
|
442
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
443
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
444
|
+
year={2025},
|
|
445
|
+
eprint={2504.01017},
|
|
446
|
+
archivePrefix={arXiv},
|
|
447
|
+
primaryClass={cs.CV}
|
|
448
|
+
}""",
|
|
350
449
|
)
|
|
351
450
|
|
|
352
451
|
webssl_dino7b_full8b_518 = ModelMeta(
|
|
353
452
|
loader=DINOModel,
|
|
354
453
|
name="facebook/webssl-dino7b-full8b-518",
|
|
454
|
+
model_type=["dense"],
|
|
355
455
|
languages=["eng-Latn"],
|
|
356
456
|
revision="aee350d2c5e3e5fdb7ee6985291d808ea5eef431",
|
|
357
457
|
release_date="2025-04-24",
|
|
@@ -369,12 +469,21 @@ webssl_dino7b_full8b_518 = ModelMeta(
|
|
|
369
469
|
similarity_fn_name=None,
|
|
370
470
|
use_instructions=False,
|
|
371
471
|
training_datasets=webssl_dino_training_datasets,
|
|
472
|
+
citation="""@article{fan2025scaling,
|
|
473
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
474
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
475
|
+
year={2025},
|
|
476
|
+
eprint={2504.01017},
|
|
477
|
+
archivePrefix={arXiv},
|
|
478
|
+
primaryClass={cs.CV}
|
|
479
|
+
}""",
|
|
372
480
|
)
|
|
373
481
|
|
|
374
482
|
|
|
375
483
|
webssl_dino2b_light2b = ModelMeta(
|
|
376
484
|
loader=DINOModel,
|
|
377
485
|
name="facebook/webssl-dino2b-light2b-224",
|
|
486
|
+
model_type=["dense"],
|
|
378
487
|
languages=["eng-Latn"],
|
|
379
488
|
revision="633a663f304e63cc3cbec3f7f9ca2fbc94736128",
|
|
380
489
|
release_date="2025-04-24",
|
|
@@ -392,11 +501,20 @@ webssl_dino2b_light2b = ModelMeta(
|
|
|
392
501
|
similarity_fn_name=None,
|
|
393
502
|
use_instructions=False,
|
|
394
503
|
training_datasets=webssl_dino_training_datasets,
|
|
504
|
+
citation="""@article{fan2025scaling,
|
|
505
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
506
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
507
|
+
year={2025},
|
|
508
|
+
eprint={2504.01017},
|
|
509
|
+
archivePrefix={arXiv},
|
|
510
|
+
primaryClass={cs.CV}
|
|
511
|
+
}""",
|
|
395
512
|
)
|
|
396
513
|
|
|
397
514
|
webssl_dino2b_heavy2b = ModelMeta(
|
|
398
515
|
loader=DINOModel,
|
|
399
516
|
name="facebook/webssl-dino2b-heavy2b-224",
|
|
517
|
+
model_type=["dense"],
|
|
400
518
|
languages=["eng-Latn"],
|
|
401
519
|
revision="9f46eb0c0129656a1ef195fde072e3765abdb7c6",
|
|
402
520
|
release_date="2025-04-24",
|
|
@@ -414,11 +532,20 @@ webssl_dino2b_heavy2b = ModelMeta(
|
|
|
414
532
|
similarity_fn_name=None,
|
|
415
533
|
use_instructions=False,
|
|
416
534
|
training_datasets=webssl_dino_training_datasets,
|
|
535
|
+
citation="""@article{fan2025scaling,
|
|
536
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
537
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
538
|
+
year={2025},
|
|
539
|
+
eprint={2504.01017},
|
|
540
|
+
archivePrefix={arXiv},
|
|
541
|
+
primaryClass={cs.CV}
|
|
542
|
+
}""",
|
|
417
543
|
)
|
|
418
544
|
|
|
419
545
|
webssl_dino3b_light2b = ModelMeta(
|
|
420
546
|
loader=DINOModel,
|
|
421
547
|
name="facebook/webssl-dino3b-light2b-224",
|
|
548
|
+
model_type=["dense"],
|
|
422
549
|
languages=["eng-Latn"],
|
|
423
550
|
revision="4d0160f60673805431f4ad14983e712ed88be5b8",
|
|
424
551
|
release_date="2025-04-24",
|
|
@@ -436,11 +563,20 @@ webssl_dino3b_light2b = ModelMeta(
|
|
|
436
563
|
similarity_fn_name=None,
|
|
437
564
|
use_instructions=False,
|
|
438
565
|
training_datasets=webssl_dino_training_datasets,
|
|
566
|
+
citation="""@article{fan2025scaling,
|
|
567
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
568
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
569
|
+
year={2025},
|
|
570
|
+
eprint={2504.01017},
|
|
571
|
+
archivePrefix={arXiv},
|
|
572
|
+
primaryClass={cs.CV}
|
|
573
|
+
}""",
|
|
439
574
|
)
|
|
440
575
|
|
|
441
576
|
webssl_dino3b_heavy2b = ModelMeta(
|
|
442
577
|
loader=DINOModel,
|
|
443
578
|
name="facebook/webssl-dino3b-heavy2b-224",
|
|
579
|
+
model_type=["dense"],
|
|
444
580
|
languages=["eng-Latn"],
|
|
445
581
|
revision="dd39c2910747561b332285d96c4dce0bdb240775",
|
|
446
582
|
release_date="2025-04-24",
|
|
@@ -458,11 +594,20 @@ webssl_dino3b_heavy2b = ModelMeta(
|
|
|
458
594
|
similarity_fn_name=None,
|
|
459
595
|
use_instructions=False,
|
|
460
596
|
training_datasets=webssl_dino_training_datasets,
|
|
597
|
+
citation="""@article{fan2025scaling,
|
|
598
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
599
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
600
|
+
year={2025},
|
|
601
|
+
eprint={2504.01017},
|
|
602
|
+
archivePrefix={arXiv},
|
|
603
|
+
primaryClass={cs.CV}
|
|
604
|
+
}""",
|
|
461
605
|
)
|
|
462
606
|
|
|
463
607
|
webssl_mae300m_full2b = ModelMeta(
|
|
464
608
|
loader=DINOModel,
|
|
465
609
|
name="facebook/webssl-mae300m-full2b-224",
|
|
610
|
+
model_type=["dense"],
|
|
466
611
|
languages=["eng-Latn"],
|
|
467
612
|
revision="4655a0ac1726c206ba14d5ccb26758c62a4d03b0",
|
|
468
613
|
release_date="2025-04-24",
|
|
@@ -480,11 +625,20 @@ webssl_mae300m_full2b = ModelMeta(
|
|
|
480
625
|
similarity_fn_name=None,
|
|
481
626
|
use_instructions=False,
|
|
482
627
|
training_datasets=webssl_dino_training_datasets,
|
|
628
|
+
citation="""@article{fan2025scaling,
|
|
629
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
630
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
631
|
+
year={2025},
|
|
632
|
+
eprint={2504.01017},
|
|
633
|
+
archivePrefix={arXiv},
|
|
634
|
+
primaryClass={cs.CV}
|
|
635
|
+
}""",
|
|
483
636
|
)
|
|
484
637
|
|
|
485
638
|
webssl_mae700m_full2b = ModelMeta(
|
|
486
639
|
loader=DINOModel,
|
|
487
640
|
name="facebook/webssl-mae700m-full2b-224",
|
|
641
|
+
model_type=["dense"],
|
|
488
642
|
languages=["eng-Latn"],
|
|
489
643
|
revision="c32be382e757d73a178de1ead62c27391d4b4280",
|
|
490
644
|
release_date="2025-04-24",
|
|
@@ -502,11 +656,20 @@ webssl_mae700m_full2b = ModelMeta(
|
|
|
502
656
|
similarity_fn_name=None,
|
|
503
657
|
use_instructions=False,
|
|
504
658
|
training_datasets=webssl_dino_training_datasets,
|
|
659
|
+
citation="""@article{fan2025scaling,
|
|
660
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
661
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
662
|
+
year={2025},
|
|
663
|
+
eprint={2504.01017},
|
|
664
|
+
archivePrefix={arXiv},
|
|
665
|
+
primaryClass={cs.CV}
|
|
666
|
+
}""",
|
|
505
667
|
)
|
|
506
668
|
|
|
507
669
|
webssl_mae1b_full2b = ModelMeta(
|
|
508
670
|
loader=DINOModel,
|
|
509
671
|
name="facebook/webssl-mae1b-full2b-224",
|
|
672
|
+
model_type=["dense"],
|
|
510
673
|
languages=["eng-Latn"],
|
|
511
674
|
revision="5880aefedbad8db0f44d27358f6f08e8576f70fc",
|
|
512
675
|
release_date="2025-04-24",
|
|
@@ -524,4 +687,12 @@ webssl_mae1b_full2b = ModelMeta(
|
|
|
524
687
|
similarity_fn_name=None,
|
|
525
688
|
use_instructions=False,
|
|
526
689
|
training_datasets=webssl_dino_training_datasets,
|
|
690
|
+
citation="""@article{fan2025scaling,
|
|
691
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
692
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
693
|
+
year={2025},
|
|
694
|
+
eprint={2504.01017},
|
|
695
|
+
archivePrefix={arXiv},
|
|
696
|
+
primaryClass={cs.CV}
|
|
697
|
+
}""",
|
|
527
698
|
)
|
|
@@ -40,6 +40,7 @@ e5_instruct = ModelMeta(
|
|
|
40
40
|
normalized=True,
|
|
41
41
|
),
|
|
42
42
|
name="intfloat/multilingual-e5-large-instruct",
|
|
43
|
+
model_type=["dense"],
|
|
43
44
|
languages=XLMR_LANGUAGES,
|
|
44
45
|
open_weights=True,
|
|
45
46
|
revision="baa7be480a7de1539afce709c8f13f833a510e0a",
|
|
@@ -78,6 +79,7 @@ e5_mistral = ModelMeta(
|
|
|
78
79
|
normalized=True,
|
|
79
80
|
),
|
|
80
81
|
name="intfloat/e5-mistral-7b-instruct",
|
|
82
|
+
model_type=["dense"],
|
|
81
83
|
languages=MISTRAL_LANGUAGES,
|
|
82
84
|
open_weights=True,
|
|
83
85
|
revision="07163b72af1488142a360786df853f237b1a3ca1",
|
|
@@ -125,6 +127,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
|
|
|
125
127
|
normalized=True,
|
|
126
128
|
),
|
|
127
129
|
name="zeta-alpha-ai/Zeta-Alpha-E5-Mistral",
|
|
130
|
+
model_type=["dense"],
|
|
128
131
|
revision="c791d37474fa6a5c72eb3a2522be346bc21fbfc3",
|
|
129
132
|
release_date="2024-08-30",
|
|
130
133
|
languages=["eng-Latn"],
|
|
@@ -201,6 +204,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
|
|
|
201
204
|
tokenizer_kwargs={"pad_token": "</s>"},
|
|
202
205
|
),
|
|
203
206
|
name="BeastyZ/e5-R-mistral-7b",
|
|
207
|
+
model_type=["dense"],
|
|
204
208
|
revision="3f810a6a7fd220369ad248e3705cf13d71803602",
|
|
205
209
|
release_date="2024-06-28",
|
|
206
210
|
languages=["eng-Latn"],
|
|
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
|
|
|
5
5
|
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
|
|
6
6
|
from mteb.types import PromptType
|
|
7
7
|
|
|
8
|
+
from .facebookai import XLMR_LANGUAGES
|
|
9
|
+
|
|
8
10
|
E5_PAPER_RELEASE_DATE = "2024-02-08"
|
|
9
|
-
|
|
10
|
-
"afr-Latn",
|
|
11
|
-
"amh-Latn",
|
|
12
|
-
"ara-Latn",
|
|
13
|
-
"asm-Latn",
|
|
14
|
-
"aze-Latn",
|
|
15
|
-
"bel-Latn",
|
|
16
|
-
"bul-Latn",
|
|
17
|
-
"ben-Latn",
|
|
18
|
-
"ben-Beng",
|
|
19
|
-
"bre-Latn",
|
|
20
|
-
"bos-Latn",
|
|
21
|
-
"cat-Latn",
|
|
22
|
-
"ces-Latn",
|
|
23
|
-
"cym-Latn",
|
|
24
|
-
"dan-Latn",
|
|
25
|
-
"deu-Latn",
|
|
26
|
-
"ell-Latn",
|
|
27
|
-
"eng-Latn",
|
|
28
|
-
"epo-Latn",
|
|
29
|
-
"spa-Latn",
|
|
30
|
-
"est-Latn",
|
|
31
|
-
"eus-Latn",
|
|
32
|
-
"fas-Latn",
|
|
33
|
-
"fin-Latn",
|
|
34
|
-
"fra-Latn",
|
|
35
|
-
"fry-Latn",
|
|
36
|
-
"gle-Latn",
|
|
37
|
-
"gla-Latn",
|
|
38
|
-
"glg-Latn",
|
|
39
|
-
"guj-Latn",
|
|
40
|
-
"hau-Latn",
|
|
41
|
-
"heb-Latn",
|
|
42
|
-
"hin-Latn",
|
|
43
|
-
"hin-Deva",
|
|
44
|
-
"hrv-Latn",
|
|
45
|
-
"hun-Latn",
|
|
46
|
-
"hye-Latn",
|
|
47
|
-
"ind-Latn",
|
|
48
|
-
"isl-Latn",
|
|
49
|
-
"ita-Latn",
|
|
50
|
-
"jpn-Latn",
|
|
51
|
-
"jav-Latn",
|
|
52
|
-
"kat-Latn",
|
|
53
|
-
"kaz-Latn",
|
|
54
|
-
"khm-Latn",
|
|
55
|
-
"kan-Latn",
|
|
56
|
-
"kor-Latn",
|
|
57
|
-
"kur-Latn",
|
|
58
|
-
"kir-Latn",
|
|
59
|
-
"lat-Latn",
|
|
60
|
-
"lao-Latn",
|
|
61
|
-
"lit-Latn",
|
|
62
|
-
"lav-Latn",
|
|
63
|
-
"mlg-Latn",
|
|
64
|
-
"mkd-Latn",
|
|
65
|
-
"mal-Latn",
|
|
66
|
-
"mon-Latn",
|
|
67
|
-
"mar-Latn",
|
|
68
|
-
"msa-Latn",
|
|
69
|
-
"mya-Latn",
|
|
70
|
-
"nep-Latn",
|
|
71
|
-
"nld-Latn",
|
|
72
|
-
"nob-Latn",
|
|
73
|
-
"orm-Latn",
|
|
74
|
-
"ori-Latn",
|
|
75
|
-
"pan-Latn",
|
|
76
|
-
"pol-Latn",
|
|
77
|
-
"pus-Latn",
|
|
78
|
-
"por-Latn",
|
|
79
|
-
"ron-Latn",
|
|
80
|
-
"rus-Latn",
|
|
81
|
-
"san-Latn",
|
|
82
|
-
"snd-Latn",
|
|
83
|
-
"sin-Latn",
|
|
84
|
-
"slk-Latn",
|
|
85
|
-
"slv-Latn",
|
|
86
|
-
"som-Latn",
|
|
87
|
-
"sqi-Latn",
|
|
88
|
-
"srp-Latn",
|
|
89
|
-
"sun-Latn",
|
|
90
|
-
"swe-Latn",
|
|
91
|
-
"swa-Latn",
|
|
92
|
-
"tam-Latn",
|
|
93
|
-
"tam-Taml",
|
|
94
|
-
"tel-Latn",
|
|
95
|
-
"tel-Telu",
|
|
96
|
-
"tha-Latn",
|
|
97
|
-
"tgl-Latn",
|
|
98
|
-
"tur-Latn",
|
|
99
|
-
"uig-Latn",
|
|
100
|
-
"ukr-Latn",
|
|
101
|
-
"urd-Latn",
|
|
102
|
-
"urd-Arab",
|
|
103
|
-
"uzb-Latn",
|
|
104
|
-
"vie-Latn",
|
|
105
|
-
"xho-Latn",
|
|
106
|
-
"yid-Latn",
|
|
107
|
-
"zho-Hant",
|
|
108
|
-
"zho-Hans",
|
|
109
|
-
]
|
|
11
|
+
|
|
110
12
|
|
|
111
13
|
MULTILINGUAL_E5_CITATION = """
|
|
112
14
|
@article{wang2024multilingual,
|
|
@@ -168,6 +70,7 @@ e5_mult_small = ModelMeta(
|
|
|
168
70
|
model_prompts=model_prompts,
|
|
169
71
|
),
|
|
170
72
|
name="intfloat/multilingual-e5-small",
|
|
73
|
+
model_type=["dense"],
|
|
171
74
|
languages=XLMR_LANGUAGES,
|
|
172
75
|
open_weights=True,
|
|
173
76
|
revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
|
|
@@ -194,6 +97,7 @@ e5_mult_base = ModelMeta(
|
|
|
194
97
|
model_prompts=model_prompts,
|
|
195
98
|
),
|
|
196
99
|
name="intfloat/multilingual-e5-base",
|
|
100
|
+
model_type=["dense"],
|
|
197
101
|
languages=XLMR_LANGUAGES,
|
|
198
102
|
open_weights=True,
|
|
199
103
|
revision="d13f1b27baf31030b7fd040960d60d909913633f",
|
|
@@ -220,6 +124,7 @@ e5_mult_large = ModelMeta(
|
|
|
220
124
|
model_prompts=model_prompts,
|
|
221
125
|
),
|
|
222
126
|
name="intfloat/multilingual-e5-large",
|
|
127
|
+
model_type=["dense"],
|
|
223
128
|
languages=XLMR_LANGUAGES,
|
|
224
129
|
open_weights=True,
|
|
225
130
|
revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
|
|
@@ -246,6 +151,7 @@ e5_eng_small_v2 = ModelMeta(
|
|
|
246
151
|
model_prompts=model_prompts,
|
|
247
152
|
),
|
|
248
153
|
name="intfloat/e5-small-v2",
|
|
154
|
+
model_type=["dense"],
|
|
249
155
|
languages=["eng-Latn"],
|
|
250
156
|
open_weights=True,
|
|
251
157
|
revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
|
|
@@ -272,6 +178,7 @@ e5_eng_small = ModelMeta(
|
|
|
272
178
|
model_prompts=model_prompts,
|
|
273
179
|
),
|
|
274
180
|
name="intfloat/e5-small",
|
|
181
|
+
model_type=["dense"],
|
|
275
182
|
languages=["eng-Latn"],
|
|
276
183
|
open_weights=True,
|
|
277
184
|
revision="e272f3049e853b47cb5ca3952268c6662abda68f",
|
|
@@ -298,6 +205,7 @@ e5_eng_base_v2 = ModelMeta(
|
|
|
298
205
|
model_prompts=model_prompts,
|
|
299
206
|
),
|
|
300
207
|
name="intfloat/e5-base-v2",
|
|
208
|
+
model_type=["dense"],
|
|
301
209
|
languages=["eng-Latn"],
|
|
302
210
|
open_weights=True,
|
|
303
211
|
revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
|
|
@@ -325,6 +233,7 @@ e5_eng_large_v2 = ModelMeta(
|
|
|
325
233
|
model_prompts=model_prompts,
|
|
326
234
|
),
|
|
327
235
|
name="intfloat/e5-large-v2",
|
|
236
|
+
model_type=["dense"],
|
|
328
237
|
languages=["eng-Latn"],
|
|
329
238
|
open_weights=True,
|
|
330
239
|
revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
|
|
@@ -352,6 +261,7 @@ e5_large = ModelMeta(
|
|
|
352
261
|
model_prompts=model_prompts,
|
|
353
262
|
),
|
|
354
263
|
name="intfloat/e5-large",
|
|
264
|
+
model_type=["dense"],
|
|
355
265
|
languages=["eng-Latn"],
|
|
356
266
|
open_weights=True,
|
|
357
267
|
revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
|
|
@@ -379,6 +289,7 @@ e5_base = ModelMeta(
|
|
|
379
289
|
model_prompts=model_prompts,
|
|
380
290
|
),
|
|
381
291
|
name="intfloat/e5-base",
|
|
292
|
+
model_type=["dense"],
|
|
382
293
|
languages=["eng-Latn"],
|
|
383
294
|
open_weights=True,
|
|
384
295
|
revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
|