mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
|
|
|
18
18
|
release_date="2024-10-31",
|
|
19
19
|
languages=["eng-Latn"],
|
|
20
20
|
n_parameters=7110660096,
|
|
21
|
+
n_embedding_parameters=None,
|
|
21
22
|
memory_usage_mb=13563,
|
|
22
23
|
max_tokens=32768.0,
|
|
23
24
|
embed_dim=None,
|
|
@@ -25,7 +26,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
|
|
|
25
26
|
open_weights=True,
|
|
26
27
|
public_training_code=None,
|
|
27
28
|
public_training_data=None,
|
|
28
|
-
framework=["PyTorch"],
|
|
29
|
+
framework=["PyTorch", "Transformers", "safetensors"],
|
|
29
30
|
reference="https://huggingface.co/Haon-Chen/speed-embedding-7b-instruct",
|
|
30
31
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
31
32
|
use_instructions=None,
|
|
@@ -47,6 +48,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
|
|
|
47
48
|
languages=[],
|
|
48
49
|
loader=sentence_transformers_loader,
|
|
49
50
|
n_parameters=278043648,
|
|
51
|
+
n_embedding_parameters=192_001_536,
|
|
50
52
|
memory_usage_mb=1061,
|
|
51
53
|
max_tokens=514.0,
|
|
52
54
|
embed_dim=768,
|
|
@@ -54,7 +56,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
|
|
|
54
56
|
open_weights=True,
|
|
55
57
|
public_training_code=None,
|
|
56
58
|
public_training_data=None,
|
|
57
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
59
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
58
60
|
reference="https://huggingface.co/Gameselo/STS-multilingual-mpnet-base-v2",
|
|
59
61
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
60
62
|
use_instructions=None,
|
|
@@ -148,6 +150,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
|
|
|
148
150
|
languages=["eng-Latn"],
|
|
149
151
|
loader=sentence_transformers_loader,
|
|
150
152
|
n_parameters=None,
|
|
153
|
+
n_embedding_parameters=None,
|
|
151
154
|
memory_usage_mb=None,
|
|
152
155
|
max_tokens=None,
|
|
153
156
|
embed_dim=768,
|
|
@@ -155,7 +158,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
|
|
|
155
158
|
open_weights=True,
|
|
156
159
|
public_training_code=None,
|
|
157
160
|
public_training_data=None,
|
|
158
|
-
framework=["PyTorch"],
|
|
161
|
+
framework=["PyTorch", "Sentence Transformers"],
|
|
159
162
|
reference="https://huggingface.co/Hum-Works/lodestone-base-4096-v1",
|
|
160
163
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
161
164
|
use_instructions=None,
|
|
@@ -215,6 +218,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
|
|
|
215
218
|
languages=[],
|
|
216
219
|
loader=sentence_transformers_loader,
|
|
217
220
|
n_parameters=2506172416,
|
|
221
|
+
n_embedding_parameters=None,
|
|
218
222
|
memory_usage_mb=9560,
|
|
219
223
|
max_tokens=8192.0,
|
|
220
224
|
embed_dim=2048,
|
|
@@ -222,7 +226,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
|
|
|
222
226
|
open_weights=True,
|
|
223
227
|
public_training_code=None,
|
|
224
228
|
public_training_data=None,
|
|
225
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
229
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
226
230
|
reference="https://huggingface.co/Jaume/gemma-2b-embeddings",
|
|
227
231
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
228
232
|
use_instructions=None,
|
|
@@ -250,6 +254,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
|
|
|
250
254
|
trust_remote_code=True,
|
|
251
255
|
),
|
|
252
256
|
n_parameters=278043648,
|
|
257
|
+
n_embedding_parameters=192_001_536,
|
|
253
258
|
memory_usage_mb=1061,
|
|
254
259
|
max_tokens=514.0,
|
|
255
260
|
embed_dim=768,
|
|
@@ -257,7 +262,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
|
|
|
257
262
|
open_weights=True,
|
|
258
263
|
public_training_code=None,
|
|
259
264
|
public_training_data=None,
|
|
260
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
265
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
261
266
|
reference="https://huggingface.co/Lajavaness/bilingual-embedding-base",
|
|
262
267
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
263
268
|
use_instructions=None,
|
|
@@ -299,6 +304,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
|
|
|
299
304
|
trust_remote_code=True,
|
|
300
305
|
),
|
|
301
306
|
n_parameters=559890432,
|
|
307
|
+
n_embedding_parameters=256_002_048,
|
|
302
308
|
memory_usage_mb=2136,
|
|
303
309
|
max_tokens=514.0,
|
|
304
310
|
embed_dim=1024,
|
|
@@ -306,7 +312,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
|
|
|
306
312
|
open_weights=True,
|
|
307
313
|
public_training_code=None,
|
|
308
314
|
public_training_data=None,
|
|
309
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
315
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
310
316
|
reference="https://huggingface.co/Lajavaness/bilingual-embedding-large",
|
|
311
317
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
312
318
|
use_instructions=None,
|
|
@@ -348,6 +354,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
|
|
|
348
354
|
trust_remote_code=True,
|
|
349
355
|
),
|
|
350
356
|
n_parameters=117653760,
|
|
357
|
+
n_embedding_parameters=96_014_208,
|
|
351
358
|
memory_usage_mb=449,
|
|
352
359
|
max_tokens=512.0,
|
|
353
360
|
embed_dim=384,
|
|
@@ -355,7 +362,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
|
|
|
355
362
|
open_weights=True,
|
|
356
363
|
public_training_code=None,
|
|
357
364
|
public_training_data=None,
|
|
358
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
365
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
359
366
|
reference="https://huggingface.co/Lajavaness/bilingual-embedding-small",
|
|
360
367
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
361
368
|
use_instructions=None,
|
|
@@ -394,6 +401,7 @@ Mihaiii__Bulbasaur = ModelMeta(
|
|
|
394
401
|
languages=None,
|
|
395
402
|
loader=sentence_transformers_loader,
|
|
396
403
|
n_parameters=17389824,
|
|
404
|
+
n_embedding_parameters=11_720_448,
|
|
397
405
|
memory_usage_mb=66,
|
|
398
406
|
max_tokens=512.0,
|
|
399
407
|
embed_dim=384,
|
|
@@ -401,7 +409,7 @@ Mihaiii__Bulbasaur = ModelMeta(
|
|
|
401
409
|
open_weights=True,
|
|
402
410
|
public_training_code=None,
|
|
403
411
|
public_training_data=None,
|
|
404
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
412
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
405
413
|
reference="https://huggingface.co/Mihaiii/Bulbasaur",
|
|
406
414
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
407
415
|
use_instructions=None,
|
|
@@ -418,6 +426,7 @@ Mihaiii__Ivysaur = ModelMeta(
|
|
|
418
426
|
languages=None,
|
|
419
427
|
loader=sentence_transformers_loader,
|
|
420
428
|
n_parameters=22713216,
|
|
429
|
+
n_embedding_parameters=11_720_448,
|
|
421
430
|
memory_usage_mb=87,
|
|
422
431
|
max_tokens=512.0,
|
|
423
432
|
embed_dim=384,
|
|
@@ -425,7 +434,7 @@ Mihaiii__Ivysaur = ModelMeta(
|
|
|
425
434
|
open_weights=True,
|
|
426
435
|
public_training_code=None,
|
|
427
436
|
public_training_data=None,
|
|
428
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
437
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
429
438
|
reference="https://huggingface.co/Mihaiii/Ivysaur",
|
|
430
439
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
431
440
|
use_instructions=None,
|
|
@@ -442,6 +451,7 @@ Mihaiii__Squirtle = ModelMeta(
|
|
|
442
451
|
languages=None,
|
|
443
452
|
loader=sentence_transformers_loader,
|
|
444
453
|
n_parameters=15615360,
|
|
454
|
+
n_embedding_parameters=11_720_448,
|
|
445
455
|
memory_usage_mb=60,
|
|
446
456
|
max_tokens=512.0,
|
|
447
457
|
embed_dim=384,
|
|
@@ -449,7 +459,7 @@ Mihaiii__Squirtle = ModelMeta(
|
|
|
449
459
|
open_weights=True,
|
|
450
460
|
public_training_code=None,
|
|
451
461
|
public_training_data=None,
|
|
452
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
462
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
453
463
|
reference="https://huggingface.co/Mihaiii/Squirtle",
|
|
454
464
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
455
465
|
use_instructions=None,
|
|
@@ -466,6 +476,7 @@ Mihaiii__Venusaur = ModelMeta(
|
|
|
466
476
|
languages=None,
|
|
467
477
|
loader=sentence_transformers_loader,
|
|
468
478
|
n_parameters=15615360,
|
|
479
|
+
n_embedding_parameters=11_720_448,
|
|
469
480
|
memory_usage_mb=60,
|
|
470
481
|
max_tokens=512.0,
|
|
471
482
|
embed_dim=384,
|
|
@@ -473,7 +484,7 @@ Mihaiii__Venusaur = ModelMeta(
|
|
|
473
484
|
open_weights=True,
|
|
474
485
|
public_training_code=None,
|
|
475
486
|
public_training_data=None,
|
|
476
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
487
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
477
488
|
reference="https://huggingface.co/Mihaiii/Venusaur",
|
|
478
489
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
479
490
|
use_instructions=None,
|
|
@@ -490,6 +501,7 @@ Mihaiii__Wartortle = ModelMeta(
|
|
|
490
501
|
languages=None,
|
|
491
502
|
loader=sentence_transformers_loader,
|
|
492
503
|
n_parameters=17389824,
|
|
504
|
+
n_embedding_parameters=11_720_448,
|
|
493
505
|
memory_usage_mb=66,
|
|
494
506
|
max_tokens=512.0,
|
|
495
507
|
embed_dim=384,
|
|
@@ -497,7 +509,7 @@ Mihaiii__Wartortle = ModelMeta(
|
|
|
497
509
|
open_weights=True,
|
|
498
510
|
public_training_code=None,
|
|
499
511
|
public_training_data=None,
|
|
500
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
512
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
501
513
|
reference="https://huggingface.co/Mihaiii/Wartortle",
|
|
502
514
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
503
515
|
use_instructions=None,
|
|
@@ -514,6 +526,7 @@ Mihaiii__gte_micro = ModelMeta(
|
|
|
514
526
|
languages=None,
|
|
515
527
|
loader=sentence_transformers_loader,
|
|
516
528
|
n_parameters=17389824,
|
|
529
|
+
n_embedding_parameters=11_720_448,
|
|
517
530
|
memory_usage_mb=66,
|
|
518
531
|
max_tokens=512.0,
|
|
519
532
|
embed_dim=384,
|
|
@@ -521,7 +534,7 @@ Mihaiii__gte_micro = ModelMeta(
|
|
|
521
534
|
open_weights=True,
|
|
522
535
|
public_training_code=None,
|
|
523
536
|
public_training_data=None,
|
|
524
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
537
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
525
538
|
reference="https://huggingface.co/Mihaiii/gte-micro",
|
|
526
539
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
527
540
|
use_instructions=None,
|
|
@@ -537,6 +550,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
|
|
|
537
550
|
languages=None,
|
|
538
551
|
loader=sentence_transformers_loader,
|
|
539
552
|
n_parameters=19164288,
|
|
553
|
+
n_embedding_parameters=11_720_448,
|
|
540
554
|
memory_usage_mb=73,
|
|
541
555
|
max_tokens=512.0,
|
|
542
556
|
embed_dim=384,
|
|
@@ -544,7 +558,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
|
|
|
544
558
|
open_weights=True,
|
|
545
559
|
public_training_code=None,
|
|
546
560
|
public_training_data=None,
|
|
547
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
561
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
548
562
|
reference="https://huggingface.co/Mihaiii/gte-micro-v4",
|
|
549
563
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
550
564
|
use_instructions=None,
|
|
@@ -560,6 +574,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
|
|
|
560
574
|
languages=["fra-Latn"],
|
|
561
575
|
loader=sentence_transformers_loader,
|
|
562
576
|
n_parameters=559890432,
|
|
577
|
+
n_embedding_parameters=256_002_048,
|
|
563
578
|
memory_usage_mb=2136,
|
|
564
579
|
max_tokens=514.0,
|
|
565
580
|
embed_dim=1024,
|
|
@@ -567,7 +582,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
|
|
|
567
582
|
open_weights=True,
|
|
568
583
|
public_training_code=None,
|
|
569
584
|
public_training_data=None,
|
|
570
|
-
framework=["PyTorch"],
|
|
585
|
+
framework=["PyTorch", "Transformers", "safetensors"],
|
|
571
586
|
reference="https://huggingface.co/OrdalieTech/Solon-embeddings-large-0.1",
|
|
572
587
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
573
588
|
use_instructions=None,
|
|
@@ -583,6 +598,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
583
598
|
languages=["ara-Arab"],
|
|
584
599
|
loader=sentence_transformers_loader,
|
|
585
600
|
n_parameters=135193344,
|
|
601
|
+
n_embedding_parameters=49_152_000,
|
|
586
602
|
memory_usage_mb=516,
|
|
587
603
|
max_tokens=512.0,
|
|
588
604
|
embed_dim=768,
|
|
@@ -590,7 +606,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
590
606
|
open_weights=True,
|
|
591
607
|
public_training_code=None,
|
|
592
608
|
public_training_data=None,
|
|
593
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
609
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
594
610
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
|
|
595
611
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
596
612
|
use_instructions=None,
|
|
@@ -615,6 +631,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
|
|
|
615
631
|
languages=["ara-Arab"],
|
|
616
632
|
loader=sentence_transformers_loader,
|
|
617
633
|
n_parameters=117653760,
|
|
634
|
+
n_embedding_parameters=96_014_208,
|
|
618
635
|
memory_usage_mb=449,
|
|
619
636
|
max_tokens=512.0,
|
|
620
637
|
embed_dim=384,
|
|
@@ -622,7 +639,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
|
|
|
622
639
|
open_weights=True,
|
|
623
640
|
public_training_code=None,
|
|
624
641
|
public_training_data=None,
|
|
625
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
642
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
626
643
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet",
|
|
627
644
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
628
645
|
use_instructions=None,
|
|
@@ -640,6 +657,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
640
657
|
languages=["ara-Arab"],
|
|
641
658
|
loader=sentence_transformers_loader,
|
|
642
659
|
n_parameters=278043648,
|
|
660
|
+
n_embedding_parameters=192_001_536,
|
|
643
661
|
memory_usage_mb=1061,
|
|
644
662
|
max_tokens=514.0,
|
|
645
663
|
embed_dim=768,
|
|
@@ -647,7 +665,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
647
665
|
open_weights=True,
|
|
648
666
|
public_training_code=None,
|
|
649
667
|
public_training_data=None,
|
|
650
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
668
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
651
669
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
|
|
652
670
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
653
671
|
use_instructions=None,
|
|
@@ -674,6 +692,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
|
|
|
674
692
|
languages=["ara-Arab"],
|
|
675
693
|
loader=sentence_transformers_loader,
|
|
676
694
|
n_parameters=470926848,
|
|
695
|
+
n_embedding_parameters=384_885_504,
|
|
677
696
|
memory_usage_mb=1796,
|
|
678
697
|
max_tokens=512.0,
|
|
679
698
|
embed_dim=768,
|
|
@@ -681,7 +700,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
|
|
|
681
700
|
open_weights=True,
|
|
682
701
|
public_training_code=None,
|
|
683
702
|
public_training_data=None,
|
|
684
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
703
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
685
704
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
|
|
686
705
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
687
706
|
use_instructions=None,
|
|
@@ -708,6 +727,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
|
|
|
708
727
|
languages=["ara-Arab"],
|
|
709
728
|
loader=sentence_transformers_loader,
|
|
710
729
|
n_parameters=109486464,
|
|
730
|
+
n_embedding_parameters=23_444_736,
|
|
711
731
|
memory_usage_mb=418,
|
|
712
732
|
max_tokens=514.0,
|
|
713
733
|
embed_dim=768,
|
|
@@ -715,7 +735,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
|
|
|
715
735
|
open_weights=True,
|
|
716
736
|
public_training_code=None,
|
|
717
737
|
public_training_data=None,
|
|
718
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
738
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
719
739
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
|
|
720
740
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
721
741
|
use_instructions=None,
|
|
@@ -742,6 +762,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
742
762
|
languages=["ara-Arab"],
|
|
743
763
|
loader=sentence_transformers_loader,
|
|
744
764
|
n_parameters=162841344,
|
|
765
|
+
n_embedding_parameters=76_800_000,
|
|
745
766
|
memory_usage_mb=621,
|
|
746
767
|
max_tokens=512.0,
|
|
747
768
|
embed_dim=768,
|
|
@@ -749,7 +770,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
|
|
|
749
770
|
open_weights=True,
|
|
750
771
|
public_training_code=None,
|
|
751
772
|
public_training_data=None,
|
|
752
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
773
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
753
774
|
reference="https://huggingface.co/Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka",
|
|
754
775
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
755
776
|
use_instructions=None,
|
|
@@ -774,6 +795,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
|
|
|
774
795
|
languages=None,
|
|
775
796
|
loader=sentence_transformers_loader,
|
|
776
797
|
n_parameters=None,
|
|
798
|
+
n_embedding_parameters=31_254_528,
|
|
777
799
|
memory_usage_mb=None,
|
|
778
800
|
max_tokens=512.0,
|
|
779
801
|
embed_dim=1024,
|
|
@@ -781,7 +803,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
|
|
|
781
803
|
open_weights=True,
|
|
782
804
|
public_training_code=None,
|
|
783
805
|
public_training_data=None,
|
|
784
|
-
framework=["PyTorch"],
|
|
806
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers"],
|
|
785
807
|
reference="https://huggingface.co/consciousAI/cai-lunaris-text-embeddings",
|
|
786
808
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
787
809
|
use_instructions=None,
|
|
@@ -797,6 +819,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
|
|
|
797
819
|
languages=None,
|
|
798
820
|
loader=sentence_transformers_loader,
|
|
799
821
|
n_parameters=None,
|
|
822
|
+
n_embedding_parameters=None,
|
|
800
823
|
memory_usage_mb=None,
|
|
801
824
|
max_tokens=514.0,
|
|
802
825
|
embed_dim=768,
|
|
@@ -804,7 +827,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
|
|
|
804
827
|
open_weights=True,
|
|
805
828
|
public_training_code=None,
|
|
806
829
|
public_training_data=None,
|
|
807
|
-
framework=["PyTorch"],
|
|
830
|
+
framework=["PyTorch", "Sentence Transformers"],
|
|
808
831
|
reference="https://huggingface.co/consciousAI/cai-stellaris-text-embeddings",
|
|
809
832
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
810
833
|
use_instructions=None,
|
|
@@ -829,6 +852,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
|
|
|
829
852
|
languages=None,
|
|
830
853
|
loader=sentence_transformers_loader,
|
|
831
854
|
n_parameters=1279887360,
|
|
855
|
+
n_embedding_parameters=65_536_000,
|
|
832
856
|
memory_usage_mb=2441,
|
|
833
857
|
max_tokens=2048.0,
|
|
834
858
|
embed_dim=2048,
|
|
@@ -836,7 +860,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
|
|
|
836
860
|
open_weights=True,
|
|
837
861
|
public_training_code=None,
|
|
838
862
|
public_training_data=None,
|
|
839
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
863
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
840
864
|
reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.2",
|
|
841
865
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
842
866
|
use_instructions=None,
|
|
@@ -852,6 +876,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
|
|
|
852
876
|
languages=None,
|
|
853
877
|
loader=sentence_transformers_loader,
|
|
854
878
|
n_parameters=1279887360,
|
|
879
|
+
n_embedding_parameters=65_536_000,
|
|
855
880
|
memory_usage_mb=2441,
|
|
856
881
|
max_tokens=2048.0,
|
|
857
882
|
embed_dim=2048,
|
|
@@ -859,7 +884,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
|
|
|
859
884
|
open_weights=True,
|
|
860
885
|
public_training_code=None,
|
|
861
886
|
public_training_data=None,
|
|
862
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
887
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
863
888
|
reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.3",
|
|
864
889
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
865
890
|
use_instructions=None,
|
|
@@ -875,6 +900,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
|
|
|
875
900
|
languages=["fra-Latn", "eng-Latn"],
|
|
876
901
|
loader=sentence_transformers_loader,
|
|
877
902
|
n_parameters=1279887360,
|
|
903
|
+
n_embedding_parameters=65_536_000,
|
|
878
904
|
memory_usage_mb=2441,
|
|
879
905
|
max_tokens=2048.0,
|
|
880
906
|
embed_dim=2048,
|
|
@@ -882,7 +908,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
|
|
|
882
908
|
open_weights=True,
|
|
883
909
|
public_training_code=None,
|
|
884
910
|
public_training_data=None,
|
|
885
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
911
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
886
912
|
reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.4",
|
|
887
913
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
888
914
|
use_instructions=None,
|
|
@@ -899,6 +925,7 @@ thenlper__gte_base = ModelMeta(
|
|
|
899
925
|
languages=["eng-Latn"],
|
|
900
926
|
loader=sentence_transformers_loader,
|
|
901
927
|
n_parameters=109482752,
|
|
928
|
+
n_embedding_parameters=23_440_896,
|
|
902
929
|
memory_usage_mb=209,
|
|
903
930
|
max_tokens=512.0,
|
|
904
931
|
embed_dim=768,
|
|
@@ -906,7 +933,7 @@ thenlper__gte_base = ModelMeta(
|
|
|
906
933
|
open_weights=True,
|
|
907
934
|
public_training_code=None,
|
|
908
935
|
public_training_data=None,
|
|
909
|
-
framework=["PyTorch"],
|
|
936
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
910
937
|
reference="https://huggingface.co/thenlper/gte-base",
|
|
911
938
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
912
939
|
use_instructions=None,
|
|
@@ -928,6 +955,7 @@ thenlper__gte_large = ModelMeta(
|
|
|
928
955
|
languages=["eng-Latn"],
|
|
929
956
|
loader=sentence_transformers_loader,
|
|
930
957
|
n_parameters=335142400,
|
|
958
|
+
n_embedding_parameters=31_254_528,
|
|
931
959
|
memory_usage_mb=639,
|
|
932
960
|
max_tokens=512.0,
|
|
933
961
|
embed_dim=1024,
|
|
@@ -935,7 +963,7 @@ thenlper__gte_large = ModelMeta(
|
|
|
935
963
|
open_weights=True,
|
|
936
964
|
public_training_code=None,
|
|
937
965
|
public_training_data=None,
|
|
938
|
-
framework=["PyTorch"],
|
|
966
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
939
967
|
reference="https://huggingface.co/thenlper/gte-large",
|
|
940
968
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
941
969
|
use_instructions=None,
|
|
@@ -957,6 +985,7 @@ thenlper__gte_small = ModelMeta(
|
|
|
957
985
|
languages=["eng-Latn"],
|
|
958
986
|
loader=sentence_transformers_loader,
|
|
959
987
|
n_parameters=33360512,
|
|
988
|
+
n_embedding_parameters=11_720_448,
|
|
960
989
|
memory_usage_mb=64,
|
|
961
990
|
max_tokens=512.0,
|
|
962
991
|
embed_dim=384,
|
|
@@ -964,7 +993,7 @@ thenlper__gte_small = ModelMeta(
|
|
|
964
993
|
open_weights=True,
|
|
965
994
|
public_training_code=None,
|
|
966
995
|
public_training_data=None,
|
|
967
|
-
framework=["PyTorch"],
|
|
996
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
968
997
|
reference="https://huggingface.co/thenlper/gte-small",
|
|
969
998
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
970
999
|
use_instructions=None,
|
|
@@ -986,6 +1015,7 @@ OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
|
|
|
986
1015
|
languages=["pol-Latn"],
|
|
987
1016
|
loader=sentence_transformers_loader,
|
|
988
1017
|
n_parameters=103705344,
|
|
1018
|
+
n_embedding_parameters=None,
|
|
989
1019
|
memory_usage_mb=396,
|
|
990
1020
|
max_tokens=512.0,
|
|
991
1021
|
embed_dim=768,
|
|
@@ -1009,6 +1039,7 @@ OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
|
|
|
1009
1039
|
languages=["pol-Latn"],
|
|
1010
1040
|
loader=sentence_transformers_loader,
|
|
1011
1041
|
n_parameters=None,
|
|
1042
|
+
n_embedding_parameters=None,
|
|
1012
1043
|
memory_usage_mb=None,
|
|
1013
1044
|
max_tokens=514.0,
|
|
1014
1045
|
embed_dim=768,
|
|
@@ -1032,6 +1063,7 @@ sdadas__mmlw_e5_base = ModelMeta(
|
|
|
1032
1063
|
languages=["pol-Latn"],
|
|
1033
1064
|
loader=sentence_transformers_loader,
|
|
1034
1065
|
n_parameters=278043648,
|
|
1066
|
+
n_embedding_parameters=192_001_536,
|
|
1035
1067
|
memory_usage_mb=1061,
|
|
1036
1068
|
max_tokens=514.0,
|
|
1037
1069
|
embed_dim=768,
|
|
@@ -1039,7 +1071,7 @@ sdadas__mmlw_e5_base = ModelMeta(
|
|
|
1039
1071
|
open_weights=True,
|
|
1040
1072
|
public_training_code=None,
|
|
1041
1073
|
public_training_data=None,
|
|
1042
|
-
framework=["PyTorch"],
|
|
1074
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1043
1075
|
reference="https://huggingface.co/sdadas/mmlw-e5-base",
|
|
1044
1076
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1045
1077
|
use_instructions=None,
|
|
@@ -1047,7 +1079,7 @@ sdadas__mmlw_e5_base = ModelMeta(
|
|
|
1047
1079
|
adapted_from="intfloat/multilingual-e5-base",
|
|
1048
1080
|
superseded_by=None,
|
|
1049
1081
|
citation="""@article{dadas2024pirb,
|
|
1050
|
-
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1082
|
+
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1051
1083
|
author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
|
|
1052
1084
|
year={2024},
|
|
1053
1085
|
eprint={2402.13350},
|
|
@@ -1063,6 +1095,7 @@ dwzhu__e5_base_4k = ModelMeta(
|
|
|
1063
1095
|
languages=["eng-Latn"],
|
|
1064
1096
|
loader=sentence_transformers_loader,
|
|
1065
1097
|
n_parameters=None,
|
|
1098
|
+
n_embedding_parameters=23_440_896,
|
|
1066
1099
|
memory_usage_mb=None,
|
|
1067
1100
|
max_tokens=4096.0,
|
|
1068
1101
|
embed_dim=None,
|
|
@@ -1070,7 +1103,7 @@ dwzhu__e5_base_4k = ModelMeta(
|
|
|
1070
1103
|
open_weights=True,
|
|
1071
1104
|
public_training_code=None,
|
|
1072
1105
|
public_training_data=None,
|
|
1073
|
-
framework=["PyTorch"],
|
|
1106
|
+
framework=["PyTorch", "Transformers"],
|
|
1074
1107
|
reference="https://huggingface.co/dwzhu/e5-base-4k",
|
|
1075
1108
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1076
1109
|
use_instructions=None,
|
|
@@ -1092,6 +1125,7 @@ sdadas__mmlw_e5_large = ModelMeta(
|
|
|
1092
1125
|
languages=["pol-Latn"],
|
|
1093
1126
|
loader=sentence_transformers_loader,
|
|
1094
1127
|
n_parameters=559890432,
|
|
1128
|
+
n_embedding_parameters=256_002_048,
|
|
1095
1129
|
memory_usage_mb=2136,
|
|
1096
1130
|
max_tokens=514.0,
|
|
1097
1131
|
embed_dim=1024,
|
|
@@ -1099,7 +1133,7 @@ sdadas__mmlw_e5_large = ModelMeta(
|
|
|
1099
1133
|
open_weights=True,
|
|
1100
1134
|
public_training_code=None,
|
|
1101
1135
|
public_training_data=None,
|
|
1102
|
-
framework=["PyTorch"],
|
|
1136
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1103
1137
|
reference="https://huggingface.co/sdadas/mmlw-e5-large",
|
|
1104
1138
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1105
1139
|
use_instructions=None,
|
|
@@ -1107,7 +1141,7 @@ sdadas__mmlw_e5_large = ModelMeta(
|
|
|
1107
1141
|
adapted_from="intfloat/multilingual-e5-large",
|
|
1108
1142
|
superseded_by=None,
|
|
1109
1143
|
citation="""@article{dadas2024pirb,
|
|
1110
|
-
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1144
|
+
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1111
1145
|
author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
|
|
1112
1146
|
year={2024},
|
|
1113
1147
|
eprint={2402.13350},
|
|
@@ -1123,6 +1157,7 @@ sdadas__mmlw_e5_small = ModelMeta(
|
|
|
1123
1157
|
languages=["pol-Latn"],
|
|
1124
1158
|
loader=sentence_transformers_loader,
|
|
1125
1159
|
n_parameters=117653760,
|
|
1160
|
+
n_embedding_parameters=96_014_208,
|
|
1126
1161
|
memory_usage_mb=449,
|
|
1127
1162
|
max_tokens=512.0,
|
|
1128
1163
|
embed_dim=384,
|
|
@@ -1130,7 +1165,7 @@ sdadas__mmlw_e5_small = ModelMeta(
|
|
|
1130
1165
|
open_weights=True,
|
|
1131
1166
|
public_training_code=None,
|
|
1132
1167
|
public_training_data=None,
|
|
1133
|
-
framework=["PyTorch"],
|
|
1168
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1134
1169
|
reference="https://huggingface.co/sdadas/mmlw-e5-small",
|
|
1135
1170
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1136
1171
|
use_instructions=None,
|
|
@@ -1138,7 +1173,7 @@ sdadas__mmlw_e5_small = ModelMeta(
|
|
|
1138
1173
|
adapted_from="intfloat/multilingual-e5-small",
|
|
1139
1174
|
superseded_by=None,
|
|
1140
1175
|
citation="""@article{dadas2024pirb,
|
|
1141
|
-
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1176
|
+
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1142
1177
|
author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
|
|
1143
1178
|
year={2024},
|
|
1144
1179
|
eprint={2402.13350},
|
|
@@ -1154,6 +1189,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
|
|
|
1154
1189
|
languages=["pol-Latn"],
|
|
1155
1190
|
loader=sentence_transformers_loader,
|
|
1156
1191
|
n_parameters=124442880,
|
|
1192
|
+
n_embedding_parameters=38_400_768,
|
|
1157
1193
|
memory_usage_mb=475,
|
|
1158
1194
|
max_tokens=514.0,
|
|
1159
1195
|
embed_dim=768,
|
|
@@ -1161,7 +1197,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
|
|
|
1161
1197
|
open_weights=True,
|
|
1162
1198
|
public_training_code=None,
|
|
1163
1199
|
public_training_data=None,
|
|
1164
|
-
framework=["PyTorch"],
|
|
1200
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1165
1201
|
reference="https://huggingface.co/sdadas/mmlw-roberta-base",
|
|
1166
1202
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1167
1203
|
use_instructions=None,
|
|
@@ -1169,7 +1205,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
|
|
|
1169
1205
|
adapted_from="sdadas/polish-roberta-base-v2",
|
|
1170
1206
|
superseded_by=None,
|
|
1171
1207
|
citation="""@article{dadas2024pirb,
|
|
1172
|
-
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1208
|
+
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1173
1209
|
author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
|
|
1174
1210
|
year={2024},
|
|
1175
1211
|
eprint={2402.13350},
|
|
@@ -1185,6 +1221,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
|
|
|
1185
1221
|
languages=["pol-Latn"],
|
|
1186
1222
|
loader=sentence_transformers_loader,
|
|
1187
1223
|
n_parameters=434961408,
|
|
1224
|
+
n_embedding_parameters=131_073_024,
|
|
1188
1225
|
memory_usage_mb=1659,
|
|
1189
1226
|
max_tokens=514.0,
|
|
1190
1227
|
embed_dim=1024,
|
|
@@ -1192,7 +1229,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
|
|
|
1192
1229
|
open_weights=True,
|
|
1193
1230
|
public_training_code=None,
|
|
1194
1231
|
public_training_data=None,
|
|
1195
|
-
framework=["PyTorch"],
|
|
1232
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1196
1233
|
reference="https://huggingface.co/sdadas/mmlw-roberta-large",
|
|
1197
1234
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1198
1235
|
use_instructions=None,
|
|
@@ -1200,7 +1237,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
|
|
|
1200
1237
|
adapted_from="sdadas/polish-roberta-large-v2",
|
|
1201
1238
|
superseded_by=None,
|
|
1202
1239
|
citation="""@article{dadas2024pirb,
|
|
1203
|
-
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1240
|
+
title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
|
|
1204
1241
|
author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
|
|
1205
1242
|
year={2024},
|
|
1206
1243
|
eprint={2402.13350},
|
|
@@ -1271,6 +1308,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
|
|
|
1271
1308
|
languages=udever_languages,
|
|
1272
1309
|
loader=sentence_transformers_loader,
|
|
1273
1310
|
n_parameters=None,
|
|
1311
|
+
n_embedding_parameters=385_351_680,
|
|
1274
1312
|
memory_usage_mb=None,
|
|
1275
1313
|
max_tokens=None,
|
|
1276
1314
|
embed_dim=None,
|
|
@@ -1278,7 +1316,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
|
|
|
1278
1316
|
open_weights=True,
|
|
1279
1317
|
public_training_code=None,
|
|
1280
1318
|
public_training_data=None,
|
|
1281
|
-
framework=["PyTorch"],
|
|
1319
|
+
framework=["PyTorch", "Transformers"],
|
|
1282
1320
|
reference="https://huggingface.co/izhx/udever-bloom-1b1",
|
|
1283
1321
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1284
1322
|
use_instructions=None,
|
|
@@ -1300,6 +1338,7 @@ izhx__udever_bloom_3b = ModelMeta(
|
|
|
1300
1338
|
languages=udever_languages,
|
|
1301
1339
|
loader=sentence_transformers_loader,
|
|
1302
1340
|
n_parameters=None,
|
|
1341
|
+
n_embedding_parameters=642_252_800,
|
|
1303
1342
|
memory_usage_mb=None,
|
|
1304
1343
|
max_tokens=None,
|
|
1305
1344
|
embed_dim=None,
|
|
@@ -1307,7 +1346,7 @@ izhx__udever_bloom_3b = ModelMeta(
|
|
|
1307
1346
|
open_weights=True,
|
|
1308
1347
|
public_training_code=None,
|
|
1309
1348
|
public_training_data=None,
|
|
1310
|
-
framework=["PyTorch"],
|
|
1349
|
+
framework=["PyTorch", "Transformers"],
|
|
1311
1350
|
reference="https://huggingface.co/izhx/udever-bloom-3b",
|
|
1312
1351
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1313
1352
|
use_instructions=None,
|
|
@@ -1329,6 +1368,7 @@ izhx__udever_bloom_560m = ModelMeta(
|
|
|
1329
1368
|
languages=udever_languages,
|
|
1330
1369
|
loader=sentence_transformers_loader,
|
|
1331
1370
|
n_parameters=None,
|
|
1371
|
+
n_embedding_parameters=256_901_120,
|
|
1332
1372
|
memory_usage_mb=None,
|
|
1333
1373
|
max_tokens=None,
|
|
1334
1374
|
embed_dim=None,
|
|
@@ -1336,7 +1376,7 @@ izhx__udever_bloom_560m = ModelMeta(
|
|
|
1336
1376
|
open_weights=True,
|
|
1337
1377
|
public_training_code=None,
|
|
1338
1378
|
public_training_data=None,
|
|
1339
|
-
framework=["PyTorch"],
|
|
1379
|
+
framework=["PyTorch", "Transformers"],
|
|
1340
1380
|
reference="https://huggingface.co/izhx/udever-bloom-560m",
|
|
1341
1381
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1342
1382
|
use_instructions=None,
|
|
@@ -1358,6 +1398,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
|
|
|
1358
1398
|
languages=udever_languages,
|
|
1359
1399
|
loader=sentence_transformers_loader,
|
|
1360
1400
|
n_parameters=None,
|
|
1401
|
+
n_embedding_parameters=1_027_604_480,
|
|
1361
1402
|
memory_usage_mb=None,
|
|
1362
1403
|
max_tokens=None,
|
|
1363
1404
|
embed_dim=None,
|
|
@@ -1365,7 +1406,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
|
|
|
1365
1406
|
open_weights=True,
|
|
1366
1407
|
public_training_code=None,
|
|
1367
1408
|
public_training_data=None,
|
|
1368
|
-
framework=["PyTorch"],
|
|
1409
|
+
framework=["PyTorch", "Transformers"],
|
|
1369
1410
|
reference="https://huggingface.co/izhx/udever-bloom-7b1",
|
|
1370
1411
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1371
1412
|
use_instructions=None,
|
|
@@ -1387,6 +1428,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
|
|
|
1387
1428
|
languages=["eng-Latn"],
|
|
1388
1429
|
loader=sentence_transformers_loader,
|
|
1389
1430
|
n_parameters=109482240,
|
|
1431
|
+
n_embedding_parameters=23_440_896,
|
|
1390
1432
|
memory_usage_mb=418,
|
|
1391
1433
|
max_tokens=512.0,
|
|
1392
1434
|
embed_dim=768,
|
|
@@ -1394,7 +1436,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
|
|
|
1394
1436
|
open_weights=True,
|
|
1395
1437
|
public_training_code=None,
|
|
1396
1438
|
public_training_data=None,
|
|
1397
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1439
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
1398
1440
|
reference="https://huggingface.co/avsolatorio/GIST-Embedding-v0",
|
|
1399
1441
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1400
1442
|
use_instructions=None,
|
|
@@ -1437,6 +1479,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
|
|
|
1437
1479
|
languages=["eng-Latn"],
|
|
1438
1480
|
loader=sentence_transformers_loader,
|
|
1439
1481
|
n_parameters=22713216,
|
|
1482
|
+
n_embedding_parameters=11_720_448,
|
|
1440
1483
|
memory_usage_mb=87,
|
|
1441
1484
|
max_tokens=512.0,
|
|
1442
1485
|
embed_dim=384,
|
|
@@ -1444,7 +1487,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
|
|
|
1444
1487
|
open_weights=True,
|
|
1445
1488
|
public_training_code=None,
|
|
1446
1489
|
public_training_data=None,
|
|
1447
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1490
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
1448
1491
|
reference="https://huggingface.co/avsolatorio/GIST-all-MiniLM-L6-v2",
|
|
1449
1492
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1450
1493
|
use_instructions=None,
|
|
@@ -1487,6 +1530,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
|
|
|
1487
1530
|
languages=["eng-Latn"],
|
|
1488
1531
|
loader=sentence_transformers_loader,
|
|
1489
1532
|
n_parameters=335141888,
|
|
1533
|
+
n_embedding_parameters=31_254_528,
|
|
1490
1534
|
memory_usage_mb=1278,
|
|
1491
1535
|
max_tokens=512.0,
|
|
1492
1536
|
embed_dim=1024,
|
|
@@ -1494,7 +1538,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
|
|
|
1494
1538
|
open_weights=True,
|
|
1495
1539
|
public_training_code=None,
|
|
1496
1540
|
public_training_data=None,
|
|
1497
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1541
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
1498
1542
|
reference="https://huggingface.co/avsolatorio/GIST-large-Embedding-v0",
|
|
1499
1543
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1500
1544
|
use_instructions=None,
|
|
@@ -1537,6 +1581,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
|
|
|
1537
1581
|
languages=["eng-Latn"],
|
|
1538
1582
|
loader=sentence_transformers_loader,
|
|
1539
1583
|
n_parameters=33360000,
|
|
1584
|
+
n_embedding_parameters=11_720_448,
|
|
1540
1585
|
memory_usage_mb=127,
|
|
1541
1586
|
max_tokens=512.0,
|
|
1542
1587
|
embed_dim=384,
|
|
@@ -1544,7 +1589,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
|
|
|
1544
1589
|
open_weights=True,
|
|
1545
1590
|
public_training_code=None,
|
|
1546
1591
|
public_training_data=None,
|
|
1547
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1592
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
1548
1593
|
reference="https://huggingface.co/avsolatorio/GIST-small-Embedding-v0",
|
|
1549
1594
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1550
1595
|
use_instructions=None,
|
|
@@ -1587,6 +1632,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
|
|
|
1587
1632
|
languages=None,
|
|
1588
1633
|
loader=sentence_transformers_loader,
|
|
1589
1634
|
n_parameters=None,
|
|
1635
|
+
n_embedding_parameters=1_026_793_472,
|
|
1590
1636
|
memory_usage_mb=None,
|
|
1591
1637
|
max_tokens=None,
|
|
1592
1638
|
embed_dim=4096,
|
|
@@ -1594,7 +1640,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
|
|
|
1594
1640
|
open_weights=True,
|
|
1595
1641
|
public_training_code=None,
|
|
1596
1642
|
public_training_data=None,
|
|
1597
|
-
framework=["PyTorch"],
|
|
1643
|
+
framework=["PyTorch", "Sentence Transformers"],
|
|
1598
1644
|
reference="https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco",
|
|
1599
1645
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1600
1646
|
use_instructions=None,
|
|
@@ -1616,6 +1662,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
|
|
|
1616
1662
|
languages=["deu-Latn"],
|
|
1617
1663
|
loader=sentence_transformers_loader,
|
|
1618
1664
|
n_parameters=335736320,
|
|
1665
|
+
n_embedding_parameters=31_848_448,
|
|
1619
1666
|
memory_usage_mb=1281,
|
|
1620
1667
|
max_tokens=512.0,
|
|
1621
1668
|
embed_dim=1024,
|
|
@@ -1623,7 +1670,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
|
|
|
1623
1670
|
open_weights=True,
|
|
1624
1671
|
public_training_code=None,
|
|
1625
1672
|
public_training_data=None,
|
|
1626
|
-
framework=["PyTorch"],
|
|
1673
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1627
1674
|
reference="https://huggingface.co/aari1995/German_Semantic_STS_V2",
|
|
1628
1675
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1629
1676
|
use_instructions=None,
|
|
@@ -1640,6 +1687,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
|
|
|
1640
1687
|
languages=["eng-Latn"],
|
|
1641
1688
|
loader=sentence_transformers_loader,
|
|
1642
1689
|
n_parameters=33360000,
|
|
1690
|
+
n_embedding_parameters=11_720_448,
|
|
1643
1691
|
memory_usage_mb=127,
|
|
1644
1692
|
max_tokens=512.0,
|
|
1645
1693
|
embed_dim=384,
|
|
@@ -1647,7 +1695,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
|
|
|
1647
1695
|
open_weights=True,
|
|
1648
1696
|
public_training_code=None,
|
|
1649
1697
|
public_training_data=None,
|
|
1650
|
-
framework=["PyTorch"],
|
|
1698
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
1651
1699
|
reference="https://huggingface.co/abhinand/MedEmbed-small-v0.1",
|
|
1652
1700
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1653
1701
|
use_instructions=None,
|
|
@@ -1678,6 +1726,7 @@ avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
|
|
|
1678
1726
|
languages=["eng-Latn"],
|
|
1679
1727
|
loader=sentence_transformers_loader,
|
|
1680
1728
|
n_parameters=33360000,
|
|
1729
|
+
n_embedding_parameters=11720448,
|
|
1681
1730
|
memory_usage_mb=127,
|
|
1682
1731
|
max_tokens=512.0,
|
|
1683
1732
|
embed_dim=384,
|
|
@@ -1701,6 +1750,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
|
|
|
1701
1750
|
languages=["eng-Latn"],
|
|
1702
1751
|
loader=sentence_transformers_loader,
|
|
1703
1752
|
n_parameters=22713216,
|
|
1753
|
+
n_embedding_parameters=11_720_448,
|
|
1704
1754
|
memory_usage_mb=87,
|
|
1705
1755
|
max_tokens=512.0,
|
|
1706
1756
|
embed_dim=384,
|
|
@@ -1708,7 +1758,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
|
|
|
1708
1758
|
open_weights=True,
|
|
1709
1759
|
public_training_code=None,
|
|
1710
1760
|
public_training_data=None,
|
|
1711
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1761
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
1712
1762
|
reference="https://huggingface.co/brahmairesearch/slx-v0.1",
|
|
1713
1763
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1714
1764
|
use_instructions=None,
|
|
@@ -1724,6 +1774,7 @@ deepfile__embedder_100p = ModelMeta(
|
|
|
1724
1774
|
languages=None,
|
|
1725
1775
|
loader=sentence_transformers_loader,
|
|
1726
1776
|
n_parameters=None,
|
|
1777
|
+
n_embedding_parameters=192_001_536,
|
|
1727
1778
|
memory_usage_mb=1061,
|
|
1728
1779
|
max_tokens=514.0,
|
|
1729
1780
|
embed_dim=768,
|
|
@@ -1731,7 +1782,7 @@ deepfile__embedder_100p = ModelMeta(
|
|
|
1731
1782
|
open_weights=True,
|
|
1732
1783
|
public_training_code=None,
|
|
1733
1784
|
public_training_data=None,
|
|
1734
|
-
framework=["PyTorch"],
|
|
1785
|
+
framework=["PyTorch", "Transformers", "safetensors"],
|
|
1735
1786
|
reference="https://huggingface.co/deepfile/embedder-100p",
|
|
1736
1787
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1737
1788
|
use_instructions=None,
|
|
@@ -1747,6 +1798,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
|
|
|
1747
1798
|
languages=["eng-Latn"],
|
|
1748
1799
|
loader=sentence_transformers_loader,
|
|
1749
1800
|
n_parameters=None,
|
|
1801
|
+
n_embedding_parameters=23_440_896,
|
|
1750
1802
|
memory_usage_mb=None,
|
|
1751
1803
|
max_tokens=512.0,
|
|
1752
1804
|
embed_dim=None,
|
|
@@ -1754,7 +1806,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
|
|
|
1754
1806
|
open_weights=True,
|
|
1755
1807
|
public_training_code=None,
|
|
1756
1808
|
public_training_data=None,
|
|
1757
|
-
framework=["PyTorch"],
|
|
1809
|
+
framework=["PyTorch", "Sentence Transformers"],
|
|
1758
1810
|
reference="https://huggingface.co/infgrad/stella-base-en-v2",
|
|
1759
1811
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1760
1812
|
use_instructions=None,
|
|
@@ -1770,6 +1822,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
|
|
|
1770
1822
|
languages=None,
|
|
1771
1823
|
loader=sentence_transformers_loader,
|
|
1772
1824
|
n_parameters=98688000,
|
|
1825
|
+
n_embedding_parameters=None,
|
|
1773
1826
|
memory_usage_mb=158,
|
|
1774
1827
|
max_tokens=512.0,
|
|
1775
1828
|
embed_dim=1024,
|
|
@@ -1777,7 +1830,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
|
|
|
1777
1830
|
open_weights=True,
|
|
1778
1831
|
public_training_code=None,
|
|
1779
1832
|
public_training_data=None,
|
|
1780
|
-
framework=["PyTorch"],
|
|
1833
|
+
framework=["PyTorch", "safetensors"],
|
|
1781
1834
|
reference="https://huggingface.co/malenia1/ternary-weight-embedding",
|
|
1782
1835
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1783
1836
|
use_instructions=None,
|
|
@@ -1793,6 +1846,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
|
|
|
1793
1846
|
languages=["ara-Arab", "eng-Latn"],
|
|
1794
1847
|
loader=sentence_transformers_loader,
|
|
1795
1848
|
n_parameters=559890432,
|
|
1849
|
+
n_embedding_parameters=256_002_048,
|
|
1796
1850
|
memory_usage_mb=2136,
|
|
1797
1851
|
max_tokens=514.0,
|
|
1798
1852
|
embed_dim=1024,
|
|
@@ -1800,7 +1854,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
|
|
|
1800
1854
|
open_weights=True,
|
|
1801
1855
|
public_training_code=None,
|
|
1802
1856
|
public_training_data=None,
|
|
1803
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1857
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
1804
1858
|
reference="https://huggingface.co/omarelshehy/arabic-english-sts-matryoshka",
|
|
1805
1859
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1806
1860
|
use_instructions=None,
|
|
@@ -1833,6 +1887,7 @@ openbmb__minicpm_embedding = ModelMeta(
|
|
|
1833
1887
|
release_date="2024-09-04",
|
|
1834
1888
|
languages=["zho-Hans", "eng-Latn"],
|
|
1835
1889
|
n_parameters=2724880896,
|
|
1890
|
+
n_embedding_parameters=282_822_912,
|
|
1836
1891
|
memory_usage_mb=5197,
|
|
1837
1892
|
max_tokens=512.0,
|
|
1838
1893
|
embed_dim=2304,
|
|
@@ -1840,7 +1895,7 @@ openbmb__minicpm_embedding = ModelMeta(
|
|
|
1840
1895
|
open_weights=True,
|
|
1841
1896
|
public_training_code=None,
|
|
1842
1897
|
public_training_data=None,
|
|
1843
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1898
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
|
|
1844
1899
|
reference="https://huggingface.co/openbmb/MiniCPM-Embedding",
|
|
1845
1900
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1846
1901
|
use_instructions=None,
|
|
@@ -1857,6 +1912,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
|
|
|
1857
1912
|
languages=["ara-Arab", "eng-Latn"],
|
|
1858
1913
|
loader=sentence_transformers_loader,
|
|
1859
1914
|
n_parameters=135193344,
|
|
1915
|
+
n_embedding_parameters=49_152_000,
|
|
1860
1916
|
memory_usage_mb=516,
|
|
1861
1917
|
max_tokens=512.0,
|
|
1862
1918
|
embed_dim=768,
|
|
@@ -1864,7 +1920,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
|
|
|
1864
1920
|
open_weights=True,
|
|
1865
1921
|
public_training_code=None,
|
|
1866
1922
|
public_training_data=None,
|
|
1867
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1923
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
1868
1924
|
reference="https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1",
|
|
1869
1925
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1870
1926
|
use_instructions=None,
|
|
@@ -1888,6 +1944,7 @@ sbert_chinese_general_v1 = ModelMeta(
|
|
|
1888
1944
|
languages=["zho-Hans"],
|
|
1889
1945
|
loader=sentence_transformers_loader,
|
|
1890
1946
|
n_parameters=None,
|
|
1947
|
+
n_embedding_parameters=16_226_304,
|
|
1891
1948
|
memory_usage_mb=None, # Not visible on repo
|
|
1892
1949
|
max_tokens=512,
|
|
1893
1950
|
embed_dim=128,
|
|
@@ -1895,7 +1952,7 @@ sbert_chinese_general_v1 = ModelMeta(
|
|
|
1895
1952
|
open_weights=True,
|
|
1896
1953
|
public_training_code=None,
|
|
1897
1954
|
public_training_data=None,
|
|
1898
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1955
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers"],
|
|
1899
1956
|
reference="https://huggingface.co/DMetaSoul/sbert-chinese-general-v1",
|
|
1900
1957
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1901
1958
|
use_instructions=None,
|
|
@@ -1916,6 +1973,7 @@ dmeta_embedding_zh_small = ModelMeta(
|
|
|
1916
1973
|
languages=["zho-Hans"],
|
|
1917
1974
|
loader=sentence_transformers_loader,
|
|
1918
1975
|
n_parameters=int(74.2 * 1e6),
|
|
1976
|
+
n_embedding_parameters=16_226_304,
|
|
1919
1977
|
memory_usage_mb=283,
|
|
1920
1978
|
max_tokens=1024,
|
|
1921
1979
|
embed_dim=768,
|
|
@@ -1923,7 +1981,7 @@ dmeta_embedding_zh_small = ModelMeta(
|
|
|
1923
1981
|
open_weights=True,
|
|
1924
1982
|
public_training_code=None,
|
|
1925
1983
|
public_training_data=None,
|
|
1926
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
1984
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
|
|
1927
1985
|
reference="https://huggingface.co/DMetaSoul/Dmeta-embedding-zh-small/",
|
|
1928
1986
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1929
1987
|
use_instructions=None,
|
|
@@ -1939,6 +1997,7 @@ xiaobu_embedding = ModelMeta(
|
|
|
1939
1997
|
languages=["zho-Hans"],
|
|
1940
1998
|
loader=sentence_transformers_loader,
|
|
1941
1999
|
n_parameters=int(326 * 1e6),
|
|
2000
|
+
n_embedding_parameters=21_635_072,
|
|
1942
2001
|
memory_usage_mb=1244,
|
|
1943
2002
|
max_tokens=512,
|
|
1944
2003
|
embed_dim=1024,
|
|
@@ -1946,7 +2005,7 @@ xiaobu_embedding = ModelMeta(
|
|
|
1946
2005
|
open_weights=True,
|
|
1947
2006
|
public_training_code=None,
|
|
1948
2007
|
public_training_data=None,
|
|
1949
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
2008
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers"],
|
|
1950
2009
|
reference="https://huggingface.co/lier007/xiaobu-embedding",
|
|
1951
2010
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1952
2011
|
use_instructions=None,
|
|
@@ -1963,6 +2022,7 @@ xiaobu_embedding_v2 = ModelMeta(
|
|
|
1963
2022
|
languages=["zho-Hans"],
|
|
1964
2023
|
loader=sentence_transformers_loader,
|
|
1965
2024
|
n_parameters=int(326 * 1e6),
|
|
2025
|
+
n_embedding_parameters=21_635_072,
|
|
1966
2026
|
memory_usage_mb=1242,
|
|
1967
2027
|
max_tokens=512,
|
|
1968
2028
|
embed_dim=768,
|
|
@@ -1970,7 +2030,7 @@ xiaobu_embedding_v2 = ModelMeta(
|
|
|
1970
2030
|
open_weights=True,
|
|
1971
2031
|
public_training_code=None,
|
|
1972
2032
|
public_training_data=None,
|
|
1973
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
2033
|
+
framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
|
|
1974
2034
|
reference="https://huggingface.co/lier007/xiaobu-embedding-v2",
|
|
1975
2035
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
1976
2036
|
use_instructions=None,
|
|
@@ -1987,6 +2047,7 @@ yinka_embedding = ModelMeta(
|
|
|
1987
2047
|
languages=["zho-Hans"],
|
|
1988
2048
|
loader=sentence_transformers_loader,
|
|
1989
2049
|
n_parameters=int(326 * 1e6),
|
|
2050
|
+
n_embedding_parameters=21_635_072,
|
|
1990
2051
|
memory_usage_mb=1244,
|
|
1991
2052
|
max_tokens=512,
|
|
1992
2053
|
embed_dim=1024,
|
|
@@ -1994,7 +2055,7 @@ yinka_embedding = ModelMeta(
|
|
|
1994
2055
|
open_weights=True,
|
|
1995
2056
|
public_training_code=None,
|
|
1996
2057
|
public_training_data=None,
|
|
1997
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
2058
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers"],
|
|
1998
2059
|
reference="https://huggingface.co/Classical/Yinka",
|
|
1999
2060
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
2000
2061
|
use_instructions=None,
|
|
@@ -2010,6 +2071,7 @@ conan_embedding = ModelMeta(
|
|
|
2010
2071
|
languages=["zho-Hans"],
|
|
2011
2072
|
loader=sentence_transformers_loader,
|
|
2012
2073
|
n_parameters=int(326 * 1e6),
|
|
2074
|
+
n_embedding_parameters=21_635_072,
|
|
2013
2075
|
memory_usage_mb=1242,
|
|
2014
2076
|
max_tokens=512,
|
|
2015
2077
|
embed_dim=768,
|
|
@@ -2017,7 +2079,7 @@ conan_embedding = ModelMeta(
|
|
|
2017
2079
|
open_weights=True,
|
|
2018
2080
|
public_training_code=None,
|
|
2019
2081
|
public_training_data=None,
|
|
2020
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
2082
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors"],
|
|
2021
2083
|
reference="https://huggingface.co/Classical/Yinka",
|
|
2022
2084
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
2023
2085
|
use_instructions=None,
|
|
@@ -2025,13 +2087,13 @@ conan_embedding = ModelMeta(
|
|
|
2025
2087
|
training_datasets=None, # They "scraped" things from the internet, we don't know, could be leakage
|
|
2026
2088
|
superseded_by=None,
|
|
2027
2089
|
citation="""@misc{li2024conanembeddinggeneraltextembedding,
|
|
2028
|
-
title={Conan-embedding: General Text Embedding with More and Better Negative Samples},
|
|
2090
|
+
title={Conan-embedding: General Text Embedding with More and Better Negative Samples},
|
|
2029
2091
|
author={Shiyu Li and Yang Tang and Shizhe Chen and Xi Chen},
|
|
2030
2092
|
year={2024},
|
|
2031
2093
|
eprint={2408.15710},
|
|
2032
2094
|
archivePrefix={arXiv},
|
|
2033
2095
|
primaryClass={cs.CL},
|
|
2034
|
-
url={https://arxiv.org/abs/2408.15710},
|
|
2096
|
+
url={https://arxiv.org/abs/2408.15710},
|
|
2035
2097
|
}""",
|
|
2036
2098
|
)
|
|
2037
2099
|
|
|
@@ -2043,6 +2105,7 @@ ember_v1 = ModelMeta(
|
|
|
2043
2105
|
release_date="2023-10-10",
|
|
2044
2106
|
languages=["eng-Latn"],
|
|
2045
2107
|
n_parameters=int(335 * 1e6),
|
|
2108
|
+
n_embedding_parameters=31_254_528,
|
|
2046
2109
|
memory_usage_mb=1278,
|
|
2047
2110
|
max_tokens=512,
|
|
2048
2111
|
embed_dim=1024,
|
|
@@ -2050,14 +2113,14 @@ ember_v1 = ModelMeta(
|
|
|
2050
2113
|
open_weights=True,
|
|
2051
2114
|
public_training_code=None,
|
|
2052
2115
|
public_training_data=None,
|
|
2053
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
2116
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
2054
2117
|
reference="https://huggingface.co/llmrails/ember-v1",
|
|
2055
2118
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
2056
2119
|
use_instructions=None,
|
|
2057
2120
|
training_datasets=None,
|
|
2058
2121
|
superseded_by=None,
|
|
2059
2122
|
citation="""@misc{nur2024emberv1,
|
|
2060
|
-
title={ember-v1: SOTA embedding model},
|
|
2123
|
+
title={ember-v1: SOTA embedding model},
|
|
2061
2124
|
author={Enrike Nur and Anar Aliyev},
|
|
2062
2125
|
year={2023},
|
|
2063
2126
|
}""",
|