mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +27 -12
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/benchmarks/benchmarks/__init__.py +2 -0
  27. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  28. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  29. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  30. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  31. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  32. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  33. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  34. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  35. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  36. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  37. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  38. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  39. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  40. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  41. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  42. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  43. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  44. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  48. mteb/evaluate.py +10 -2
  49. mteb/models/model_implementations/align_models.py +1 -0
  50. mteb/models/model_implementations/amazon_models.py +1 -0
  51. mteb/models/model_implementations/andersborges.py +2 -0
  52. mteb/models/model_implementations/ara_models.py +1 -0
  53. mteb/models/model_implementations/arctic_models.py +8 -0
  54. mteb/models/model_implementations/b1ade_models.py +1 -0
  55. mteb/models/model_implementations/bedrock_models.py +4 -0
  56. mteb/models/model_implementations/bge_models.py +40 -1
  57. mteb/models/model_implementations/bica_model.py +1 -0
  58. mteb/models/model_implementations/blip2_models.py +2 -0
  59. mteb/models/model_implementations/blip_models.py +8 -0
  60. mteb/models/model_implementations/bm25.py +10 -5
  61. mteb/models/model_implementations/bmretriever_models.py +4 -0
  62. mteb/models/model_implementations/cadet_models.py +1 -0
  63. mteb/models/model_implementations/cde_models.py +2 -0
  64. mteb/models/model_implementations/clip_models.py +3 -0
  65. mteb/models/model_implementations/clips_models.py +3 -0
  66. mteb/models/model_implementations/codefuse_models.py +5 -0
  67. mteb/models/model_implementations/codesage_models.py +3 -0
  68. mteb/models/model_implementations/cohere_models.py +4 -0
  69. mteb/models/model_implementations/cohere_v.py +5 -0
  70. mteb/models/model_implementations/colpali_models.py +3 -0
  71. mteb/models/model_implementations/colqwen_models.py +7 -0
  72. mteb/models/model_implementations/colsmol_models.py +2 -0
  73. mteb/models/model_implementations/conan_models.py +1 -0
  74. mteb/models/model_implementations/dino_models.py +19 -0
  75. mteb/models/model_implementations/e5_instruct.py +4 -0
  76. mteb/models/model_implementations/e5_models.py +9 -0
  77. mteb/models/model_implementations/e5_v.py +1 -0
  78. mteb/models/model_implementations/eagerworks_models.py +1 -0
  79. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  80. mteb/models/model_implementations/en_code_retriever.py +1 -0
  81. mteb/models/model_implementations/euler_models.py +1 -0
  82. mteb/models/model_implementations/evaclip_models.py +4 -0
  83. mteb/models/model_implementations/fa_models.py +9 -0
  84. mteb/models/model_implementations/facebookai.py +2 -0
  85. mteb/models/model_implementations/geogpt_models.py +1 -0
  86. mteb/models/model_implementations/gme_v_models.py +2 -0
  87. mteb/models/model_implementations/google_models.py +5 -0
  88. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  89. mteb/models/model_implementations/gritlm_models.py +2 -0
  90. mteb/models/model_implementations/gte_models.py +9 -0
  91. mteb/models/model_implementations/hinvec_models.py +1 -0
  92. mteb/models/model_implementations/human.py +1 -0
  93. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  94. mteb/models/model_implementations/inf_models.py +2 -0
  95. mteb/models/model_implementations/jasper_models.py +2 -0
  96. mteb/models/model_implementations/jina_clip.py +1 -0
  97. mteb/models/model_implementations/jina_models.py +7 -0
  98. mteb/models/model_implementations/kalm_models.py +6 -0
  99. mteb/models/model_implementations/kblab.py +1 -0
  100. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  101. mteb/models/model_implementations/kfst.py +1 -0
  102. mteb/models/model_implementations/kowshik24_models.py +1 -0
  103. mteb/models/model_implementations/lens_models.py +2 -0
  104. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  105. mteb/models/model_implementations/linq_models.py +1 -0
  106. mteb/models/model_implementations/listconranker.py +1 -0
  107. mteb/models/model_implementations/llm2clip_models.py +3 -0
  108. mteb/models/model_implementations/llm2vec_models.py +8 -0
  109. mteb/models/model_implementations/mcinext_models.py +3 -0
  110. mteb/models/model_implementations/mdbr_models.py +2 -0
  111. mteb/models/model_implementations/misc_models.py +63 -0
  112. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  113. mteb/models/model_implementations/mme5_models.py +2 -1
  114. mteb/models/model_implementations/moco_models.py +2 -0
  115. mteb/models/model_implementations/mod_models.py +1 -0
  116. mteb/models/model_implementations/model2vec_models.py +13 -0
  117. mteb/models/model_implementations/moka_models.py +3 -0
  118. mteb/models/model_implementations/nbailab.py +3 -0
  119. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  120. mteb/models/model_implementations/nomic_models.py +6 -0
  121. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  122. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  123. mteb/models/model_implementations/nvidia_models.py +3 -0
  124. mteb/models/model_implementations/octen_models.py +2 -0
  125. mteb/models/model_implementations/openai_models.py +5 -0
  126. mteb/models/model_implementations/openclip_models.py +8 -0
  127. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  128. mteb/models/model_implementations/ops_moa_models.py +2 -0
  129. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  130. mteb/models/model_implementations/pawan_models.py +1 -0
  131. mteb/models/model_implementations/piccolo_models.py +2 -0
  132. mteb/models/model_implementations/promptriever_models.py +4 -0
  133. mteb/models/model_implementations/pylate_models.py +13 -0
  134. mteb/models/model_implementations/qodo_models.py +2 -0
  135. mteb/models/model_implementations/qtack_models.py +1 -0
  136. mteb/models/model_implementations/qwen3_models.py +3 -0
  137. mteb/models/model_implementations/qzhou_models.py +2 -0
  138. mteb/models/model_implementations/rasgaard_models.py +1 -0
  139. mteb/models/model_implementations/reasonir_model.py +65 -0
  140. mteb/models/model_implementations/repllama_models.py +2 -0
  141. mteb/models/model_implementations/rerankers_custom.py +3 -0
  142. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  143. mteb/models/model_implementations/richinfoai_models.py +1 -0
  144. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  145. mteb/models/model_implementations/ruri_models.py +10 -0
  146. mteb/models/model_implementations/salesforce_models.py +3 -0
  147. mteb/models/model_implementations/samilpwc_models.py +1 -0
  148. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  149. mteb/models/model_implementations/searchmap_models.py +1 -0
  150. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  151. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  152. mteb/models/model_implementations/seed_models.py +1 -0
  153. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  154. mteb/models/model_implementations/shuu_model.py +1 -0
  155. mteb/models/model_implementations/siglip_models.py +10 -0
  156. mteb/models/model_implementations/sonar_models.py +2 -1
  157. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  158. mteb/models/model_implementations/stella_models.py +6 -0
  159. mteb/models/model_implementations/tarka_models.py +2 -0
  160. mteb/models/model_implementations/text2vec_models.py +3 -0
  161. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  162. mteb/models/model_implementations/uae_models.py +1 -0
  163. mteb/models/model_implementations/vdr_models.py +1 -0
  164. mteb/models/model_implementations/vi_vn_models.py +6 -0
  165. mteb/models/model_implementations/vista_models.py +2 -0
  166. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  167. mteb/models/model_implementations/voyage_models.py +15 -0
  168. mteb/models/model_implementations/voyage_v.py +1 -0
  169. mteb/models/model_implementations/xyz_models.py +1 -0
  170. mteb/models/model_implementations/youtu_models.py +1 -0
  171. mteb/models/model_implementations/yuan_models.py +1 -0
  172. mteb/models/model_implementations/yuan_models_en.py +1 -0
  173. mteb/models/model_meta.py +35 -2
  174. mteb/models/models_protocols.py +4 -0
  175. mteb/models/search_wrappers.py +12 -0
  176. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  177. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  178. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  179. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  180. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  181. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  182. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  183. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  184. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  185. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  186. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  187. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  188. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  189. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  190. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  191. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  192. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  193. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  194. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  195. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  196. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  197. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  198. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  199. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  200. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  201. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  202. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  203. mteb/tasks/classification/est/estonian_valence.py +1 -1
  204. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  205. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  206. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  207. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  208. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  209. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  210. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  211. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  212. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  213. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  214. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  215. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  216. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  217. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  218. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  219. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  220. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  221. mteb/tasks/classification/kor/klue_tc.py +2 -2
  222. mteb/tasks/classification/kor/kor_fin.py +1 -1
  223. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  224. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  225. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  226. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  227. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  228. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  229. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  230. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  231. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  232. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  233. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  234. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  235. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  236. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  237. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  238. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  239. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  240. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  241. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  242. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  243. mteb/tasks/classification/ron/moroco.py +1 -1
  244. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  245. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  246. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  247. mteb/tasks/classification/rus/headline_classification.py +2 -2
  248. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  249. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  250. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  251. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  252. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  253. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  254. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  255. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  256. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  257. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  258. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  259. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  260. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  261. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  262. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  263. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  264. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  265. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  266. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  267. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  268. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  269. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  270. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  271. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  272. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  273. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  274. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  275. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  276. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  277. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  278. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  279. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  280. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  281. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  282. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  283. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  284. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  285. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  286. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  287. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  288. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  289. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  290. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  291. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  292. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  293. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  294. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  295. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  296. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  297. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  298. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  299. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  300. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  301. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  302. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  303. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  304. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  305. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  306. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  307. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  308. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  309. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  310. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  311. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  312. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  313. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  314. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  315. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  316. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  317. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  318. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  319. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  320. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  321. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  322. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  323. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  324. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  325. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  326. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  327. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  328. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  329. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  330. mteb/tasks/pair_classification/rus/terra.py +2 -2
  331. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  332. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  333. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  334. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  335. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  336. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  337. mteb/tasks/retrieval/code/code_rag.py +4 -4
  338. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  339. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  340. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  341. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  342. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  343. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  344. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  345. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  346. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  347. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  348. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  349. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  350. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  351. mteb/tasks/retrieval/eng/__init__.py +42 -0
  352. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  353. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  354. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  355. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  356. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  357. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  358. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  359. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  360. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  361. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  362. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  363. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  364. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  365. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  366. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  367. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  368. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  369. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  370. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  371. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  372. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  373. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  374. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  375. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  376. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  377. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  378. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  379. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  380. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  381. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  382. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  383. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  384. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  385. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  386. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  387. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  388. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  389. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  390. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  391. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  392. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  393. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  394. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  395. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  396. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  397. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  398. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  399. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  400. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  401. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  402. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  403. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  404. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  405. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  406. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  407. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  408. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  409. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  410. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  411. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  412. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  413. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  414. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  415. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  416. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  417. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  418. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  419. mteb/tasks/retrieval/nob/norquad.py +1 -1
  420. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  421. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  422. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  423. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  424. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  425. mteb/tasks/sts/kor/klue_sts.py +1 -1
  426. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  427. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  428. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  429. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  430. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
  431. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  432. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  433. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  434. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -121,6 +121,7 @@ all_minilm_l6_v2 = ModelMeta(
121
121
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
122
122
  release_date="2021-08-30",
123
123
  n_parameters=22_700_000,
124
+ n_embedding_parameters=11_720_448,
124
125
  memory_usage_mb=87,
125
126
  embed_dim=384,
126
127
  license="apache-2.0",
@@ -152,6 +153,7 @@ all_minilm_l12_v2 = ModelMeta(
152
153
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
153
154
  release_date="2021-08-30",
154
155
  n_parameters=33_400_000,
156
+ n_embedding_parameters=11_720_448,
155
157
  memory_usage_mb=127,
156
158
  embed_dim=384,
157
159
  license="apache-2.0",
@@ -183,6 +185,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
183
185
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
184
186
  release_date="2019-11-01", # release date of paper
185
187
  n_parameters=118_000_000,
188
+ n_embedding_parameters=96_014_208,
186
189
  memory_usage_mb=449,
187
190
  embed_dim=768,
188
191
  license="apache-2.0",
@@ -214,6 +217,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
214
217
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
215
218
  release_date="2019-11-01", # release date of paper
216
219
  n_parameters=278_000_000,
220
+ n_embedding_parameters=192_001_536,
217
221
  memory_usage_mb=1061,
218
222
  embed_dim=768,
219
223
  license="apache-2.0",
@@ -256,6 +260,7 @@ labse = ModelMeta(
256
260
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
257
261
  release_date="2019-11-01", # release date of paper
258
262
  n_parameters=471_000_000,
263
+ n_embedding_parameters=384_885_504,
259
264
  memory_usage_mb=1796,
260
265
  embed_dim=768,
261
266
  license="apache-2.0",
@@ -294,6 +299,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
294
299
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
295
300
  release_date="2021-08-30",
296
301
  n_parameters=22_700_000,
302
+ n_embedding_parameters=11_720_448,
297
303
  memory_usage_mb=87,
298
304
  embed_dim=384,
299
305
  license="apache-2.0",
@@ -325,6 +331,7 @@ all_mpnet_base_v2 = ModelMeta(
325
331
  revision="9a3225965996d404b775526de6dbfe85d3368642",
326
332
  release_date="2021-08-30",
327
333
  n_parameters=109_000_000,
334
+ n_embedding_parameters=23_444_736,
328
335
  memory_usage_mb=418,
329
336
  embed_dim=768,
330
337
  license="apache-2.0",
@@ -435,6 +442,7 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
435
442
  revision="7264ea07c5365a11d7e6d87dbb6195889a13054f",
436
443
  release_date="2025-01-15",
437
444
  n_parameters=108_420_096,
445
+ n_embedding_parameters=None,
438
446
  memory_usage_mb=413,
439
447
  embed_dim=1024,
440
448
  license="apache-2.0",
@@ -468,6 +476,7 @@ contriever = ModelMeta(
468
476
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
469
477
  release_date="2022-06-25", # release date of model on HF
470
478
  n_parameters=150_000_000,
479
+ n_embedding_parameters=23_440_896,
471
480
  memory_usage_mb=572,
472
481
  embed_dim=768,
473
482
  license=None,
@@ -498,6 +507,7 @@ microllama_text_embedding = ModelMeta(
498
507
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
499
508
  release_date="2024-11-10",
500
509
  n_parameters=272_000_000,
510
+ n_embedding_parameters=32_769_024,
501
511
  memory_usage_mb=1037,
502
512
  embed_dim=1024,
503
513
  license="apache-2.0",
@@ -544,6 +554,7 @@ sentence_t5_base = ModelMeta(
544
554
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
545
555
  release_date="2022-02-09",
546
556
  n_parameters=110_000_000,
557
+ n_embedding_parameters=24_674_304,
547
558
  memory_usage_mb=209,
548
559
  embed_dim=768,
549
560
  license="apache-2.0",
@@ -567,6 +578,7 @@ sentence_t5_large = ModelMeta(
567
578
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
568
579
  release_date="2022-02-09",
569
580
  n_parameters=335_000_000,
581
+ n_embedding_parameters=32_899_072,
570
582
  memory_usage_mb=639,
571
583
  embed_dim=768,
572
584
  license="apache-2.0",
@@ -590,6 +602,7 @@ sentence_t5_xl = ModelMeta(
590
602
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
591
603
  release_date="2024-03-27",
592
604
  n_parameters=3_000_000_000,
605
+ n_embedding_parameters=32_899_072,
593
606
  memory_usage_mb=2367,
594
607
  embed_dim=768,
595
608
  license="apache-2.0",
@@ -613,6 +626,7 @@ sentence_t5_xxl = ModelMeta(
613
626
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
614
627
  release_date="2024-03-27",
615
628
  n_parameters=11_000_000_000,
629
+ n_embedding_parameters=None,
616
630
  memory_usage_mb=9279,
617
631
  embed_dim=768,
618
632
  license="apache-2.0",
@@ -646,6 +660,7 @@ gtr_t5_large = ModelMeta(
646
660
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
647
661
  release_date="2022-02-09",
648
662
  n_parameters=335_000_000,
663
+ n_embedding_parameters=32_899_072,
649
664
  memory_usage_mb=639,
650
665
  embed_dim=768,
651
666
  license="apache-2.0",
@@ -681,6 +696,7 @@ gtr_t5_xl = ModelMeta(
681
696
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
682
697
  release_date="2022-02-09",
683
698
  n_parameters=1_240_000_000,
699
+ n_embedding_parameters=32_899_072,
684
700
  memory_usage_mb=2367,
685
701
  embed_dim=768,
686
702
  license="apache-2.0",
@@ -715,6 +731,7 @@ gtr_t5_xxl = ModelMeta(
715
731
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
716
732
  release_date="2022-02-09",
717
733
  n_parameters=4_860_000_000,
734
+ n_embedding_parameters=None,
718
735
  memory_usage_mb=9279,
719
736
  embed_dim=768,
720
737
  license="apache-2.0",
@@ -750,6 +767,7 @@ gtr_t5_base = ModelMeta(
750
767
  revision="7027e9594267928589816394bdd295273ddc0739",
751
768
  release_date="2022-02-09",
752
769
  n_parameters=110_000_000,
770
+ n_embedding_parameters=24_674_304,
753
771
  memory_usage_mb=209,
754
772
  embed_dim=768,
755
773
  license="apache-2.0",
@@ -10,6 +10,7 @@ codemodernbert_crow_meta = ModelMeta(
10
10
  revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
11
  release_date="2025-04-21",
12
12
  n_parameters=151668480,
13
+ n_embedding_parameters=None,
13
14
  memory_usage_mb=607,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
@@ -136,6 +136,7 @@ siglip_so400m_patch14_224 = ModelMeta(
136
136
  release_date="2024-01-08",
137
137
  modalities=["image", "text"],
138
138
  n_parameters=877_000_000,
139
+ n_embedding_parameters=None,
139
140
  memory_usage_mb=3347,
140
141
  max_tokens=16,
141
142
  embed_dim=1152,
@@ -160,6 +161,7 @@ siglip_so400m_patch14_384 = ModelMeta(
160
161
  release_date="2024-01-08",
161
162
  modalities=["image", "text"],
162
163
  n_parameters=878_000_000,
164
+ n_embedding_parameters=None,
163
165
  memory_usage_mb=3349,
164
166
  max_tokens=64,
165
167
  embed_dim=1152,
@@ -184,6 +186,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
184
186
  release_date="2024-01-08",
185
187
  modalities=["image", "text"],
186
188
  n_parameters=1_130_000_000,
189
+ n_embedding_parameters=None,
187
190
  memory_usage_mb=4306,
188
191
  max_tokens=64,
189
192
  embed_dim=1152,
@@ -208,6 +211,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
208
211
  release_date="2024-01-08",
209
212
  modalities=["image", "text"],
210
213
  n_parameters=371_000_000,
214
+ n_embedding_parameters=None,
211
215
  memory_usage_mb=1414,
212
216
  max_tokens=64,
213
217
  embed_dim=768,
@@ -232,6 +236,7 @@ siglip_base_patch16_256 = ModelMeta(
232
236
  release_date="2024-01-08",
233
237
  modalities=["image", "text"],
234
238
  n_parameters=203_000_000,
239
+ n_embedding_parameters=None,
235
240
  memory_usage_mb=775,
236
241
  max_tokens=64,
237
242
  embed_dim=768,
@@ -256,6 +261,7 @@ siglip_base_patch16_512 = ModelMeta(
256
261
  release_date="2024-01-08",
257
262
  modalities=["image", "text"],
258
263
  n_parameters=204_000_000,
264
+ n_embedding_parameters=None,
259
265
  memory_usage_mb=777,
260
266
  max_tokens=64,
261
267
  embed_dim=768,
@@ -280,6 +286,7 @@ siglip_base_patch16_384 = ModelMeta(
280
286
  release_date="2024-01-08",
281
287
  modalities=["image", "text"],
282
288
  n_parameters=203_000_000,
289
+ n_embedding_parameters=None,
283
290
  memory_usage_mb=776,
284
291
  max_tokens=64,
285
292
  embed_dim=768,
@@ -304,6 +311,7 @@ siglip_base_patch16_224 = ModelMeta(
304
311
  release_date="2024-01-08",
305
312
  modalities=["image", "text"],
306
313
  n_parameters=203_000_000,
314
+ n_embedding_parameters=None,
307
315
  memory_usage_mb=775,
308
316
  max_tokens=64,
309
317
  embed_dim=768,
@@ -328,6 +336,7 @@ siglip_large_patch16_256 = ModelMeta(
328
336
  release_date="2024-01-08",
329
337
  modalities=["image", "text"],
330
338
  n_parameters=652_000_000,
339
+ n_embedding_parameters=None,
331
340
  memory_usage_mb=2488,
332
341
  max_tokens=64,
333
342
  embed_dim=1024,
@@ -352,6 +361,7 @@ siglip_large_patch16_384 = ModelMeta(
352
361
  release_date="2024-01-08",
353
362
  modalities=["image", "text"],
354
363
  n_parameters=652_000_000,
364
+ n_embedding_parameters=None,
355
365
  memory_usage_mb=2489,
356
366
  max_tokens=64,
357
367
  embed_dim=1024,
@@ -224,7 +224,8 @@ sonar = ModelMeta(
224
224
  use_instructions=False, # it does take a language code as input
225
225
  revision="a551c586dcf4a49c8fd847de369412d556a7f2f2",
226
226
  release_date="2021-05-21",
227
- n_parameters=None, # it is really multiple models so not sure how to calculate this
227
+ n_parameters=None,
228
+ n_embedding_parameters=None, # it is really multiple models so not sure how to calculate this
228
229
  max_tokens=512, # https://github.com/facebookresearch/SONAR/blob/549d287466443bd8720f938047882630c1c5c3f7/sonar/models/sonar_text/builder.py#L139
229
230
  embed_dim=1024,
230
231
  license="mit",
@@ -12,6 +12,7 @@ spartan8806_atles_champion_embedding = ModelMeta(
12
12
  revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
13
13
  release_date="2025-11-15",
14
14
  n_parameters=110_000_000,
15
+ n_embedding_parameters=23_444_736,
15
16
  memory_usage_mb=420,
16
17
  max_tokens=512,
17
18
  embed_dim=768,
@@ -66,6 +66,7 @@ stella_en_400m = ModelMeta(
66
66
  revision="1bb50bc7bb726810eac2140e62155b88b0df198f",
67
67
  release_date="2024-07-12",
68
68
  n_parameters=435_000_000,
69
+ n_embedding_parameters=None,
69
70
  memory_usage_mb=1660,
70
71
  max_tokens=8192,
71
72
  embed_dim=4096,
@@ -101,6 +102,7 @@ stella_en_1_5b = ModelMeta(
101
102
  revision="d03be74b361d4eb24f42a2fe5bd2e29917df4604",
102
103
  release_date="2024-07-12",
103
104
  n_parameters=1_540_000_000,
105
+ n_embedding_parameters=232_928_256,
104
106
  memory_usage_mb=5887,
105
107
  max_tokens=131072,
106
108
  embed_dim=8960,
@@ -130,6 +132,7 @@ stella_large_zh_v3_1792d = ModelMeta(
130
132
  revision="d5d39eb8cd11c80a63df53314e59997074469f09",
131
133
  release_date="2024-02-17",
132
134
  n_parameters=None,
135
+ n_embedding_parameters=21_635_072,
133
136
  memory_usage_mb=None, # can't see on model card
134
137
  embed_dim=1792,
135
138
  license="not specified",
@@ -157,6 +160,7 @@ stella_base_zh_v3_1792d = ModelMeta(
157
160
  revision="82254892a0fba125aa2abf3a4800d2dd12821343",
158
161
  release_date="2024-02-17",
159
162
  n_parameters=None,
163
+ n_embedding_parameters=16_226_304,
160
164
  memory_usage_mb=None, # can't see on model card
161
165
  embed_dim=1792,
162
166
  license="mit",
@@ -185,6 +189,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
185
189
  revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
186
190
  release_date="2024-02-27",
187
191
  n_parameters=int(326 * 1e6),
192
+ n_embedding_parameters=21_635_072,
188
193
  memory_usage_mb=1242,
189
194
  embed_dim=1792,
190
195
  license="mit",
@@ -209,6 +214,7 @@ zpoint_large_embedding_zh = ModelMeta(
209
214
  revision="b1075144f440ab4409c05622c1179130ebd57d03",
210
215
  release_date="2024-06-04",
211
216
  n_parameters=int(326 * 1e6),
217
+ n_embedding_parameters=21_635_072,
212
218
  memory_usage_mb=1242,
213
219
  embed_dim=1792,
214
220
  license="mit",
@@ -327,6 +327,7 @@ tarka_embedding_150m_v1 = ModelMeta(
327
327
  revision="b0ffecc4ef0d873e517507ed080e43b88b2704b9",
328
328
  release_date="2025-11-04",
329
329
  n_parameters=155_714_304,
330
+ n_embedding_parameters=None,
330
331
  embed_dim=768,
331
332
  max_tokens=2048,
332
333
  license="gemma",
@@ -361,6 +362,7 @@ tarka_embedding_350m_v1 = ModelMeta(
361
362
  revision="a850d6a329145474727424fed6b12b62096b8ba3",
362
363
  release_date="2025-11-11",
363
364
  n_parameters=354_483_968,
365
+ n_embedding_parameters=None,
364
366
  memory_usage_mb=676,
365
367
  embed_dim=1024,
366
368
  max_tokens=128000,
@@ -22,6 +22,7 @@ text2vec_base_chinese = ModelMeta(
22
22
  revision="183bb99aa7af74355fb58d16edf8c13ae7c5433e",
23
23
  release_date="2022-01-23",
24
24
  n_parameters=int(102 * 1e6),
25
+ n_embedding_parameters=16_226_304,
25
26
  embed_dim=768,
26
27
  license="apache-2.0",
27
28
  max_tokens=512,
@@ -51,6 +52,7 @@ text2vec_base_chinese_paraphrase = ModelMeta(
51
52
  revision="e90c150a9c7fb55a67712a766d6820c55fb83cdd",
52
53
  release_date="2023-06-19",
53
54
  n_parameters=118 * 1e6,
55
+ n_embedding_parameters=30_720_000,
54
56
  memory_usage_mb=450,
55
57
  embed_dim=768,
56
58
  license="apache-2.0",
@@ -95,6 +97,7 @@ text2vec_base_multilingual = ModelMeta(
95
97
  # So probably best not to.
96
98
  loader=sentence_transformers_loader,
97
99
  n_parameters=117654272,
100
+ n_embedding_parameters=96_014_208,
98
101
  memory_usage_mb=449,
99
102
  embed_dim=384,
100
103
  license="apache-2.0",
@@ -8,6 +8,7 @@ xlm_roberta_ua_distilled = ModelMeta(
8
8
  model_type=["dense"],
9
9
  loader=sentence_transformers_loader,
10
10
  n_parameters=278_000_000,
11
+ n_embedding_parameters=192_001_536,
11
12
  memory_usage_mb=1061,
12
13
  max_tokens=512,
13
14
  embed_dim=768,
@@ -72,6 +72,7 @@ uae_large_v1 = ModelMeta(
72
72
  revision="369c368f70f16a613f19f5598d4f12d9f44235d4",
73
73
  release_date="2023-12-04", # initial commit of hf model.
74
74
  n_parameters=int(335 * 1e6),
75
+ n_embedding_parameters=31_254_528,
75
76
  memory_usage_mb=1278,
76
77
  max_tokens=512,
77
78
  embed_dim=1024,
@@ -38,6 +38,7 @@ vdr_2b_multi_v1 = ModelMeta(
38
38
  release_date="2024-01-08",
39
39
  modalities=["text"], # TODO: integrate with image
40
40
  n_parameters=2_000_000_000,
41
+ n_embedding_parameters=233_373_696,
41
42
  memory_usage_mb=4213,
42
43
  max_tokens=32768,
43
44
  embed_dim=1536,
@@ -16,6 +16,7 @@ greennode_embedding_large_vn_v1 = ModelMeta(
16
16
  loader=sentence_transformers_loader,
17
17
  open_weights=True,
18
18
  n_parameters=568_000_000,
19
+ n_embedding_parameters=256_002_048,
19
20
  memory_usage_mb=2167,
20
21
  embed_dim=1024,
21
22
  license="cc-by-4.0",
@@ -41,6 +42,7 @@ greennode_embedding_large_vn_mixed_v1 = ModelMeta(
41
42
  loader=sentence_transformers_loader,
42
43
  open_weights=True,
43
44
  n_parameters=568_000_000,
45
+ n_embedding_parameters=256_002_048,
44
46
  memory_usage_mb=2167,
45
47
  embed_dim=1024,
46
48
  license="cc-by-4.0",
@@ -66,6 +68,7 @@ aiteamvn_vietnamese_embeddings = ModelMeta(
66
68
  loader=sentence_transformers_loader,
67
69
  open_weights=True,
68
70
  n_parameters=568_000_000,
71
+ n_embedding_parameters=256_002_048,
69
72
  memory_usage_mb=2166,
70
73
  embed_dim=1024,
71
74
  license="cc-by-4.0",
@@ -98,6 +101,7 @@ hiieu_halong_embedding = ModelMeta(
98
101
  use_instructions=False,
99
102
  open_weights=True,
100
103
  n_parameters=278_000_000,
104
+ n_embedding_parameters=192_001_536,
101
105
  memory_usage_mb=1061,
102
106
  embed_dim=768,
103
107
  license="apache-2.0",
@@ -129,6 +133,7 @@ sup_simcse_vietnamese_phobert_base_ = ModelMeta(
129
133
  use_instructions=False,
130
134
  open_weights=True,
131
135
  n_parameters=135_000_000,
136
+ n_embedding_parameters=49_152_768,
132
137
  memory_usage_mb=517,
133
138
  max_tokens=256,
134
139
  embed_dim=768,
@@ -167,6 +172,7 @@ bkai_foundation_models_vietnamese_bi_encoder = ModelMeta(
167
172
  use_instructions=False,
168
173
  open_weights=True,
169
174
  n_parameters=135_000_000,
175
+ n_embedding_parameters=49_152_768,
170
176
  memory_usage_mb=515,
171
177
  max_tokens=256,
172
178
  embed_dim=768,
@@ -258,6 +258,7 @@ visualized_bge_base = ModelMeta(
258
258
  release_date="2024-06-06",
259
259
  modalities=["image", "text"],
260
260
  n_parameters=196_000_000,
261
+ n_embedding_parameters=None,
261
262
  memory_usage_mb=1631,
262
263
  max_tokens=512,
263
264
  embed_dim=768,
@@ -286,6 +287,7 @@ visualized_bge_m3 = ModelMeta(
286
287
  release_date="2024-06-06",
287
288
  modalities=["image", "text"],
288
289
  n_parameters=872_909_505,
290
+ n_embedding_parameters=None,
289
291
  memory_usage_mb=4263,
290
292
  max_tokens=8192,
291
293
  embed_dim=1024,
@@ -280,6 +280,7 @@ vlm2vec_lora = ModelMeta(
280
280
  release_date="2024-10-08",
281
281
  modalities=["image", "text"],
282
282
  n_parameters=None,
283
+ n_embedding_parameters=None,
283
284
  memory_usage_mb=None,
284
285
  max_tokens=131072,
285
286
  embed_dim=3072,
@@ -304,6 +305,7 @@ vlm2vec_full = ModelMeta(
304
305
  release_date="2024-10-08",
305
306
  modalities=["image", "text"],
306
307
  n_parameters=4_150_000_000,
308
+ n_embedding_parameters=None,
307
309
  memory_usage_mb=7909,
308
310
  max_tokens=131072,
309
311
  embed_dim=3072,
@@ -308,6 +308,7 @@ voyage_3_large = ModelMeta(
308
308
  embed_dim=1024,
309
309
  open_weights=False,
310
310
  n_parameters=None,
311
+ n_embedding_parameters=None,
311
312
  memory_usage_mb=None,
312
313
  license=None,
313
314
  reference="https://blog.voyageai.com/2025/01/07/voyage-3-large/",
@@ -336,6 +337,7 @@ voyage_3_5 = ModelMeta(
336
337
  embed_dim=1024,
337
338
  open_weights=False,
338
339
  n_parameters=None,
340
+ n_embedding_parameters=None,
339
341
  memory_usage_mb=None,
340
342
  license=None,
341
343
  reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
@@ -363,6 +365,7 @@ voyage_3_5_int8 = ModelMeta(
363
365
  embed_dim=1024,
364
366
  open_weights=False,
365
367
  n_parameters=None,
368
+ n_embedding_parameters=None,
366
369
  memory_usage_mb=None,
367
370
  license=None,
368
371
  reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
@@ -390,6 +393,7 @@ voyage_3_5_binary = ModelMeta(
390
393
  embed_dim=1024, # Same as original after unpacking from bits
391
394
  open_weights=False,
392
395
  n_parameters=None,
396
+ n_embedding_parameters=None,
393
397
  memory_usage_mb=None,
394
398
  license=None,
395
399
  reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
@@ -417,6 +421,7 @@ voyage_large_2_instruct = ModelMeta(
417
421
  embed_dim=1024,
418
422
  open_weights=False,
419
423
  n_parameters=None,
424
+ n_embedding_parameters=None,
420
425
  memory_usage_mb=None,
421
426
  license=None,
422
427
  reference="https://blog.voyageai.com/2024/05/05/voyage-large-2-instruct-instruction-tuned-and-rank-1-on-mteb/",
@@ -443,6 +448,7 @@ voyage_finance_2 = ModelMeta(
443
448
  embed_dim=1024,
444
449
  open_weights=False,
445
450
  n_parameters=None,
451
+ n_embedding_parameters=None,
446
452
  memory_usage_mb=None,
447
453
  license=None,
448
454
  reference="https://blog.voyageai.com/2024/06/03/domain-specific-embeddings-finance-edition-voyage-finance-2/",
@@ -469,6 +475,7 @@ voyage_law_2 = ModelMeta(
469
475
  embed_dim=1024,
470
476
  open_weights=False,
471
477
  n_parameters=None,
478
+ n_embedding_parameters=None,
472
479
  memory_usage_mb=None,
473
480
  license=None,
474
481
  reference="https://blog.voyageai.com/2024/04/15/domain-specific-embeddings-and-retrieval-legal-edition-voyage-law-2/",
@@ -495,6 +502,7 @@ voyage_code_2 = ModelMeta(
495
502
  embed_dim=1536,
496
503
  open_weights=False,
497
504
  n_parameters=None,
505
+ n_embedding_parameters=None,
498
506
  memory_usage_mb=None,
499
507
  license=None,
500
508
  reference="https://blog.voyageai.com/2024/01/23/voyage-code-2-elevate-your-code-retrieval/",
@@ -521,6 +529,7 @@ voyage_code_3 = ModelMeta(
521
529
  embed_dim=1024,
522
530
  open_weights=False,
523
531
  n_parameters=None,
532
+ n_embedding_parameters=None,
524
533
  memory_usage_mb=None,
525
534
  license=None,
526
535
  reference="https://blog.voyageai.com/2024/12/04/voyage-code-3/",
@@ -548,6 +557,7 @@ voyage_large_2 = ModelMeta(
548
557
  embed_dim=1536,
549
558
  open_weights=False,
550
559
  n_parameters=None,
560
+ n_embedding_parameters=None,
551
561
  memory_usage_mb=None,
552
562
  license=None,
553
563
  reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/",
@@ -574,6 +584,7 @@ voyage_2 = ModelMeta(
574
584
  embed_dim=1024,
575
585
  open_weights=False,
576
586
  n_parameters=None,
587
+ n_embedding_parameters=None,
577
588
  memory_usage_mb=None,
578
589
  license=None,
579
590
  reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/",
@@ -599,6 +610,7 @@ voyage_multilingual_2 = ModelMeta(
599
610
  embed_dim=1024,
600
611
  open_weights=False,
601
612
  n_parameters=None,
613
+ n_embedding_parameters=None,
602
614
  memory_usage_mb=None,
603
615
  license=None,
604
616
  reference="https://blog.voyageai.com/2024/06/10/voyage-multilingual-2-multilingual-embedding-model/",
@@ -625,6 +637,7 @@ voyage_3 = ModelMeta(
625
637
  embed_dim=1024,
626
638
  open_weights=False,
627
639
  n_parameters=None,
640
+ n_embedding_parameters=None,
628
641
  memory_usage_mb=None,
629
642
  license=None,
630
643
  reference="https://blog.voyageai.com/2024/09/18/voyage-3/",
@@ -651,6 +664,7 @@ voyage_3_lite = ModelMeta(
651
664
  embed_dim=512,
652
665
  open_weights=False,
653
666
  n_parameters=None,
667
+ n_embedding_parameters=None,
654
668
  memory_usage_mb=None,
655
669
  license=None,
656
670
  reference="https://blog.voyageai.com/2024/09/18/voyage-3/",
@@ -679,6 +693,7 @@ voyage_3_exp = ModelMeta(
679
693
  open_weights=False,
680
694
  # from their card https://huggingface.co/voyageai/voyage-3-m-exp#model-information
681
695
  n_parameters=int(6918 * 1e6),
696
+ n_embedding_parameters=None,
682
697
  memory_usage_mb=None,
683
698
  license=None,
684
699
  reference="https://huggingface.co/voyageai/voyage-3-m-exp",
@@ -215,6 +215,7 @@ voyage_v = ModelMeta(
215
215
  revision="1",
216
216
  release_date="2024-11-10",
217
217
  n_parameters=None,
218
+ n_embedding_parameters=None,
218
219
  memory_usage_mb=None,
219
220
  max_tokens=32768,
220
221
  embed_dim=1024,
@@ -31,6 +31,7 @@ xyz_embedding = ModelMeta(
31
31
  revision="4004120220b99baea764a1d3508427248ac3bccf",
32
32
  release_date="2024-09-13",
33
33
  n_parameters=326000000,
34
+ n_embedding_parameters=21_635_072,
34
35
  memory_usage_mb=1242,
35
36
  max_tokens=512,
36
37
  embed_dim=768,
@@ -121,6 +121,7 @@ Youtu_Embedding_V1 = ModelMeta(
121
121
  release_date="2025-09-28",
122
122
  open_weights=True,
123
123
  n_parameters=2672957440,
124
+ n_embedding_parameters=None,
124
125
  memory_usage_mb=None,
125
126
  embed_dim=2048,
126
127
  license="apache-2.0",
@@ -20,6 +20,7 @@ yuan_embedding_2_zh = ModelMeta(
20
20
  revision="b5ebcace6f4fc6e5a4d1852557eb2dc2d1040cee",
21
21
  release_date="2025-11-24",
22
22
  n_parameters=326000000,
23
+ n_embedding_parameters=21_635_072,
23
24
  memory_usage_mb=1242,
24
25
  embed_dim=1792,
25
26
  license="apache-2.0",
@@ -43,6 +43,7 @@ yuan_embedding_2_en = ModelMeta(
43
43
  revision="b2fd15da3bcae3473c8529593825c15068f09fce",
44
44
  release_date="2025-11-27",
45
45
  n_parameters=595776512,
46
+ n_embedding_parameters=None,
46
47
  memory_usage_mb=2272,
47
48
  embed_dim=1024,
48
49
  max_tokens=2048,