mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +27 -12
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/benchmarks/benchmarks/__init__.py +2 -0
  27. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  28. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  29. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  30. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  31. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  32. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  33. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  34. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  35. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  36. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  37. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  38. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  39. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  40. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  41. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  42. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  43. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  44. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  48. mteb/evaluate.py +10 -2
  49. mteb/models/model_implementations/align_models.py +1 -0
  50. mteb/models/model_implementations/amazon_models.py +1 -0
  51. mteb/models/model_implementations/andersborges.py +2 -0
  52. mteb/models/model_implementations/ara_models.py +1 -0
  53. mteb/models/model_implementations/arctic_models.py +8 -0
  54. mteb/models/model_implementations/b1ade_models.py +1 -0
  55. mteb/models/model_implementations/bedrock_models.py +4 -0
  56. mteb/models/model_implementations/bge_models.py +40 -1
  57. mteb/models/model_implementations/bica_model.py +1 -0
  58. mteb/models/model_implementations/blip2_models.py +2 -0
  59. mteb/models/model_implementations/blip_models.py +8 -0
  60. mteb/models/model_implementations/bm25.py +10 -5
  61. mteb/models/model_implementations/bmretriever_models.py +4 -0
  62. mteb/models/model_implementations/cadet_models.py +1 -0
  63. mteb/models/model_implementations/cde_models.py +2 -0
  64. mteb/models/model_implementations/clip_models.py +3 -0
  65. mteb/models/model_implementations/clips_models.py +3 -0
  66. mteb/models/model_implementations/codefuse_models.py +5 -0
  67. mteb/models/model_implementations/codesage_models.py +3 -0
  68. mteb/models/model_implementations/cohere_models.py +4 -0
  69. mteb/models/model_implementations/cohere_v.py +5 -0
  70. mteb/models/model_implementations/colpali_models.py +3 -0
  71. mteb/models/model_implementations/colqwen_models.py +7 -0
  72. mteb/models/model_implementations/colsmol_models.py +2 -0
  73. mteb/models/model_implementations/conan_models.py +1 -0
  74. mteb/models/model_implementations/dino_models.py +19 -0
  75. mteb/models/model_implementations/e5_instruct.py +4 -0
  76. mteb/models/model_implementations/e5_models.py +9 -0
  77. mteb/models/model_implementations/e5_v.py +1 -0
  78. mteb/models/model_implementations/eagerworks_models.py +1 -0
  79. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  80. mteb/models/model_implementations/en_code_retriever.py +1 -0
  81. mteb/models/model_implementations/euler_models.py +1 -0
  82. mteb/models/model_implementations/evaclip_models.py +4 -0
  83. mteb/models/model_implementations/fa_models.py +9 -0
  84. mteb/models/model_implementations/facebookai.py +2 -0
  85. mteb/models/model_implementations/geogpt_models.py +1 -0
  86. mteb/models/model_implementations/gme_v_models.py +2 -0
  87. mteb/models/model_implementations/google_models.py +5 -0
  88. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  89. mteb/models/model_implementations/gritlm_models.py +2 -0
  90. mteb/models/model_implementations/gte_models.py +9 -0
  91. mteb/models/model_implementations/hinvec_models.py +1 -0
  92. mteb/models/model_implementations/human.py +1 -0
  93. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  94. mteb/models/model_implementations/inf_models.py +2 -0
  95. mteb/models/model_implementations/jasper_models.py +2 -0
  96. mteb/models/model_implementations/jina_clip.py +1 -0
  97. mteb/models/model_implementations/jina_models.py +7 -0
  98. mteb/models/model_implementations/kalm_models.py +6 -0
  99. mteb/models/model_implementations/kblab.py +1 -0
  100. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  101. mteb/models/model_implementations/kfst.py +1 -0
  102. mteb/models/model_implementations/kowshik24_models.py +1 -0
  103. mteb/models/model_implementations/lens_models.py +2 -0
  104. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  105. mteb/models/model_implementations/linq_models.py +1 -0
  106. mteb/models/model_implementations/listconranker.py +1 -0
  107. mteb/models/model_implementations/llm2clip_models.py +3 -0
  108. mteb/models/model_implementations/llm2vec_models.py +8 -0
  109. mteb/models/model_implementations/mcinext_models.py +3 -0
  110. mteb/models/model_implementations/mdbr_models.py +2 -0
  111. mteb/models/model_implementations/misc_models.py +63 -0
  112. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  113. mteb/models/model_implementations/mme5_models.py +2 -1
  114. mteb/models/model_implementations/moco_models.py +2 -0
  115. mteb/models/model_implementations/mod_models.py +1 -0
  116. mteb/models/model_implementations/model2vec_models.py +13 -0
  117. mteb/models/model_implementations/moka_models.py +3 -0
  118. mteb/models/model_implementations/nbailab.py +3 -0
  119. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  120. mteb/models/model_implementations/nomic_models.py +6 -0
  121. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  122. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  123. mteb/models/model_implementations/nvidia_models.py +3 -0
  124. mteb/models/model_implementations/octen_models.py +2 -0
  125. mteb/models/model_implementations/openai_models.py +5 -0
  126. mteb/models/model_implementations/openclip_models.py +8 -0
  127. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  128. mteb/models/model_implementations/ops_moa_models.py +2 -0
  129. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  130. mteb/models/model_implementations/pawan_models.py +1 -0
  131. mteb/models/model_implementations/piccolo_models.py +2 -0
  132. mteb/models/model_implementations/promptriever_models.py +4 -0
  133. mteb/models/model_implementations/pylate_models.py +13 -0
  134. mteb/models/model_implementations/qodo_models.py +2 -0
  135. mteb/models/model_implementations/qtack_models.py +1 -0
  136. mteb/models/model_implementations/qwen3_models.py +3 -0
  137. mteb/models/model_implementations/qzhou_models.py +2 -0
  138. mteb/models/model_implementations/rasgaard_models.py +1 -0
  139. mteb/models/model_implementations/reasonir_model.py +65 -0
  140. mteb/models/model_implementations/repllama_models.py +2 -0
  141. mteb/models/model_implementations/rerankers_custom.py +3 -0
  142. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  143. mteb/models/model_implementations/richinfoai_models.py +1 -0
  144. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  145. mteb/models/model_implementations/ruri_models.py +10 -0
  146. mteb/models/model_implementations/salesforce_models.py +3 -0
  147. mteb/models/model_implementations/samilpwc_models.py +1 -0
  148. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  149. mteb/models/model_implementations/searchmap_models.py +1 -0
  150. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  151. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  152. mteb/models/model_implementations/seed_models.py +1 -0
  153. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  154. mteb/models/model_implementations/shuu_model.py +1 -0
  155. mteb/models/model_implementations/siglip_models.py +10 -0
  156. mteb/models/model_implementations/sonar_models.py +2 -1
  157. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  158. mteb/models/model_implementations/stella_models.py +6 -0
  159. mteb/models/model_implementations/tarka_models.py +2 -0
  160. mteb/models/model_implementations/text2vec_models.py +3 -0
  161. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  162. mteb/models/model_implementations/uae_models.py +1 -0
  163. mteb/models/model_implementations/vdr_models.py +1 -0
  164. mteb/models/model_implementations/vi_vn_models.py +6 -0
  165. mteb/models/model_implementations/vista_models.py +2 -0
  166. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  167. mteb/models/model_implementations/voyage_models.py +15 -0
  168. mteb/models/model_implementations/voyage_v.py +1 -0
  169. mteb/models/model_implementations/xyz_models.py +1 -0
  170. mteb/models/model_implementations/youtu_models.py +1 -0
  171. mteb/models/model_implementations/yuan_models.py +1 -0
  172. mteb/models/model_implementations/yuan_models_en.py +1 -0
  173. mteb/models/model_meta.py +35 -2
  174. mteb/models/models_protocols.py +4 -0
  175. mteb/models/search_wrappers.py +12 -0
  176. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  177. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  178. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  179. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  180. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  181. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  182. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  183. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  184. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  185. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  186. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  187. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  188. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  189. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  190. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  191. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  192. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  193. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  194. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  195. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  196. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  197. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  198. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  199. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  200. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  201. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  202. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  203. mteb/tasks/classification/est/estonian_valence.py +1 -1
  204. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  205. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  206. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  207. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  208. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  209. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  210. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  211. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  212. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  213. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  214. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  215. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  216. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  217. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  218. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  219. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  220. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  221. mteb/tasks/classification/kor/klue_tc.py +2 -2
  222. mteb/tasks/classification/kor/kor_fin.py +1 -1
  223. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  224. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  225. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  226. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  227. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  228. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  229. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  230. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  231. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  232. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  233. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  234. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  235. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  236. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  237. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  238. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  239. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  240. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  241. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  242. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  243. mteb/tasks/classification/ron/moroco.py +1 -1
  244. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  245. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  246. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  247. mteb/tasks/classification/rus/headline_classification.py +2 -2
  248. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  249. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  250. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  251. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  252. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  253. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  254. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  255. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  256. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  257. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  258. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  259. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  260. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  261. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  262. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  263. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  264. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  265. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  266. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  267. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  268. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  269. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  270. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  271. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  272. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  273. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  274. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  275. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  276. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  277. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  278. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  279. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  280. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  281. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  282. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  283. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  284. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  285. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  286. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  287. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  288. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  289. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  290. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  291. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  292. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  293. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  294. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  295. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  296. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  297. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  298. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  299. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  300. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  301. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  302. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  303. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  304. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  305. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  306. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  307. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  308. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  309. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  310. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  311. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  312. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  313. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  314. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  315. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  316. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  317. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  318. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  319. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  320. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  321. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  322. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  323. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  324. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  325. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  326. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  327. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  328. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  329. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  330. mteb/tasks/pair_classification/rus/terra.py +2 -2
  331. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  332. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  333. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  334. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  335. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  336. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  337. mteb/tasks/retrieval/code/code_rag.py +4 -4
  338. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  339. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  340. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  341. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  342. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  343. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  344. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  345. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  346. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  347. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  348. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  349. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  350. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  351. mteb/tasks/retrieval/eng/__init__.py +42 -0
  352. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  353. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  354. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  355. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  356. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  357. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  358. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  359. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  360. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  361. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  362. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  363. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  364. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  365. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  366. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  367. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  368. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  369. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  370. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  371. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  372. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  373. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  374. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  375. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  376. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  377. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  378. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  379. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  380. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  381. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  382. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  383. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  384. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  385. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  386. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  387. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  388. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  389. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  390. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  391. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  392. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  393. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  394. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  395. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  396. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  397. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  398. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  399. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  400. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  401. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  402. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  403. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  404. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  405. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  406. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  407. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  408. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  409. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  410. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  411. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  412. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  413. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  414. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  415. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  416. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  417. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  418. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  419. mteb/tasks/retrieval/nob/norquad.py +1 -1
  420. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  421. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  422. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  423. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  424. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  425. mteb/tasks/sts/kor/klue_sts.py +1 -1
  426. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  427. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  428. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  429. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  430. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
  431. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  432. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  433. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  434. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -36,12 +36,76 @@ REASONIR_TRAINING_DATA = {
36
36
  "DuRetrieval",
37
37
  "QuoraRetrieval",
38
38
  }
39
+ _prompts_dict = {
40
+ "BrightBiologyRetrieval": {
41
+ "query": "Given a Biology post, retrieve relevant passages that help answer the post"
42
+ },
43
+ "BrightEarthScienceRetrieval": {
44
+ "query": "Given a Earth Science post, retrieve relevant passages that help answer the post"
45
+ },
46
+ "BrightEconomicsRetrieval": {
47
+ "query": "Given a Economics post, retrieve relevant passages that help answer the post"
48
+ },
49
+ "BrightPsychologyRetrieval": {
50
+ "query": "Given a Psychology post, retrieve relevant passages that help answer the post"
51
+ },
52
+ "BrightRoboticsRetrieval": {
53
+ "query": "Given a Robotics post, retrieve relevant passages that help answer the post"
54
+ },
55
+ "BrightStackoverflowRetrieval": {
56
+ "query": "Given a Stackoverflow post, retrieve relevant passages that help answer the post"
57
+ },
58
+ "BrightSustainableLivingRetrieval": {
59
+ "query": "Given a Sustainable Living post, retrieve relevant passages that help answer the post"
60
+ },
61
+ "BrightPonyRetrieval": {
62
+ "query": "Given a Pony question, retrieve relevant passages that help answer the question"
63
+ },
64
+ "BrightLeetcodeRetrieval": {
65
+ "query": "Given a coding problem, retrieve relevant examples that help answer the problem",
66
+ },
67
+ "BrightAopsRetrieval": {
68
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem"
69
+ },
70
+ "BrightTheoremQATheoremsRetrieval": {
71
+ "query": "Given a Math problem, retrieve relevant theorems that help answer the problem",
72
+ },
73
+ "BrightTheoremQAQuestionsRetrieval": {
74
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem",
75
+ },
76
+ "BrightBiologyLongRetrieval": {
77
+ "query": "Given a Biology post, retrieve relevant documents that help answer the post"
78
+ },
79
+ "BrightEarthScienceLongRetrieval": {
80
+ "query": "Given a Earth Science post, retrieve relevant documents that help answer the post"
81
+ },
82
+ "BrightEconomicsLongRetrieval": {
83
+ "query": "Given a Economics post, retrieve relevant documents that help answer the post"
84
+ },
85
+ "BrightPsychologyLongRetrieval": {
86
+ "query": "Given a Psychology post, retrieve relevant documents that help answer the post"
87
+ },
88
+ "BrightRoboticsLongRetrieval": {
89
+ "query": "Given a Robotics post, retrieve relevant documents that help answer the post"
90
+ },
91
+ "BrightStackoverflowLongRetrieval": {
92
+ "query": "Given a Stackoverflow post, retrieve relevant documents that help answer the post"
93
+ },
94
+ "BrightSustainableLivingLongRetrieval": {
95
+ "query": "Given a Sustainable Living post, retrieve relevant documents that help answer the post"
96
+ },
97
+ "BrightPonyLongRetrieval": {
98
+ "query": "Given a Pony question, retrieve relevant documents that help answer the question"
99
+ },
100
+ }
101
+
39
102
 
40
103
  ReasonIR_8B = ModelMeta(
41
104
  loader=InstructSentenceTransformerModel,
42
105
  loader_kwargs=dict(
43
106
  instruction_template=instruction_template,
44
107
  trust_remote_code=True,
108
+ prompts_dict=_prompts_dict,
45
109
  ),
46
110
  name="ReasonIR/ReasonIR-8B",
47
111
  model_type=["dense"],
@@ -50,6 +114,7 @@ ReasonIR_8B = ModelMeta(
50
114
  revision="c3d0690370ff4a8c3d3882d8dfa85c43650034fa",
51
115
  release_date="2025-04-29",
52
116
  n_parameters=7_500_000_000,
117
+ n_embedding_parameters=None,
53
118
  memory_usage_mb=None,
54
119
  embed_dim=4096,
55
120
  license="cc-by-nc-4.0",
@@ -179,6 +179,7 @@ repllama_llama2_original = ModelMeta(
179
179
  "mMARCO-NL", # translation not trained on
180
180
  },
181
181
  n_parameters=7_000_000,
182
+ n_embedding_parameters=131_072_000,
182
183
  memory_usage_mb=27,
183
184
  max_tokens=4096,
184
185
  embed_dim=4096,
@@ -208,6 +209,7 @@ repllama_llama2_reproduced = ModelMeta(
208
209
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision
209
210
  release_date="2024-09-15",
210
211
  n_parameters=7_000_000,
212
+ n_embedding_parameters=None,
211
213
  memory_usage_mb=27,
212
214
  max_tokens=4096,
213
215
  embed_dim=4096,
@@ -231,6 +231,7 @@ monobert_large = ModelMeta(
231
231
  revision="0a97706f3827389da43b83348d5d18c9d53876fa",
232
232
  release_date="2020-05-28",
233
233
  n_parameters=None,
234
+ n_embedding_parameters=31_254_528,
234
235
  memory_usage_mb=None,
235
236
  max_tokens=None,
236
237
  embed_dim=None,
@@ -256,6 +257,7 @@ jina_reranker_multilingual = ModelMeta(
256
257
  revision="126747772a932960028d9f4dc93bd5d9c4869be4",
257
258
  release_date="2024-09-26",
258
259
  n_parameters=None,
260
+ n_embedding_parameters=None,
259
261
  memory_usage_mb=531,
260
262
  max_tokens=None,
261
263
  embed_dim=None,
@@ -319,6 +321,7 @@ bge_reranker_v2_m3 = ModelMeta(
319
321
  revision="953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e",
320
322
  release_date="2024-06-24",
321
323
  n_parameters=None,
324
+ n_embedding_parameters=256_002_048,
322
325
  memory_usage_mb=2166,
323
326
  max_tokens=None,
324
327
  embed_dim=None,
@@ -327,6 +327,7 @@ monot5_small = ModelMeta(
327
327
  revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e",
328
328
  release_date="2022-03-28",
329
329
  n_parameters=None,
330
+ n_embedding_parameters=16_449_536,
330
331
  memory_usage_mb=None,
331
332
  max_tokens=None,
332
333
  embed_dim=None,
@@ -369,6 +370,7 @@ monot5_base = ModelMeta(
369
370
  url={https://arxiv.org/abs/2206.02873},
370
371
  }""",
371
372
  n_parameters=None,
373
+ n_embedding_parameters=24_674_304,
372
374
  memory_usage_mb=None,
373
375
  max_tokens=None,
374
376
  embed_dim=None,
@@ -393,6 +395,7 @@ monot5_large = ModelMeta(
393
395
  revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98",
394
396
  release_date="2022-03-28",
395
397
  n_parameters=None,
398
+ n_embedding_parameters=32_899_072,
396
399
  memory_usage_mb=None,
397
400
  max_tokens=None,
398
401
  embed_dim=None,
@@ -426,6 +429,7 @@ monot5_3b = ModelMeta(
426
429
  revision="bc0c419a438c81f592f878ce32430a1823f5db6c",
427
430
  release_date="2022-03-28",
428
431
  n_parameters=None,
432
+ n_embedding_parameters=32_899_072,
429
433
  memory_usage_mb=None,
430
434
  max_tokens=None,
431
435
  embed_dim=None,
@@ -482,6 +486,7 @@ flant5_base = ModelMeta(
482
486
  # "qed": ["train"],
483
487
  ),
484
488
  n_parameters=None,
489
+ n_embedding_parameters=24_674_304,
485
490
  memory_usage_mb=944,
486
491
  max_tokens=None,
487
492
  embed_dim=None,
@@ -528,6 +533,7 @@ flant5_large = ModelMeta(
528
533
  # "qed": ["train"],
529
534
  ),
530
535
  n_parameters=None,
536
+ n_embedding_parameters=32_899_072,
531
537
  memory_usage_mb=2987,
532
538
  max_tokens=None,
533
539
  embed_dim=None,
@@ -574,6 +580,7 @@ flant5_xl = ModelMeta(
574
580
  # "qed": ["train"],
575
581
  ),
576
582
  n_parameters=None,
583
+ n_embedding_parameters=65_798_144,
577
584
  memory_usage_mb=10871,
578
585
  max_tokens=None,
579
586
  embed_dim=None,
@@ -620,6 +627,7 @@ flant5_xxl = ModelMeta(
620
627
  # "qed": ["train"],
621
628
  ),
622
629
  n_parameters=None,
630
+ n_embedding_parameters=131_596_288,
623
631
  memory_usage_mb=42980,
624
632
  max_tokens=None,
625
633
  embed_dim=None,
@@ -644,6 +652,7 @@ llama2_7b = ModelMeta(
644
652
  revision="01c7f73d771dfac7d292323805ebc428287df4f9",
645
653
  release_date="2023-07-18",
646
654
  n_parameters=None,
655
+ n_embedding_parameters=131_072_000,
647
656
  memory_usage_mb=None,
648
657
  max_tokens=None,
649
658
  embed_dim=None,
@@ -686,6 +695,7 @@ llama2_7b_chat = ModelMeta(
686
695
  url={https://arxiv.org/abs/2307.09288},
687
696
  }""",
688
697
  n_parameters=None,
698
+ n_embedding_parameters=131_072_000,
689
699
  memory_usage_mb=None,
690
700
  max_tokens=None,
691
701
  embed_dim=None,
@@ -710,6 +720,7 @@ mistral_7b = ModelMeta(
710
720
  revision="3ad372fc79158a2148299e3318516c786aeded6c",
711
721
  release_date="2023-12-11",
712
722
  n_parameters=None,
723
+ n_embedding_parameters=None,
713
724
  memory_usage_mb=None,
714
725
  max_tokens=None,
715
726
  embed_dim=None,
@@ -746,6 +757,7 @@ followir_7b = ModelMeta(
746
757
  # "jhu-clsp/FollowIR-train"
747
758
  ),
748
759
  n_parameters=None,
760
+ n_embedding_parameters=None,
749
761
  memory_usage_mb=13813,
750
762
  max_tokens=None,
751
763
  embed_dim=None,
@@ -896,6 +908,7 @@ mt5_base_mmarco_v2 = ModelMeta(
896
908
  """,
897
909
  training_datasets={"MSMARCO"},
898
910
  n_parameters=None,
911
+ n_embedding_parameters=192_086_016,
899
912
  memory_usage_mb=None,
900
913
  max_tokens=None,
901
914
  embed_dim=None,
@@ -919,6 +932,7 @@ mt5_13b_mmarco_100k = ModelMeta(
919
932
  revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
920
933
  release_date="2022-11-04",
921
934
  n_parameters=None,
935
+ n_embedding_parameters=1_024_458_752,
922
936
  memory_usage_mb=None,
923
937
  max_tokens=None,
924
938
  embed_dim=None,
@@ -15,6 +15,7 @@ ritrieve_zh_v1 = ModelMeta(
15
15
  revision="f8d5a707656c55705027678e311f9202c8ced12c",
16
16
  release_date="2025-03-25",
17
17
  n_parameters=int(326 * 1e6),
18
+ n_embedding_parameters=21_635_072,
18
19
  memory_usage_mb=1242,
19
20
  embed_dim=1792,
20
21
  license="mit",
@@ -244,6 +244,7 @@ rubert_tiny = ModelMeta(
244
244
  revision="5441c5ea8026d4f6d7505ec004845409f1259fb1",
245
245
  release_date="2021-05-24",
246
246
  n_parameters=11_900_000,
247
+ n_embedding_parameters=9_223_968,
247
248
  memory_usage_mb=45,
248
249
  embed_dim=312,
249
250
  license="mit",
@@ -270,6 +271,7 @@ rubert_tiny2 = ModelMeta(
270
271
  revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3",
271
272
  release_date="2021-10-28",
272
273
  n_parameters=29_400_000,
274
+ n_embedding_parameters=26_154_336,
273
275
  memory_usage_mb=112,
274
276
  embed_dim=312,
275
277
  license="mit",
@@ -297,6 +299,7 @@ sbert_large_nlu_ru = ModelMeta(
297
299
  revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a",
298
300
  release_date="2020-11-20",
299
301
  n_parameters=427_000_000,
302
+ n_embedding_parameters=123_021_312,
300
303
  memory_usage_mb=1629,
301
304
  embed_dim=1024,
302
305
  license="mit",
@@ -323,6 +326,7 @@ sbert_large_mt_nlu_ru = ModelMeta(
323
326
  revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0",
324
327
  release_date="2021-05-18",
325
328
  n_parameters=427_000_000,
329
+ n_embedding_parameters=123_021_312,
326
330
  memory_usage_mb=1629,
327
331
  embed_dim=1024,
328
332
  license="not specified",
@@ -351,6 +355,7 @@ user_base_ru = ModelMeta(
351
355
  revision="436a489a2087d61aa670b3496a9915f84e46c861",
352
356
  release_date="2024-06-10",
353
357
  n_parameters=427_000_000,
358
+ n_embedding_parameters=38_603_520,
354
359
  memory_usage_mb=473,
355
360
  embed_dim=768,
356
361
  license="apache-2.0",
@@ -412,6 +417,7 @@ user_bge_m3 = ModelMeta(
412
417
  revision="0cc6cfe48e260fb0474c753087a69369e88709ae",
413
418
  release_date="2024-07-05",
414
419
  n_parameters=359_026_688,
420
+ n_embedding_parameters=47_273_984,
415
421
  memory_usage_mb=1370,
416
422
  embed_dim=1024,
417
423
  license="apache-2.0",
@@ -463,6 +469,7 @@ deberta_v1_ru = ModelMeta(
463
469
  revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4",
464
470
  release_date="2023-02-07",
465
471
  n_parameters=124_000_000,
472
+ n_embedding_parameters=38_603_520,
466
473
  memory_usage_mb=473,
467
474
  embed_dim=768,
468
475
  license="apache-2.0",
@@ -494,6 +501,7 @@ rubert_base_cased = ModelMeta(
494
501
  revision="4036cab694767a299f2b9e6492909664d9414229",
495
502
  release_date="2020-03-04",
496
503
  n_parameters=1280_000_000,
504
+ n_embedding_parameters=91_812_096,
497
505
  memory_usage_mb=4883,
498
506
  embed_dim=768,
499
507
  license="not specified",
@@ -530,6 +538,7 @@ distilrubert_small_cased_conversational = ModelMeta(
530
538
  revision="e348066b4a7279b97138038299bddc6580a9169a",
531
539
  release_date="2022-06-28",
532
540
  n_parameters=107_000_000,
541
+ n_embedding_parameters=91_812_096,
533
542
  memory_usage_mb=408,
534
543
  embed_dim=768,
535
544
  license="not specified",
@@ -565,6 +574,7 @@ rubert_base_cased_sentence = ModelMeta(
565
574
  revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8",
566
575
  release_date="2020-03-04",
567
576
  n_parameters=107_000_000,
577
+ n_embedding_parameters=91_812_096,
568
578
  memory_usage_mb=408,
569
579
  embed_dim=768,
570
580
  license="not specified",
@@ -590,6 +600,7 @@ labse_en_ru = ModelMeta(
590
600
  revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e",
591
601
  release_date="2021-06-10",
592
602
  n_parameters=129_000_000,
603
+ n_embedding_parameters=42_303_744,
593
604
  memory_usage_mb=492,
594
605
  embed_dim=768,
595
606
  license="not specified",
@@ -618,6 +629,7 @@ rubert_tiny_turbo = ModelMeta(
618
629
  revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054",
619
630
  release_date="2024-06-21",
620
631
  n_parameters=29_200_000,
632
+ n_embedding_parameters=26_154_336,
621
633
  memory_usage_mb=111,
622
634
  embed_dim=312,
623
635
  license="mit",
@@ -641,6 +653,7 @@ rubert_mini_frida = ModelMeta(
641
653
  revision="19b279b78afd945b5ccae78f63e284909814adc2",
642
654
  release_date="2025-03-02",
643
655
  n_parameters=32_300_000,
656
+ n_embedding_parameters=26_154_336,
644
657
  memory_usage_mb=123,
645
658
  embed_dim=312,
646
659
  license="mit",
@@ -669,6 +682,7 @@ labse_ru_turbo = ModelMeta(
669
682
  revision="1940b046c6b5e125df11722b899130329d0a46da",
670
683
  release_date="2024-06-27",
671
684
  n_parameters=129_000_000,
685
+ n_embedding_parameters=42_303_744,
672
686
  memory_usage_mb=490,
673
687
  embed_dim=768,
674
688
  license="mit",
@@ -720,6 +734,7 @@ rosberta_ru_en = ModelMeta(
720
734
  use_instructions=True,
721
735
  reference="https://huggingface.co/ai-forever/ru-en-RoSBERTa",
722
736
  n_parameters=404_000_000,
737
+ n_embedding_parameters=100_869_120,
723
738
  memory_usage_mb=1540,
724
739
  max_tokens=512,
725
740
  embed_dim=1024,
@@ -886,6 +901,7 @@ frida = ModelMeta(
886
901
  use_instructions=True,
887
902
  reference="https://huggingface.co/ai-forever/FRIDA",
888
903
  n_parameters=823_000_000,
904
+ n_embedding_parameters=143_847_936,
889
905
  memory_usage_mb=3141,
890
906
  max_tokens=512,
891
907
  embed_dim=1536,
@@ -918,6 +934,7 @@ giga_embeddings = ModelMeta(
918
934
  revision="0ad5b29bfecd806cecc9d66b927d828a736594dc",
919
935
  release_date="2025-09-23",
920
936
  n_parameters=3_227_176_961,
937
+ n_embedding_parameters=None,
921
938
  memory_usage_mb=12865,
922
939
  embed_dim=2048,
923
940
  license="mit",
@@ -950,6 +967,7 @@ berta = ModelMeta(
950
967
  revision="914c8c8aed14042ed890fc2c662d5e9e66b2faa7",
951
968
  release_date="2025-03-10",
952
969
  n_parameters=128_000_000,
970
+ n_embedding_parameters=42_303_744,
953
971
  memory_usage_mb=489,
954
972
  embed_dim=768,
955
973
  license="mit",
@@ -1025,6 +1043,7 @@ user2_small = ModelMeta(
1025
1043
  use_instructions=True,
1026
1044
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1027
1045
  n_parameters=34_400_000,
1046
+ n_embedding_parameters=None,
1028
1047
  memory_usage_mb=131,
1029
1048
  max_tokens=8192,
1030
1049
  embed_dim=384,
@@ -1058,6 +1077,7 @@ user2_base = ModelMeta(
1058
1077
  use_instructions=True,
1059
1078
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1060
1079
  n_parameters=149_000_000,
1080
+ n_embedding_parameters=None,
1061
1081
  memory_usage_mb=568,
1062
1082
  max_tokens=8192,
1063
1083
  embed_dim=768,
@@ -38,6 +38,7 @@ cl_nagoya_ruri_v3_30m = ModelMeta(
38
38
  revision="24899e5de370b56d179604a007c0d727bf144504",
39
39
  release_date="2025-04-07",
40
40
  n_parameters=36_705_536,
41
+ n_embedding_parameters=None,
41
42
  memory_usage_mb=140,
42
43
  embed_dim=256,
43
44
  license="apache-2.0",
@@ -69,6 +70,7 @@ cl_nagoya_ruri_v3_70m = ModelMeta(
69
70
  revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
70
71
  release_date="2025-04-09",
71
72
  n_parameters=36_705_536,
73
+ n_embedding_parameters=None,
72
74
  memory_usage_mb=140,
73
75
  embed_dim=256,
74
76
  license="apache-2.0",
@@ -98,6 +100,7 @@ cl_nagoya_ruri_v3_130m = ModelMeta(
98
100
  revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
99
101
  release_date="2025-04-09",
100
102
  n_parameters=132_140_544,
103
+ n_embedding_parameters=None,
101
104
  memory_usage_mb=504,
102
105
  embed_dim=512,
103
106
  license="apache-2.0",
@@ -127,6 +130,7 @@ cl_nagoya_ruri_v3_310m = ModelMeta(
127
130
  revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
128
131
  release_date="2025-04-09",
129
132
  n_parameters=314_611_968,
133
+ n_embedding_parameters=None,
130
134
  memory_usage_mb=1200,
131
135
  embed_dim=768,
132
136
  license="apache-2.0",
@@ -157,6 +161,7 @@ cl_nagoya_ruri_small_v2 = ModelMeta(
157
161
  revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
158
162
  release_date="2024-12-05",
159
163
  n_parameters=68_087_808,
164
+ n_embedding_parameters=25_165_824,
160
165
  memory_usage_mb=260,
161
166
  embed_dim=768,
162
167
  license="apache-2.0",
@@ -186,6 +191,7 @@ cl_nagoya_ruri_base_v2 = ModelMeta(
186
191
  revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
187
192
  release_date="2024-12-05",
188
193
  n_parameters=111_207_168,
194
+ n_embedding_parameters=25_165_824,
189
195
  memory_usage_mb=424,
190
196
  embed_dim=768,
191
197
  license="apache-2.0",
@@ -215,6 +221,7 @@ cl_nagoya_ruri_large_v2 = ModelMeta(
215
221
  revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
216
222
  release_date="2024-12-06",
217
223
  n_parameters=337_441_792,
224
+ n_embedding_parameters=33_554_432,
218
225
  memory_usage_mb=1287,
219
226
  embed_dim=1024,
220
227
  license="apache-2.0",
@@ -245,6 +252,7 @@ cl_nagoya_ruri_small_v1 = ModelMeta(
245
252
  revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
246
253
  release_date="2024-08-28",
247
254
  n_parameters=68_087_808,
255
+ n_embedding_parameters=25_165_824,
248
256
  memory_usage_mb=130,
249
257
  embed_dim=768,
250
258
  license="apache-2.0",
@@ -274,6 +282,7 @@ cl_nagoya_ruri_base_v1 = ModelMeta(
274
282
  revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
275
283
  release_date="2024-08-28",
276
284
  n_parameters=111_207_168,
285
+ n_embedding_parameters=25_165_824,
277
286
  memory_usage_mb=212,
278
287
  embed_dim=768,
279
288
  license="apache-2.0",
@@ -304,6 +313,7 @@ cl_nagoya_ruri_large_v1 = ModelMeta(
304
313
  revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
305
314
  release_date="2024-08-28",
306
315
  n_parameters=337_441_792,
316
+ n_embedding_parameters=33_554_432,
307
317
  memory_usage_mb=644,
308
318
  embed_dim=1024,
309
319
  license="apache-2.0",
@@ -58,6 +58,7 @@ SFR_Embedding_2_R = ModelMeta(
58
58
  revision="91762139d94ed4371a9fa31db5551272e0b83818",
59
59
  release_date="2024-06-14", # initial commit of hf model.
60
60
  n_parameters=7_110_000_000,
61
+ n_embedding_parameters=None,
61
62
  memory_usage_mb=13563,
62
63
  embed_dim=4096,
63
64
  license="cc-by-nc-4.0",
@@ -96,6 +97,7 @@ SFR_Embedding_Code_2B_R = ModelMeta(
96
97
  revision="c73d8631a005876ed5abde34db514b1fb6566973",
97
98
  release_date="2025-01-17", # initial commit of hf model.
98
99
  n_parameters=2_610_000_000,
100
+ n_embedding_parameters=None,
99
101
  memory_usage_mb=4986,
100
102
  embed_dim=2304,
101
103
  license="cc-by-nc-4.0",
@@ -134,6 +136,7 @@ SFR_Embedding_Mistral = ModelMeta(
134
136
  revision="938c560d1c236aa563b2dbdf084f28ab28bccb11",
135
137
  release_date="2024-01-24", # initial commit of hf model.
136
138
  n_parameters=7_110_000_000,
139
+ n_embedding_parameters=None,
137
140
  memory_usage_mb=13563,
138
141
  embed_dim=4096,
139
142
  license="cc-by-nc-4.0",
@@ -51,6 +51,7 @@ samilpwc_expr = ModelMeta(
51
51
  revision="33358978be40f36491045f9c2a359d38c3f50047",
52
52
  release_date="2025-08-12",
53
53
  n_parameters=560_000_000,
54
+ n_embedding_parameters=256_002_048,
54
55
  memory_usage_mb=2136,
55
56
  embed_dim=1024,
56
57
  license="apache-2.0",
@@ -124,6 +124,7 @@ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
124
124
  revision="1f3408afaa7b617e3445d891310a9c26dd0c68a5",
125
125
  release_date="2025-07-30",
126
126
  n_parameters=1_224_038_144,
127
+ n_embedding_parameters=183_500_800,
127
128
  memory_usage_mb=4669,
128
129
  embed_dim=1792,
129
130
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b/blob/main/LICENSE",
@@ -150,6 +151,7 @@ sbintuitions_sarashina_embedding_v1_1b = ModelMeta(
150
151
  revision="d060fcd8984075071e7fad81baff035cbb3b6c7e",
151
152
  release_date="2024-11-22",
152
153
  n_parameters=1_224_038_144,
154
+ n_embedding_parameters=183_500_800,
153
155
  memory_usage_mb=4669,
154
156
  embed_dim=1792,
155
157
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b/blob/main/LICENSE",
@@ -27,6 +27,7 @@ searchmap_preview = ModelMeta(
27
27
  use_instructions=True,
28
28
  release_date="2025-03-05",
29
29
  n_parameters=435_000_000,
30
+ n_embedding_parameters=None,
30
31
  memory_usage_mb=1660,
31
32
  embed_dim=4096,
32
33
  license="mit",
@@ -431,6 +431,7 @@ seed_embedding = ModelMeta(
431
431
  embed_dim=2048,
432
432
  open_weights=False,
433
433
  n_parameters=None,
434
+ n_embedding_parameters=None,
434
435
  memory_usage_mb=None,
435
436
  license=None,
436
437
  reference="https://seed1-6-embedding.github.io/",
@@ -616,6 +616,7 @@ seed_embedding = ModelMeta(
616
616
  embed_dim=2048,
617
617
  open_weights=False,
618
618
  n_parameters=None,
619
+ n_embedding_parameters=None,
619
620
  memory_usage_mb=None,
620
621
  license=None,
621
622
  reference="https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-embedding-vision",
@@ -253,6 +253,7 @@ seed_embedding = ModelMeta(
253
253
  embed_dim=2048,
254
254
  open_weights=False,
255
255
  n_parameters=None,
256
+ n_embedding_parameters=None,
256
257
  memory_usage_mb=None,
257
258
  license=None,
258
259
  reference="https://seed1-5-embedding.github.io/",