mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +27 -12
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/benchmarks/benchmarks/__init__.py +2 -0
  27. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  28. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  29. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  30. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  31. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  32. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  33. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  34. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  35. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  36. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  37. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  38. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  39. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  40. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  41. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  42. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  43. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  44. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  48. mteb/evaluate.py +10 -2
  49. mteb/models/model_implementations/align_models.py +1 -0
  50. mteb/models/model_implementations/amazon_models.py +1 -0
  51. mteb/models/model_implementations/andersborges.py +2 -0
  52. mteb/models/model_implementations/ara_models.py +1 -0
  53. mteb/models/model_implementations/arctic_models.py +8 -0
  54. mteb/models/model_implementations/b1ade_models.py +1 -0
  55. mteb/models/model_implementations/bedrock_models.py +4 -0
  56. mteb/models/model_implementations/bge_models.py +40 -1
  57. mteb/models/model_implementations/bica_model.py +1 -0
  58. mteb/models/model_implementations/blip2_models.py +2 -0
  59. mteb/models/model_implementations/blip_models.py +8 -0
  60. mteb/models/model_implementations/bm25.py +10 -5
  61. mteb/models/model_implementations/bmretriever_models.py +4 -0
  62. mteb/models/model_implementations/cadet_models.py +1 -0
  63. mteb/models/model_implementations/cde_models.py +2 -0
  64. mteb/models/model_implementations/clip_models.py +3 -0
  65. mteb/models/model_implementations/clips_models.py +3 -0
  66. mteb/models/model_implementations/codefuse_models.py +5 -0
  67. mteb/models/model_implementations/codesage_models.py +3 -0
  68. mteb/models/model_implementations/cohere_models.py +4 -0
  69. mteb/models/model_implementations/cohere_v.py +5 -0
  70. mteb/models/model_implementations/colpali_models.py +3 -0
  71. mteb/models/model_implementations/colqwen_models.py +7 -0
  72. mteb/models/model_implementations/colsmol_models.py +2 -0
  73. mteb/models/model_implementations/conan_models.py +1 -0
  74. mteb/models/model_implementations/dino_models.py +19 -0
  75. mteb/models/model_implementations/e5_instruct.py +4 -0
  76. mteb/models/model_implementations/e5_models.py +9 -0
  77. mteb/models/model_implementations/e5_v.py +1 -0
  78. mteb/models/model_implementations/eagerworks_models.py +1 -0
  79. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  80. mteb/models/model_implementations/en_code_retriever.py +1 -0
  81. mteb/models/model_implementations/euler_models.py +1 -0
  82. mteb/models/model_implementations/evaclip_models.py +4 -0
  83. mteb/models/model_implementations/fa_models.py +9 -0
  84. mteb/models/model_implementations/facebookai.py +2 -0
  85. mteb/models/model_implementations/geogpt_models.py +1 -0
  86. mteb/models/model_implementations/gme_v_models.py +2 -0
  87. mteb/models/model_implementations/google_models.py +5 -0
  88. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  89. mteb/models/model_implementations/gritlm_models.py +2 -0
  90. mteb/models/model_implementations/gte_models.py +9 -0
  91. mteb/models/model_implementations/hinvec_models.py +1 -0
  92. mteb/models/model_implementations/human.py +1 -0
  93. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  94. mteb/models/model_implementations/inf_models.py +2 -0
  95. mteb/models/model_implementations/jasper_models.py +2 -0
  96. mteb/models/model_implementations/jina_clip.py +1 -0
  97. mteb/models/model_implementations/jina_models.py +7 -0
  98. mteb/models/model_implementations/kalm_models.py +6 -0
  99. mteb/models/model_implementations/kblab.py +1 -0
  100. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  101. mteb/models/model_implementations/kfst.py +1 -0
  102. mteb/models/model_implementations/kowshik24_models.py +1 -0
  103. mteb/models/model_implementations/lens_models.py +2 -0
  104. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  105. mteb/models/model_implementations/linq_models.py +1 -0
  106. mteb/models/model_implementations/listconranker.py +1 -0
  107. mteb/models/model_implementations/llm2clip_models.py +3 -0
  108. mteb/models/model_implementations/llm2vec_models.py +8 -0
  109. mteb/models/model_implementations/mcinext_models.py +3 -0
  110. mteb/models/model_implementations/mdbr_models.py +2 -0
  111. mteb/models/model_implementations/misc_models.py +63 -0
  112. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  113. mteb/models/model_implementations/mme5_models.py +2 -1
  114. mteb/models/model_implementations/moco_models.py +2 -0
  115. mteb/models/model_implementations/mod_models.py +1 -0
  116. mteb/models/model_implementations/model2vec_models.py +13 -0
  117. mteb/models/model_implementations/moka_models.py +3 -0
  118. mteb/models/model_implementations/nbailab.py +3 -0
  119. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  120. mteb/models/model_implementations/nomic_models.py +6 -0
  121. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  122. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  123. mteb/models/model_implementations/nvidia_models.py +3 -0
  124. mteb/models/model_implementations/octen_models.py +2 -0
  125. mteb/models/model_implementations/openai_models.py +5 -0
  126. mteb/models/model_implementations/openclip_models.py +8 -0
  127. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  128. mteb/models/model_implementations/ops_moa_models.py +2 -0
  129. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  130. mteb/models/model_implementations/pawan_models.py +1 -0
  131. mteb/models/model_implementations/piccolo_models.py +2 -0
  132. mteb/models/model_implementations/promptriever_models.py +4 -0
  133. mteb/models/model_implementations/pylate_models.py +13 -0
  134. mteb/models/model_implementations/qodo_models.py +2 -0
  135. mteb/models/model_implementations/qtack_models.py +1 -0
  136. mteb/models/model_implementations/qwen3_models.py +3 -0
  137. mteb/models/model_implementations/qzhou_models.py +2 -0
  138. mteb/models/model_implementations/rasgaard_models.py +1 -0
  139. mteb/models/model_implementations/reasonir_model.py +65 -0
  140. mteb/models/model_implementations/repllama_models.py +2 -0
  141. mteb/models/model_implementations/rerankers_custom.py +3 -0
  142. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  143. mteb/models/model_implementations/richinfoai_models.py +1 -0
  144. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  145. mteb/models/model_implementations/ruri_models.py +10 -0
  146. mteb/models/model_implementations/salesforce_models.py +3 -0
  147. mteb/models/model_implementations/samilpwc_models.py +1 -0
  148. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  149. mteb/models/model_implementations/searchmap_models.py +1 -0
  150. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  151. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  152. mteb/models/model_implementations/seed_models.py +1 -0
  153. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  154. mteb/models/model_implementations/shuu_model.py +1 -0
  155. mteb/models/model_implementations/siglip_models.py +10 -0
  156. mteb/models/model_implementations/sonar_models.py +2 -1
  157. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  158. mteb/models/model_implementations/stella_models.py +6 -0
  159. mteb/models/model_implementations/tarka_models.py +2 -0
  160. mteb/models/model_implementations/text2vec_models.py +3 -0
  161. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  162. mteb/models/model_implementations/uae_models.py +1 -0
  163. mteb/models/model_implementations/vdr_models.py +1 -0
  164. mteb/models/model_implementations/vi_vn_models.py +6 -0
  165. mteb/models/model_implementations/vista_models.py +2 -0
  166. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  167. mteb/models/model_implementations/voyage_models.py +15 -0
  168. mteb/models/model_implementations/voyage_v.py +1 -0
  169. mteb/models/model_implementations/xyz_models.py +1 -0
  170. mteb/models/model_implementations/youtu_models.py +1 -0
  171. mteb/models/model_implementations/yuan_models.py +1 -0
  172. mteb/models/model_implementations/yuan_models_en.py +1 -0
  173. mteb/models/model_meta.py +35 -2
  174. mteb/models/models_protocols.py +4 -0
  175. mteb/models/search_wrappers.py +12 -0
  176. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  177. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  178. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  179. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  180. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  181. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  182. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  183. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  184. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  185. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  186. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  187. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  188. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  189. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  190. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  191. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  192. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  193. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  194. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  195. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  196. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  197. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  198. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  199. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  200. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  201. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  202. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  203. mteb/tasks/classification/est/estonian_valence.py +1 -1
  204. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  205. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  206. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  207. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  208. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  209. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  210. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  211. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  212. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  213. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  214. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  215. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  216. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  217. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  218. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  219. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  220. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  221. mteb/tasks/classification/kor/klue_tc.py +2 -2
  222. mteb/tasks/classification/kor/kor_fin.py +1 -1
  223. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  224. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  225. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  226. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  227. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  228. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  229. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  230. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  231. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  232. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  233. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  234. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  235. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  236. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  237. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  238. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  239. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  240. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  241. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  242. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  243. mteb/tasks/classification/ron/moroco.py +1 -1
  244. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  245. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  246. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  247. mteb/tasks/classification/rus/headline_classification.py +2 -2
  248. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  249. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  250. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  251. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  252. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  253. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  254. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  255. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  256. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  257. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  258. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  259. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  260. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  261. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  262. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  263. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  264. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  265. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  266. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  267. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  268. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  269. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  270. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  271. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  272. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  273. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  274. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  275. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  276. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  277. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  278. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  279. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  280. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  281. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  282. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  283. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  284. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  285. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  286. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  287. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  288. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  289. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  290. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  291. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  292. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  293. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  294. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  295. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  296. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  297. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  298. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  299. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  300. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  301. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  302. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  303. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  304. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  305. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  306. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  307. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  308. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  309. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  310. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  311. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  312. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  313. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  314. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  315. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  316. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  317. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  318. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  319. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  320. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  321. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  322. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  323. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  324. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  325. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  326. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  327. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  328. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  329. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  330. mteb/tasks/pair_classification/rus/terra.py +2 -2
  331. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  332. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  333. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  334. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  335. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  336. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  337. mteb/tasks/retrieval/code/code_rag.py +4 -4
  338. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  339. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  340. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  341. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  342. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  343. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  344. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  345. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  346. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  347. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  348. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  349. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  350. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  351. mteb/tasks/retrieval/eng/__init__.py +42 -0
  352. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  353. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  354. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  355. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  356. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  357. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  358. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  359. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  360. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  361. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  362. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  363. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  364. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  365. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  366. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  367. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  368. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  369. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  370. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  371. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  372. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  373. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  374. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  375. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  376. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  377. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  378. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  379. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  380. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  381. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  382. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  383. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  384. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  385. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  386. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  387. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  388. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  389. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  390. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  391. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  392. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  393. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  394. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  395. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  396. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  397. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  398. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  399. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  400. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  401. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  402. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  403. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  404. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  405. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  406. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  407. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  408. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  409. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  410. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  411. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  412. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  413. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  414. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  415. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  416. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  417. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  418. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  419. mteb/tasks/retrieval/nob/norquad.py +1 -1
  420. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  421. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  422. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  423. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  424. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  425. mteb/tasks/sts/kor/klue_sts.py +1 -1
  426. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  427. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  428. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  429. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  430. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
  431. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  432. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  433. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  434. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -173,6 +173,7 @@ m2v_base_glove_subword = ModelMeta(
173
173
  revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4",
174
174
  release_date="2024-09-21",
175
175
  n_parameters=int(103 * 1e6),
176
+ n_embedding_parameters=int(103 * 1e6),
176
177
  memory_usage_mb=391,
177
178
  max_tokens=np.inf, # Theoretically infinite
178
179
  embed_dim=256,
@@ -199,6 +200,7 @@ m2v_base_glove = ModelMeta(
199
200
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b",
200
201
  release_date="2024-09-21",
201
202
  n_parameters=int(102 * 1e6),
203
+ n_embedding_parameters=int(102 * 1e6),
202
204
  memory_usage_mb=391,
203
205
  max_tokens=np.inf,
204
206
  embed_dim=256,
@@ -224,6 +226,7 @@ m2v_base_output = ModelMeta(
224
226
  revision="02460ae401a22b09d2c6652e23371398329551e2",
225
227
  release_date="2024-09-21",
226
228
  n_parameters=int(7.56 * 1e6),
229
+ n_embedding_parameters=int(7.56 * 1e6),
227
230
  memory_usage_mb=29,
228
231
  max_tokens=np.inf,
229
232
  embed_dim=256,
@@ -249,6 +252,7 @@ m2v_multilingual_output = ModelMeta(
249
252
  revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305",
250
253
  release_date="2024-09-21",
251
254
  n_parameters=int(128 * 1e6),
255
+ n_embedding_parameters=int(128 * 1e6),
252
256
  memory_usage_mb=489,
253
257
  max_tokens=np.inf,
254
258
  embed_dim=256,
@@ -274,6 +278,7 @@ potion_base_2m = ModelMeta(
274
278
  revision="86db093558fbced2072b929eb1690bce5272bd4b",
275
279
  release_date="2024-10-29",
276
280
  n_parameters=int(2 * 1e6),
281
+ n_embedding_parameters=int(2 * 1e6),
277
282
  memory_usage_mb=7,
278
283
  max_tokens=np.inf,
279
284
  embed_dim=64,
@@ -299,6 +304,7 @@ potion_base_4m = ModelMeta(
299
304
  revision="81b1802ada41afcd0987a37dc15e569c9fa76f04",
300
305
  release_date="2024-10-29",
301
306
  n_parameters=int(3.78 * 1e6),
307
+ n_embedding_parameters=int(3.78 * 1e6),
302
308
  memory_usage_mb=14,
303
309
  max_tokens=np.inf,
304
310
  embed_dim=128,
@@ -324,6 +330,7 @@ potion_base_8m = ModelMeta(
324
330
  revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce",
325
331
  release_date="2024-10-29",
326
332
  n_parameters=int(7.56 * 1e6),
333
+ n_embedding_parameters=int(7.56 * 1e6),
327
334
  memory_usage_mb=29,
328
335
  max_tokens=np.inf,
329
336
  embed_dim=256,
@@ -349,6 +356,7 @@ potion_multilingual_128m = ModelMeta(
349
356
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2a",
350
357
  release_date="2025-05-23",
351
358
  n_parameters=128 * 1e6,
359
+ n_embedding_parameters=128 * 1e6,
352
360
  memory_usage_mb=489,
353
361
  max_tokens=np.inf,
354
362
  embed_dim=256,
@@ -374,6 +382,7 @@ pubmed_bert_100k = ModelMeta(
374
382
  revision="bac5e3b12fb8c650e92a19c41b436732c4f16e9e",
375
383
  release_date="2025-01-03",
376
384
  n_parameters=1 * 1e5,
385
+ n_embedding_parameters=1 * 1e5,
377
386
  memory_usage_mb=0,
378
387
  max_tokens=np.inf,
379
388
  embed_dim=64,
@@ -398,6 +407,7 @@ pubmed_bert_500k = ModelMeta(
398
407
  revision="34ba71e35c393fdad7ed695113f653feb407b16b",
399
408
  release_date="2025-01-03",
400
409
  n_parameters=5 * 1e5,
410
+ n_embedding_parameters=5 * 1e5,
401
411
  memory_usage_mb=2,
402
412
  max_tokens=np.inf,
403
413
  embed_dim=64,
@@ -422,6 +432,7 @@ pubmed_bert_1m = ModelMeta(
422
432
  revision="2b7fed222594708da6d88bcda92ae9b434b7ddd1",
423
433
  release_date="2025-01-03",
424
434
  n_parameters=1 * 1e6,
435
+ n_embedding_parameters=1 * 1e6,
425
436
  memory_usage_mb=2,
426
437
  max_tokens=np.inf,
427
438
  embed_dim=64,
@@ -446,6 +457,7 @@ pubmed_bert_2m = ModelMeta(
446
457
  revision="1d7bbe04d6713e425161146bfdc71473cbed498a",
447
458
  release_date="2025-01-03",
448
459
  n_parameters=1.95 * 1e6,
460
+ n_embedding_parameters=1.95 * 1e6,
449
461
  memory_usage_mb=7,
450
462
  max_tokens=np.inf,
451
463
  embed_dim=64,
@@ -470,6 +482,7 @@ pubmed_bert_8m = ModelMeta(
470
482
  revision="387d350015e963744f4fafe56a574b7cd48646c9",
471
483
  release_date="2025-01-03",
472
484
  n_parameters=7.81 * 1e6,
485
+ n_embedding_parameters=7.81 * 1e6,
473
486
  memory_usage_mb=30,
474
487
  max_tokens=np.inf,
475
488
  embed_dim=256,
@@ -97,6 +97,7 @@ m3e_base = ModelMeta(
97
97
  revision="764b537a0e50e5c7d64db883f2d2e051cbe3c64c",
98
98
  release_date="2023-06-06", # first commit
99
99
  n_parameters=int(102 * 1e6),
100
+ n_embedding_parameters=16_226_304,
100
101
  memory_usage_mb=390,
101
102
  embed_dim=768,
102
103
  # They don't give a specific license but commercial use is not allowed
@@ -123,6 +124,7 @@ m3e_small = ModelMeta(
123
124
  revision="44c696631b2a8c200220aaaad5f987f096e986df",
124
125
  release_date="2023-06-02", # first commit
125
126
  n_parameters=None,
127
+ n_embedding_parameters=10_817_536,
126
128
  memory_usage_mb=None, # Can't be seen on HF page
127
129
  embed_dim=512,
128
130
  # They don't give a specific license but commercial use is not allowed
@@ -149,6 +151,7 @@ m3e_large = ModelMeta(
149
151
  revision="12900375086c37ba5d83d1e417b21dc7d1d1f388",
150
152
  release_date="2023-06-21", # first commit
151
153
  n_parameters=None,
154
+ n_embedding_parameters=21_635_072,
152
155
  memory_usage_mb=None, # Can't be seen on HF page
153
156
  embed_dim=768,
154
157
  # They don't give a specific license but commercial use is not allowed
@@ -12,6 +12,7 @@ nb_sbert = ModelMeta(
12
12
  revision="b95656350a076aeafd2d23763660f80655408cc6",
13
13
  release_date="2022-11-23",
14
14
  n_parameters=1_780_000_000,
15
+ n_embedding_parameters=91_812_096,
15
16
  memory_usage_mb=678,
16
17
  embed_dim=4096,
17
18
  license="apache-2.0",
@@ -34,6 +35,7 @@ nb_bert_large = ModelMeta(
34
35
  revision="f9d0fc184adab4dc354d85e1854b7634540d7550",
35
36
  release_date="2021-04-29",
36
37
  n_parameters=355087360,
38
+ n_embedding_parameters=51_200_000,
37
39
  memory_usage_mb=1359,
38
40
  embed_dim=1024,
39
41
  license="cc-by-4.0",
@@ -56,6 +58,7 @@ nb_bert_base = ModelMeta(
56
58
  revision="9417c3f62a3adc99f17ff92bff446f35d011f994",
57
59
  release_date="2021-01-13",
58
60
  n_parameters=177853440,
61
+ n_embedding_parameters=91_812_096,
59
62
  memory_usage_mb=681,
60
63
  embed_dim=768,
61
64
  license="cc-by-4.0",
@@ -110,6 +110,7 @@ no_instruct_small_v0 = ModelMeta(
110
110
  revision="b38747000553d8268915c95a55fc87e707c9aadd",
111
111
  release_date="2024-05-01", # first commit
112
112
  n_parameters=33_400_000,
113
+ n_embedding_parameters=11_720_448,
113
114
  memory_usage_mb=127,
114
115
  max_tokens=512,
115
116
  embed_dim=384,
@@ -215,6 +215,7 @@ nomic_embed_v1_5 = ModelMeta(
215
215
  release_date="2024-02-10", # first commit
216
216
  citation=NOMIC_CITATION,
217
217
  n_parameters=137_000_000,
218
+ n_embedding_parameters=None,
218
219
  memory_usage_mb=522,
219
220
  max_tokens=8192,
220
221
  embed_dim=768,
@@ -249,6 +250,7 @@ nomic_embed_v1 = ModelMeta(
249
250
  revision="0759316f275aa0cb93a5b830973843ca66babcf5",
250
251
  release_date="2024-01-31", # first commit
251
252
  n_parameters=None,
253
+ n_embedding_parameters=None,
252
254
  memory_usage_mb=522,
253
255
  max_tokens=8192,
254
256
  embed_dim=768,
@@ -284,6 +286,7 @@ nomic_embed_v1_ablated = ModelMeta(
284
286
  revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f",
285
287
  release_date="2024-01-15", # first commit
286
288
  n_parameters=None,
289
+ n_embedding_parameters=None,
287
290
  memory_usage_mb=None,
288
291
  max_tokens=8192,
289
292
  embed_dim=768,
@@ -312,6 +315,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
312
315
  revision="b53d557b15ae63852847c222d336c1609eced93c",
313
316
  release_date="2024-01-15", # first commit
314
317
  n_parameters=None,
318
+ n_embedding_parameters=None,
315
319
  memory_usage_mb=None,
316
320
  max_tokens=8192,
317
321
  embed_dim=768,
@@ -340,6 +344,7 @@ nomic_modern_bert_embed = ModelMeta(
340
344
  revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
341
345
  release_date="2024-12-29",
342
346
  n_parameters=149_000_000,
347
+ n_embedding_parameters=None,
343
348
  memory_usage_mb=568,
344
349
  max_tokens=8192,
345
350
  embed_dim=768,
@@ -479,6 +484,7 @@ nomic_embed_text_v2_moe = ModelMeta(
479
484
  revision="1066b6599d099fbb93dfcb64f9c37a7c9e503e85",
480
485
  release_date="2025-02-07",
481
486
  n_parameters=475292928,
487
+ n_embedding_parameters=None,
482
488
  memory_usage_mb=1813,
483
489
  max_tokens=512,
484
490
  embed_dim=768,
@@ -175,6 +175,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
175
175
  release_date="2024-06-08",
176
176
  modalities=["image", "text"],
177
177
  n_parameters=92_900_000,
178
+ n_embedding_parameters=None,
178
179
  memory_usage_mb=355,
179
180
  max_tokens=2048,
180
181
  embed_dim=768,
@@ -162,6 +162,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
162
162
  release_date="2025-06-27",
163
163
  modalities=["image", "text"],
164
164
  n_parameters=2_418_000_000,
165
+ n_embedding_parameters=None,
165
166
  memory_usage_mb=4610,
166
167
  max_tokens=8192,
167
168
  embed_dim=2048,
@@ -189,6 +190,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
189
190
  release_date="2025-06-27",
190
191
  modalities=["image", "text"],
191
192
  n_parameters=4_407_000_000,
193
+ n_embedding_parameters=None,
192
194
  memory_usage_mb=8403,
193
195
  max_tokens=8192,
194
196
  embed_dim=3072,
@@ -204,6 +204,7 @@ NV_embed_v2 = ModelMeta(
204
204
  revision="7604d305b621f14095a1aa23d351674c2859553a",
205
205
  release_date="2024-09-09", # initial commit of hf model.
206
206
  n_parameters=7_850_000_000,
207
+ n_embedding_parameters=None,
207
208
  memory_usage_mb=14975,
208
209
  embed_dim=4096,
209
210
  license="cc-by-nc-4.0",
@@ -235,6 +236,7 @@ NV_embed_v1 = ModelMeta(
235
236
  revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
236
237
  release_date="2024-09-13", # initial commit of hf model.
237
238
  n_parameters=7_850_000_000,
239
+ n_embedding_parameters=None,
238
240
  memory_usage_mb=14975,
239
241
  embed_dim=4096,
240
242
  license="cc-by-nc-4.0",
@@ -624,6 +626,7 @@ llama_embed_nemotron_8b = ModelMeta(
624
626
  revision="84a375593d27d3528beb4e104822515659e093b4",
625
627
  release_date="2025-10-23",
626
628
  n_parameters=7_504_924_672,
629
+ n_embedding_parameters=None,
627
630
  memory_usage_mb=28629,
628
631
  embed_dim=4096,
629
632
  license="https://huggingface.co/nvidia/llama-embed-nemotron-8b/blob/main/LICENSE",
@@ -208,6 +208,7 @@ Octen_Embedding_4B = ModelMeta(
208
208
  revision="6e188e3b072c3e3678b235ad84e6e97bcbb71e8f",
209
209
  release_date="2025-12-30",
210
210
  n_parameters=4021774336,
211
+ n_embedding_parameters=None,
211
212
  memory_usage_mb=7671,
212
213
  embed_dim=2560,
213
214
  max_tokens=32768,
@@ -238,6 +239,7 @@ Octen_Embedding_8B = ModelMeta(
238
239
  revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
239
240
  release_date="2025-12-23",
240
241
  n_parameters=7567295488,
242
+ n_embedding_parameters=None,
241
243
  memory_usage_mb=14433,
242
244
  embed_dim=4096,
243
245
  max_tokens=32768,
@@ -185,6 +185,7 @@ text_embedding_3_small = ModelMeta(
185
185
  embed_dim=1536,
186
186
  open_weights=False,
187
187
  n_parameters=None,
188
+ n_embedding_parameters=None,
188
189
  memory_usage_mb=None,
189
190
  license=None,
190
191
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -213,6 +214,7 @@ text_embedding_3_large = ModelMeta(
213
214
  framework=["API"],
214
215
  use_instructions=False,
215
216
  n_parameters=None,
217
+ n_embedding_parameters=None,
216
218
  memory_usage_mb=None,
217
219
  public_training_code=None,
218
220
  public_training_data=None, # assumed
@@ -238,6 +240,7 @@ text_embedding_ada_002 = ModelMeta(
238
240
  framework=["API"],
239
241
  use_instructions=False,
240
242
  n_parameters=None,
243
+ n_embedding_parameters=None,
241
244
  memory_usage_mb=None,
242
245
  public_training_code=None,
243
246
  public_training_data=None, # assumed
@@ -262,6 +265,7 @@ text_embedding_3_small_512 = ModelMeta(
262
265
  embed_dim=512,
263
266
  open_weights=False,
264
267
  n_parameters=None,
268
+ n_embedding_parameters=None,
265
269
  memory_usage_mb=None,
266
270
  license=None,
267
271
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -292,6 +296,7 @@ text_embedding_3_large_512 = ModelMeta(
292
296
  framework=["API"],
293
297
  use_instructions=False,
294
298
  n_parameters=None,
299
+ n_embedding_parameters=None,
295
300
  memory_usage_mb=None,
296
301
  public_training_code=None,
297
302
  public_training_data=None, # assumed
@@ -133,6 +133,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
133
133
  release_date="2023-04-26",
134
134
  modalities=["image", "text"],
135
135
  n_parameters=428_000_000,
136
+ n_embedding_parameters=None,
136
137
  memory_usage_mb=1633,
137
138
  max_tokens=77,
138
139
  embed_dim=768,
@@ -159,6 +160,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
159
160
  release_date="2023-04-26",
160
161
  modalities=["image", "text"],
161
162
  n_parameters=151_000_000,
163
+ n_embedding_parameters=None,
162
164
  memory_usage_mb=576,
163
165
  max_tokens=77,
164
166
  embed_dim=512,
@@ -185,6 +187,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
185
187
  release_date="2023-04-26",
186
188
  modalities=["image", "text"],
187
189
  n_parameters=150_000_000,
190
+ n_embedding_parameters=None,
188
191
  memory_usage_mb=572,
189
192
  max_tokens=77,
190
193
  embed_dim=512,
@@ -211,6 +214,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
211
214
  release_date="2023-01-23",
212
215
  modalities=["image", "text"],
213
216
  n_parameters=2_540_000_000,
217
+ n_embedding_parameters=None,
214
218
  memory_usage_mb=9689,
215
219
  max_tokens=77,
216
220
  embed_dim=1280,
@@ -237,6 +241,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
237
241
  release_date="2023-03-06",
238
242
  modalities=["image", "text"],
239
243
  n_parameters=1_367_000_000,
244
+ n_embedding_parameters=None,
240
245
  memory_usage_mb=5215,
241
246
  max_tokens=77,
242
247
  embed_dim=1024,
@@ -263,6 +268,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
263
268
  release_date="2022-09-15",
264
269
  modalities=["image", "text"],
265
270
  n_parameters=986_000_000,
271
+ n_embedding_parameters=None,
266
272
  memory_usage_mb=3762,
267
273
  max_tokens=77,
268
274
  embed_dim=1024,
@@ -289,6 +295,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
289
295
  release_date="2022-09-15",
290
296
  modalities=["image", "text"],
291
297
  n_parameters=428_000_000,
298
+ n_embedding_parameters=None,
292
299
  memory_usage_mb=1631,
293
300
  max_tokens=77,
294
301
  embed_dim=768,
@@ -315,6 +322,7 @@ CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
315
322
  release_date="2022-09-15",
316
323
  modalities=["image", "text"],
317
324
  n_parameters=151_000_000,
325
+ n_embedding_parameters=None,
318
326
  memory_usage_mb=577,
319
327
  max_tokens=77,
320
328
  embed_dim=512,
@@ -140,6 +140,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
140
140
  revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
141
141
  release_date="2025-06-18",
142
142
  n_parameters=137_394_234,
143
+ n_embedding_parameters=23_440_896,
143
144
  memory_usage_mb=549,
144
145
  embed_dim=30522,
145
146
  license="apache-2.0",
@@ -166,6 +167,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
166
167
  revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
167
168
  release_date="2025-03-28",
168
169
  n_parameters=66_985_530,
170
+ n_embedding_parameters=23_440_896,
169
171
  memory_usage_mb=267,
170
172
  embed_dim=30522,
171
173
  license="apache-2.0",
@@ -188,6 +190,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
188
190
  revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
189
191
  release_date="2024-07-17",
190
192
  n_parameters=66_985_530,
193
+ n_embedding_parameters=23_440_896,
191
194
  memory_usage_mb=267,
192
195
  embed_dim=30522,
193
196
  license="apache-2.0",
@@ -211,6 +214,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
211
214
  revision="4af867a426867dfdd744097531046f4289a32fdd",
212
215
  release_date="2024-07-18",
213
216
  n_parameters=22_744_506,
217
+ n_embedding_parameters=11_720_448,
214
218
  memory_usage_mb=86,
215
219
  embed_dim=30522,
216
220
  license="apache-2.0",
@@ -233,6 +237,7 @@ opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
233
237
  revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
234
238
  release_date="2024-03-07",
235
239
  n_parameters=132_955_194,
240
+ n_embedding_parameters=23_440_896,
236
241
  memory_usage_mb=507,
237
242
  embed_dim=30522,
238
243
  license="apache-2.0",
@@ -33,6 +33,7 @@ ops_moa_conan_embedding = ModelMeta(
33
33
  languages=["zho-Hans"],
34
34
  loader=OPSWrapper,
35
35
  n_parameters=int(343 * 1e6),
36
+ n_embedding_parameters=21_635_072,
36
37
  memory_usage_mb=1308,
37
38
  max_tokens=512,
38
39
  embed_dim=1536,
@@ -65,6 +66,7 @@ ops_moa_yuan_embedding = ModelMeta(
65
66
  languages=["zho-Hans"],
66
67
  loader=OPSWrapper,
67
68
  n_parameters=int(343 * 1e6),
69
+ n_embedding_parameters=21_635_072,
68
70
  memory_usage_mb=1242,
69
71
  max_tokens=512,
70
72
  embed_dim=1536,
@@ -4,6 +4,7 @@ solon_embeddings_1_1 = ModelMeta(
4
4
  name="OrdalieTech/Solon-embeddings-mini-beta-1.1",
5
5
  languages=["fra-Latn"],
6
6
  n_parameters=210_000_000,
7
+ n_embedding_parameters=None,
7
8
  public_training_code=None,
8
9
  memory_usage_mb=808.0,
9
10
  open_weights=True,
@@ -20,6 +20,7 @@ pawan_embd_68m = ModelMeta(
20
20
  revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
21
21
  release_date="2025-12-08",
22
22
  n_parameters=68_000_000,
23
+ n_embedding_parameters=None,
23
24
  memory_usage_mb=260,
24
25
  embed_dim=768,
25
26
  license="apache-2.0",
@@ -12,6 +12,7 @@ piccolo_base_zh = ModelMeta(
12
12
  revision="47c0a63b8f667c3482e05b2fd45577bb19252196",
13
13
  release_date="2023-09-04", # first commit
14
14
  n_parameters=None,
15
+ n_embedding_parameters=16_226_304,
15
16
  memory_usage_mb=None, # can't see on model card
16
17
  embed_dim=768,
17
18
  license="mit",
@@ -37,6 +38,7 @@ piccolo_large_zh_v2 = ModelMeta(
37
38
  revision="05948c1d889355936bdf9db7d30df57dd78d25a3",
38
39
  release_date="2024-04-22", # first commit
39
40
  n_parameters=None,
41
+ n_embedding_parameters=None,
40
42
  memory_usage_mb=None, # we don't know because they removed the model
41
43
  embed_dim=1024,
42
44
  license="not specified",
@@ -87,6 +87,7 @@ promptriever_llama2 = ModelMeta(
87
87
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
88
88
  release_date="2024-09-15",
89
89
  n_parameters=7_000_000_000,
90
+ n_embedding_parameters=None,
90
91
  memory_usage_mb=26703,
91
92
  max_tokens=4096,
92
93
  embed_dim=4096,
@@ -123,6 +124,7 @@ promptriever_llama3 = ModelMeta(
123
124
  },
124
125
  release_date="2024-09-15",
125
126
  n_parameters=8_000_000_000,
127
+ n_embedding_parameters=None,
126
128
  memory_usage_mb=30518,
127
129
  max_tokens=8192,
128
130
  embed_dim=4096,
@@ -152,6 +154,7 @@ promptriever_llama3_instruct = ModelMeta(
152
154
  revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
153
155
  release_date="2024-09-15",
154
156
  n_parameters=8_000_000_000,
157
+ n_embedding_parameters=None,
155
158
  memory_usage_mb=30518,
156
159
  max_tokens=8192,
157
160
  embed_dim=4096,
@@ -185,6 +188,7 @@ promptriever_mistral_v1 = ModelMeta(
185
188
  revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
186
189
  release_date="2024-09-15",
187
190
  n_parameters=7_000_000_000,
191
+ n_embedding_parameters=131_072_000,
188
192
  memory_usage_mb=26703,
189
193
  training_datasets={
190
194
  # "samaya-ai/msmarco-w-instructions",
@@ -53,6 +53,7 @@ class PylateSearchEncoder:
53
53
  hf_split: str,
54
54
  hf_subset: str,
55
55
  encode_kwargs: EncodeKwargs,
56
+ num_proc: int,
56
57
  ) -> None:
57
58
  """Index the corpus for retrieval.
58
59
 
@@ -62,6 +63,7 @@ class PylateSearchEncoder:
62
63
  hf_split: Split of current task, allows to know some additional information about current split.
63
64
  hf_subset: Subset of current task. Similar to `hf_split` to get more information
64
65
  encode_kwargs: Additional arguments to pass to the encoder during indexing.
66
+ num_proc: Number of processes to use for indexing.
65
67
  """
66
68
  self.task_corpus = corpus
67
69
 
@@ -87,12 +89,14 @@ class PylateSearchEncoder:
87
89
  top_k: int,
88
90
  encode_kwargs: EncodeKwargs,
89
91
  top_ranked: TopRankedDocumentsType | None = None,
92
+ num_proc: int,
90
93
  ) -> RetrievalOutputType:
91
94
  queries_dataloader = create_dataloader(
92
95
  queries,
93
96
  task_metadata,
94
97
  prompt_type=PromptType.query,
95
98
  batch_size=encode_kwargs.get("batch_size", 32),
99
+ num_proc=num_proc,
96
100
  )
97
101
 
98
102
  query_embeddings = self.encode(
@@ -116,6 +120,7 @@ class PylateSearchEncoder:
116
120
  hf_subset=hf_subset,
117
121
  hf_split=hf_split,
118
122
  encode_kwargs=encode_kwargs,
123
+ num_proc=num_proc,
119
124
  )
120
125
  else:
121
126
  result_heaps = self._pylate_full_corpus_search(
@@ -126,6 +131,7 @@ class PylateSearchEncoder:
126
131
  hf_subset=hf_subset,
127
132
  hf_split=hf_split,
128
133
  encode_kwargs=encode_kwargs,
134
+ num_proc=num_proc,
129
135
  )
130
136
 
131
137
  results = {qid: {} for qid in query_idx_to_id.values()}
@@ -144,6 +150,7 @@ class PylateSearchEncoder:
144
150
  hf_split: str,
145
151
  top_k: int,
146
152
  encode_kwargs: EncodeKwargs,
153
+ num_proc: int,
147
154
  ) -> dict[str, list[tuple[float, str]]]:
148
155
  from pylate import indexes, retrieve
149
156
 
@@ -170,6 +177,7 @@ class PylateSearchEncoder:
170
177
  task_metadata,
171
178
  prompt_type=PromptType.document,
172
179
  batch_size=encode_kwargs.get("batch_size", 32),
180
+ num_proc=num_proc,
173
181
  )
174
182
  documents_embeddings = self.encode(
175
183
  documents_loader,
@@ -208,6 +216,7 @@ class PylateSearchEncoder:
208
216
  hf_subset: str,
209
217
  hf_split: str,
210
218
  encode_kwargs: EncodeKwargs,
219
+ num_proc: int = 1,
211
220
  ) -> dict[str, list[tuple[float, str]]]:
212
221
  """Rerank with PyLate's rank.rerank using per-query candidates.
213
222
 
@@ -230,6 +239,7 @@ class PylateSearchEncoder:
230
239
  task_metadata,
231
240
  prompt_type=PromptType.document,
232
241
  batch_size=encode_kwargs.get("batch_size", 32),
242
+ num_proc=num_proc,
233
243
  ),
234
244
  task_metadata=task_metadata,
235
245
  hf_split=hf_split,
@@ -352,6 +362,7 @@ colbert_v2 = ModelMeta(
352
362
  public_training_data=None,
353
363
  release_date="2024-09-21",
354
364
  n_parameters=int(110 * 1e6),
365
+ n_embedding_parameters=23_440_896,
355
366
  memory_usage_mb=418,
356
367
  max_tokens=180,
357
368
  embed_dim=None,
@@ -408,6 +419,7 @@ jina_colbert_v2 = ModelMeta(
408
419
  public_training_data=None,
409
420
  release_date="2024-08-16",
410
421
  n_parameters=int(559 * 1e6),
422
+ n_embedding_parameters=None,
411
423
  memory_usage_mb=1067,
412
424
  max_tokens=8192,
413
425
  embed_dim=None,
@@ -464,6 +476,7 @@ lightonai__gte_moderncolbert_v1 = ModelMeta(
464
476
  public_training_data="https://huggingface.co/datasets/lightonai/ms-marco-en-bge-gemma",
465
477
  release_date="2025-04-30",
466
478
  n_parameters=int(149 * 1e6),
479
+ n_embedding_parameters=None,
467
480
  memory_usage_mb=None,
468
481
  max_tokens=8192,
469
482
  embed_dim=None,
@@ -36,6 +36,7 @@ Qodo_Embed_1_1_5B = ModelMeta(
36
36
  revision="84bbef079b32e8823ec226d4e9e92902706b0eb6",
37
37
  release_date="2025-02-19",
38
38
  n_parameters=1_780_000_000,
39
+ n_embedding_parameters=232_928_256,
39
40
  memory_usage_mb=6776,
40
41
  embed_dim=1536,
41
42
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
@@ -59,6 +60,7 @@ Qodo_Embed_1_7B = ModelMeta(
59
60
  revision="f9edd9bf7f687c0e832424058e265120f603cd81",
60
61
  release_date="2025-02-24",
61
62
  n_parameters=7_613_000_000,
63
+ n_embedding_parameters=None,
62
64
  memory_usage_mb=29040,
63
65
  embed_dim=3584,
64
66
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
@@ -31,6 +31,7 @@ mini_gte = ModelMeta(
31
31
  revision="7fbe6f9b4cc42615e0747299f837ad7769025492",
32
32
  release_date="2025-01-28",
33
33
  n_parameters=int(66.3 * 1e6),
34
+ n_embedding_parameters=23_440_896,
34
35
  memory_usage_mb=253,
35
36
  embed_dim=768,
36
37
  license="apache-2.0",
@@ -147,6 +147,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
147
147
  revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
148
148
  release_date="2025-06-05",
149
149
  n_parameters=595776512,
150
+ n_embedding_parameters=None,
150
151
  memory_usage_mb=1136,
151
152
  embed_dim=1024,
152
153
  max_tokens=32768,
@@ -170,6 +171,7 @@ Qwen3_Embedding_4B = ModelMeta(
170
171
  revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
171
172
  release_date="2025-06-05",
172
173
  n_parameters=4021774336,
174
+ n_embedding_parameters=None,
173
175
  memory_usage_mb=7671,
174
176
  embed_dim=2560,
175
177
  max_tokens=32768,
@@ -193,6 +195,7 @@ Qwen3_Embedding_8B = ModelMeta(
193
195
  revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
194
196
  release_date="2025-06-05",
195
197
  n_parameters=7567295488,
198
+ n_embedding_parameters=None,
196
199
  memory_usage_mb=14433,
197
200
  embed_dim=4096,
198
201
  max_tokens=32768,
@@ -64,6 +64,7 @@ QZhou_Embedding = ModelMeta(
64
64
  revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
65
65
  release_date="2025-08-24",
66
66
  n_parameters=7_070_619_136,
67
+ n_embedding_parameters=None,
67
68
  memory_usage_mb=14436,
68
69
  embed_dim=3584,
69
70
  license="apache-2.0",
@@ -98,6 +99,7 @@ QZhou_Embedding_Zh = ModelMeta(
98
99
  revision="0321ccb126413d1e49c5ce908e802b63d35f18e2",
99
100
  release_date="2025-09-28",
100
101
  n_parameters=7_575_747_328,
102
+ n_embedding_parameters=None,
101
103
  memory_usage_mb=29431,
102
104
  embed_dim=1792,
103
105
  license="apache-2.0",
@@ -12,6 +12,7 @@ potion_base_8m = ModelMeta(
12
12
  revision="387897cfb09992e6d45ea9cd7b28b9fcf119e23a",
13
13
  release_date="2025-10-08",
14
14
  n_parameters=22893312,
15
+ n_embedding_parameters=22893312,
15
16
  memory_usage_mb=87,
16
17
  max_tokens=np.inf,
17
18
  embed_dim=256,