mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434)
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +27 -12
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/benchmarks/benchmarks/__init__.py +2 -0
  27. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  28. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  29. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  30. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  31. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  32. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  33. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  34. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  35. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  36. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  37. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  38. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  39. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  40. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  41. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  42. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  43. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  44. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  48. mteb/evaluate.py +10 -2
  49. mteb/models/model_implementations/align_models.py +1 -0
  50. mteb/models/model_implementations/amazon_models.py +1 -0
  51. mteb/models/model_implementations/andersborges.py +2 -0
  52. mteb/models/model_implementations/ara_models.py +1 -0
  53. mteb/models/model_implementations/arctic_models.py +8 -0
  54. mteb/models/model_implementations/b1ade_models.py +1 -0
  55. mteb/models/model_implementations/bedrock_models.py +4 -0
  56. mteb/models/model_implementations/bge_models.py +40 -1
  57. mteb/models/model_implementations/bica_model.py +1 -0
  58. mteb/models/model_implementations/blip2_models.py +2 -0
  59. mteb/models/model_implementations/blip_models.py +8 -0
  60. mteb/models/model_implementations/bm25.py +10 -5
  61. mteb/models/model_implementations/bmretriever_models.py +4 -0
  62. mteb/models/model_implementations/cadet_models.py +1 -0
  63. mteb/models/model_implementations/cde_models.py +2 -0
  64. mteb/models/model_implementations/clip_models.py +3 -0
  65. mteb/models/model_implementations/clips_models.py +3 -0
  66. mteb/models/model_implementations/codefuse_models.py +5 -0
  67. mteb/models/model_implementations/codesage_models.py +3 -0
  68. mteb/models/model_implementations/cohere_models.py +4 -0
  69. mteb/models/model_implementations/cohere_v.py +5 -0
  70. mteb/models/model_implementations/colpali_models.py +3 -0
  71. mteb/models/model_implementations/colqwen_models.py +7 -0
  72. mteb/models/model_implementations/colsmol_models.py +2 -0
  73. mteb/models/model_implementations/conan_models.py +1 -0
  74. mteb/models/model_implementations/dino_models.py +19 -0
  75. mteb/models/model_implementations/e5_instruct.py +4 -0
  76. mteb/models/model_implementations/e5_models.py +9 -0
  77. mteb/models/model_implementations/e5_v.py +1 -0
  78. mteb/models/model_implementations/eagerworks_models.py +1 -0
  79. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  80. mteb/models/model_implementations/en_code_retriever.py +1 -0
  81. mteb/models/model_implementations/euler_models.py +1 -0
  82. mteb/models/model_implementations/evaclip_models.py +4 -0
  83. mteb/models/model_implementations/fa_models.py +9 -0
  84. mteb/models/model_implementations/facebookai.py +2 -0
  85. mteb/models/model_implementations/geogpt_models.py +1 -0
  86. mteb/models/model_implementations/gme_v_models.py +2 -0
  87. mteb/models/model_implementations/google_models.py +5 -0
  88. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  89. mteb/models/model_implementations/gritlm_models.py +2 -0
  90. mteb/models/model_implementations/gte_models.py +9 -0
  91. mteb/models/model_implementations/hinvec_models.py +1 -0
  92. mteb/models/model_implementations/human.py +1 -0
  93. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  94. mteb/models/model_implementations/inf_models.py +2 -0
  95. mteb/models/model_implementations/jasper_models.py +2 -0
  96. mteb/models/model_implementations/jina_clip.py +1 -0
  97. mteb/models/model_implementations/jina_models.py +7 -0
  98. mteb/models/model_implementations/kalm_models.py +6 -0
  99. mteb/models/model_implementations/kblab.py +1 -0
  100. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  101. mteb/models/model_implementations/kfst.py +1 -0
  102. mteb/models/model_implementations/kowshik24_models.py +1 -0
  103. mteb/models/model_implementations/lens_models.py +2 -0
  104. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  105. mteb/models/model_implementations/linq_models.py +1 -0
  106. mteb/models/model_implementations/listconranker.py +1 -0
  107. mteb/models/model_implementations/llm2clip_models.py +3 -0
  108. mteb/models/model_implementations/llm2vec_models.py +8 -0
  109. mteb/models/model_implementations/mcinext_models.py +3 -0
  110. mteb/models/model_implementations/mdbr_models.py +2 -0
  111. mteb/models/model_implementations/misc_models.py +63 -0
  112. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  113. mteb/models/model_implementations/mme5_models.py +2 -1
  114. mteb/models/model_implementations/moco_models.py +2 -0
  115. mteb/models/model_implementations/mod_models.py +1 -0
  116. mteb/models/model_implementations/model2vec_models.py +13 -0
  117. mteb/models/model_implementations/moka_models.py +3 -0
  118. mteb/models/model_implementations/nbailab.py +3 -0
  119. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  120. mteb/models/model_implementations/nomic_models.py +6 -0
  121. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  122. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  123. mteb/models/model_implementations/nvidia_models.py +3 -0
  124. mteb/models/model_implementations/octen_models.py +2 -0
  125. mteb/models/model_implementations/openai_models.py +5 -0
  126. mteb/models/model_implementations/openclip_models.py +8 -0
  127. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  128. mteb/models/model_implementations/ops_moa_models.py +2 -0
  129. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  130. mteb/models/model_implementations/pawan_models.py +1 -0
  131. mteb/models/model_implementations/piccolo_models.py +2 -0
  132. mteb/models/model_implementations/promptriever_models.py +4 -0
  133. mteb/models/model_implementations/pylate_models.py +13 -0
  134. mteb/models/model_implementations/qodo_models.py +2 -0
  135. mteb/models/model_implementations/qtack_models.py +1 -0
  136. mteb/models/model_implementations/qwen3_models.py +3 -0
  137. mteb/models/model_implementations/qzhou_models.py +2 -0
  138. mteb/models/model_implementations/rasgaard_models.py +1 -0
  139. mteb/models/model_implementations/reasonir_model.py +65 -0
  140. mteb/models/model_implementations/repllama_models.py +2 -0
  141. mteb/models/model_implementations/rerankers_custom.py +3 -0
  142. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  143. mteb/models/model_implementations/richinfoai_models.py +1 -0
  144. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  145. mteb/models/model_implementations/ruri_models.py +10 -0
  146. mteb/models/model_implementations/salesforce_models.py +3 -0
  147. mteb/models/model_implementations/samilpwc_models.py +1 -0
  148. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  149. mteb/models/model_implementations/searchmap_models.py +1 -0
  150. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  151. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  152. mteb/models/model_implementations/seed_models.py +1 -0
  153. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  154. mteb/models/model_implementations/shuu_model.py +1 -0
  155. mteb/models/model_implementations/siglip_models.py +10 -0
  156. mteb/models/model_implementations/sonar_models.py +2 -1
  157. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  158. mteb/models/model_implementations/stella_models.py +6 -0
  159. mteb/models/model_implementations/tarka_models.py +2 -0
  160. mteb/models/model_implementations/text2vec_models.py +3 -0
  161. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  162. mteb/models/model_implementations/uae_models.py +1 -0
  163. mteb/models/model_implementations/vdr_models.py +1 -0
  164. mteb/models/model_implementations/vi_vn_models.py +6 -0
  165. mteb/models/model_implementations/vista_models.py +2 -0
  166. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  167. mteb/models/model_implementations/voyage_models.py +15 -0
  168. mteb/models/model_implementations/voyage_v.py +1 -0
  169. mteb/models/model_implementations/xyz_models.py +1 -0
  170. mteb/models/model_implementations/youtu_models.py +1 -0
  171. mteb/models/model_implementations/yuan_models.py +1 -0
  172. mteb/models/model_implementations/yuan_models_en.py +1 -0
  173. mteb/models/model_meta.py +35 -2
  174. mteb/models/models_protocols.py +4 -0
  175. mteb/models/search_wrappers.py +12 -0
  176. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  177. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  178. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  179. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  180. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  181. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  182. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  183. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  184. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  185. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  186. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  187. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  188. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  189. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  190. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  191. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  192. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  193. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  194. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  195. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  196. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  197. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  198. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  199. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  200. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  201. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  202. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  203. mteb/tasks/classification/est/estonian_valence.py +1 -1
  204. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  205. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  206. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  207. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  208. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  209. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  210. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  211. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  212. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  213. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  214. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  215. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  216. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  217. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  218. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  219. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  220. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  221. mteb/tasks/classification/kor/klue_tc.py +2 -2
  222. mteb/tasks/classification/kor/kor_fin.py +1 -1
  223. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  224. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  225. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  226. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  227. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  228. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  229. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  230. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  231. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  232. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  233. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  234. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  235. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  236. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  237. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  238. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  239. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  240. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  241. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  242. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  243. mteb/tasks/classification/ron/moroco.py +1 -1
  244. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  245. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  246. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  247. mteb/tasks/classification/rus/headline_classification.py +2 -2
  248. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  249. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  250. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  251. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  252. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  253. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  254. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  255. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  256. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  257. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  258. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  259. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  260. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  261. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  262. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  263. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  264. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  265. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  266. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  267. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  268. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  269. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  270. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  271. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  272. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  273. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  274. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  275. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  276. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  277. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  278. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  279. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  280. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  281. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  282. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  283. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  284. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  285. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  286. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  287. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  288. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  289. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  290. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  291. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  292. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  293. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  294. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  295. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  296. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  297. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  298. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  299. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  300. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  301. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  302. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  303. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  304. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  305. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  306. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  307. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  308. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  309. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  310. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  311. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  312. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  313. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  314. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  315. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  316. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  317. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  318. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  319. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  320. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  321. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  322. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  323. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  324. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  325. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  326. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  327. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  328. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  329. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  330. mteb/tasks/pair_classification/rus/terra.py +2 -2
  331. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  332. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  333. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  334. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  335. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  336. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  337. mteb/tasks/retrieval/code/code_rag.py +4 -4
  338. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  339. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  340. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  341. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  342. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  343. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  344. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  345. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  346. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  347. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  348. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  349. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  350. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  351. mteb/tasks/retrieval/eng/__init__.py +42 -0
  352. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  353. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  354. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  355. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  356. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  357. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  358. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  359. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  360. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  361. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  362. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  363. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  364. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  365. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  366. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  367. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  368. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  369. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  370. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  371. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  372. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  373. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  374. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  375. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  376. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  377. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  378. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  379. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  380. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  381. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  382. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  383. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  384. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  385. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  386. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  387. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  388. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  389. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  390. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  391. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  392. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  393. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  394. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  395. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  396. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  397. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  398. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  399. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  400. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  401. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  402. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  403. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  404. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  405. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  406. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  407. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  408. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  409. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  410. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  411. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  412. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  413. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  414. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  415. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  416. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  417. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  418. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  419. mteb/tasks/retrieval/nob/norquad.py +1 -1
  420. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  421. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  422. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  423. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  424. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  425. mteb/tasks/sts/kor/klue_sts.py +1 -1
  426. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  427. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  428. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  429. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  430. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
  431. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  432. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  433. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  434. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ if TYPE_CHECKING:
13
13
  from mteb.types import (
14
14
  CorpusDatasetType,
15
15
  EncodeKwargs,
16
- InstructionDatasetType,
17
16
  QueryDatasetType,
18
17
  RetrievalOutputType,
19
18
  TopRankedDocumentsType,
@@ -55,6 +54,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
55
54
  hf_split: str,
56
55
  hf_subset: str,
57
56
  encode_kwargs: EncodeKwargs,
57
+ num_proc: int = 1,
58
58
  ) -> None:
59
59
  logger.info("Encoding Corpus...")
60
60
  corpus_texts = [
@@ -80,8 +80,8 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
80
80
  hf_subset: str,
81
81
  top_k: int,
82
82
  encode_kwargs: EncodeKwargs,
83
- instructions: InstructionDatasetType | None = None,
84
83
  top_ranked: TopRankedDocumentsType | None = None,
84
+ num_proc: int = 1,
85
85
  ) -> RetrievalOutputType:
86
86
  logger.info("Encoding Queries...")
87
87
  query_ids = list(queries["id"])
@@ -103,13 +103,17 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
103
103
  query_results = queries_results[qi]
104
104
  scores = queries_scores[qi]
105
105
  doc_id_to_score = {}
106
+ query_documents = (
107
+ top_ranked[qid] if top_ranked and qid in top_ranked else None
108
+ )
106
109
 
107
110
  # Iterate over results
108
- for ri in range(len(query_results)):
109
- doc_idx = query_results[ri]
110
- score = scores[ri]
111
+ for doc_idx, score in zip(query_results, scores):
111
112
  doc_id = self.corpus_idx_to_id[doc_idx]
112
113
 
114
+ # handle reranking with a filtered set of documents
115
+ if query_documents is not None and doc_id not in query_documents:
116
+ continue
113
117
  doc_id_to_score[doc_id] = float(score)
114
118
 
115
119
  results[qid] = doc_id_to_score
@@ -132,6 +136,7 @@ bm25_s = ModelMeta(
132
136
  revision="0_1_10",
133
137
  release_date="2024-07-10", # release of version 0.1.10
134
138
  n_parameters=None,
139
+ n_embedding_parameters=None,
135
140
  memory_usage_mb=None,
136
141
  embed_dim=None,
137
142
  license=None,
@@ -103,6 +103,7 @@ BMRetriever_410M = ModelMeta(
103
103
  release_date="2024-04-29",
104
104
  embed_dim=1024,
105
105
  n_parameters=353_822_720,
106
+ n_embedding_parameters=51_511_296,
106
107
  memory_usage_mb=1349,
107
108
  max_tokens=2048,
108
109
  license="mit",
@@ -133,6 +134,7 @@ BMRetriever_1B = ModelMeta(
133
134
  release_date="2024-04-29",
134
135
  embed_dim=2048,
135
136
  n_parameters=908_759_040,
137
+ n_embedding_parameters=103_022_592,
136
138
  memory_usage_mb=3466,
137
139
  max_tokens=2048,
138
140
  license="mit",
@@ -163,6 +165,7 @@ BMRetriever_2B = ModelMeta(
163
165
  release_date="2024-04-29",
164
166
  embed_dim=2048,
165
167
  n_parameters=2_506_172_416,
168
+ n_embedding_parameters=524_288_000,
166
169
  memory_usage_mb=9560,
167
170
  max_tokens=8192,
168
171
  license="mit",
@@ -193,6 +196,7 @@ BMRetriever_7B = ModelMeta(
193
196
  release_date="2024-04-29",
194
197
  embed_dim=4096,
195
198
  n_parameters=7_110_660_096,
199
+ n_embedding_parameters=131_072_000,
196
200
  memory_usage_mb=27124,
197
201
  max_tokens=32768,
198
202
  license="mit",
@@ -41,6 +41,7 @@ cadet_embed = ModelMeta(
41
41
  open_weights=True,
42
42
  release_date="2025-05-11",
43
43
  n_parameters=109_000_000,
44
+ n_embedding_parameters=23_440_896,
44
45
  memory_usage_mb=418,
45
46
  embed_dim=768,
46
47
  license="apache-2.0",
@@ -226,6 +226,7 @@ cde_small_v1 = ModelMeta(
226
226
  revision="e151df18af0d7f1d1c37b074fee58406ececf19f",
227
227
  release_date="2024-09-24",
228
228
  n_parameters=int(281 * 1e6),
229
+ n_embedding_parameters=None,
229
230
  memory_usage_mb=1072, # Though the second-stage model is only 140M
230
231
  max_tokens=512,
231
232
  embed_dim=768,
@@ -255,6 +256,7 @@ cde_small_v2 = ModelMeta(
255
256
  revision="4e1d021a6c3fd7ce8aa0a7204057eee5ae61d390",
256
257
  release_date="2025-01-13",
257
258
  n_parameters=int(306 * 1e6),
259
+ n_embedding_parameters=None,
258
260
  memory_usage_mb=1166, # Though the second-stage model is only 140M
259
261
  max_tokens=512,
260
262
  embed_dim=768,
@@ -128,6 +128,7 @@ clip_vit_large_patch14 = ModelMeta(
128
128
  release_date="2021-02-26",
129
129
  modalities=["image", "text"],
130
130
  n_parameters=428_000_000,
131
+ n_embedding_parameters=None,
131
132
  memory_usage_mb=1631,
132
133
  max_tokens=77,
133
134
  embed_dim=768,
@@ -152,6 +153,7 @@ clip_vit_base_patch32 = ModelMeta(
152
153
  release_date="2021-02-26",
153
154
  modalities=["image", "text"],
154
155
  n_parameters=151_000_000,
156
+ n_embedding_parameters=None,
155
157
  memory_usage_mb=576,
156
158
  max_tokens=77,
157
159
  embed_dim=512,
@@ -176,6 +178,7 @@ clip_vit_base_patch16 = ModelMeta(
176
178
  release_date="2021-02-26",
177
179
  modalities=["image", "text"],
178
180
  n_parameters=151_000_000,
181
+ n_embedding_parameters=None,
179
182
  memory_usage_mb=576,
180
183
  max_tokens=77,
181
184
  embed_dim=512,
@@ -30,6 +30,7 @@ e5_nl_small = ModelMeta(
30
30
  revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
31
31
  release_date="2025-09-23",
32
32
  n_parameters=40_800_000,
33
+ n_embedding_parameters=19_200_768,
33
34
  memory_usage_mb=78,
34
35
  embed_dim=384,
35
36
  license="mit",
@@ -57,6 +58,7 @@ e5_nl_base = ModelMeta(
57
58
  revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
58
59
  release_date="2025-09-23",
59
60
  n_parameters=124_400_000,
61
+ n_embedding_parameters=38_401_536,
60
62
  memory_usage_mb=237,
61
63
  embed_dim=768,
62
64
  license="mit",
@@ -84,6 +86,7 @@ e5_nl_large = ModelMeta(
84
86
  revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
85
87
  release_date="2025-09-23",
86
88
  n_parameters=355_000_000,
89
+ n_embedding_parameters=51_202_048,
87
90
  memory_usage_mb=1355,
88
91
  embed_dim=1024,
89
92
  license="mit",
@@ -236,6 +236,7 @@ F2LLM_0B6 = ModelMeta(
236
236
  revision="36416618b83d4bd84a8ca30c2ee01ed518f9f2e7",
237
237
  release_date="2025-09-18",
238
238
  n_parameters=595_776_512,
239
+ n_embedding_parameters=None,
239
240
  memory_usage_mb=1137,
240
241
  embed_dim=1024,
241
242
  license="apache-2.0",
@@ -266,6 +267,7 @@ F2LLM_1B7 = ModelMeta(
266
267
  revision="fdce0e09655f42cea26f7f66f5a70cd4507ea45c",
267
268
  release_date="2025-09-18",
268
269
  n_parameters=1_720_574_976,
270
+ n_embedding_parameters=None,
269
271
  memory_usage_mb=3282,
270
272
  embed_dim=2560,
271
273
  license="apache-2.0",
@@ -296,6 +298,7 @@ F2LLM_4B = ModelMeta(
296
298
  revision="9fe95901ed2b6b59dd7673d6e93c9d76766a1e25",
297
299
  release_date="2025-09-18",
298
300
  n_parameters=4_021_774_336,
301
+ n_embedding_parameters=None,
299
302
  memory_usage_mb=7672,
300
303
  embed_dim=2560,
301
304
  license="apache-2.0",
@@ -318,6 +321,7 @@ C2LLM_0B5 = ModelMeta(
318
321
  release_date="2025-12-22",
319
322
  languages=c2llm_languages,
320
323
  n_parameters=497252096,
324
+ n_embedding_parameters=None,
321
325
  memory_usage_mb=948.0,
322
326
  max_tokens=32768,
323
327
  embed_dim=896,
@@ -346,6 +350,7 @@ C2LLM_7B = ModelMeta(
346
350
  release_date="2025-12-22",
347
351
  languages=c2llm_languages,
348
352
  n_parameters=7667028992,
353
+ n_embedding_parameters=None,
349
354
  memory_usage_mb=14624.0,
350
355
  max_tokens=32768,
351
356
  embed_dim=3584,
@@ -28,6 +28,7 @@ codesage_large = ModelMeta(
28
28
  release_date="2024-02-03",
29
29
  modalities=["text"],
30
30
  n_parameters=1_300_000_000,
31
+ n_embedding_parameters=100_667_392,
31
32
  memory_usage_mb=4959,
32
33
  max_tokens=2048,
33
34
  embed_dim=2048,
@@ -55,6 +56,7 @@ codesage_base = ModelMeta(
55
56
  release_date="2024-02-03",
56
57
  modalities=["text"],
57
58
  n_parameters=356_000_000,
59
+ n_embedding_parameters=50_333_696,
58
60
  memory_usage_mb=1358,
59
61
  max_tokens=2048,
60
62
  embed_dim=1024,
@@ -82,6 +84,7 @@ codesage_small = ModelMeta(
82
84
  release_date="2024-02-03",
83
85
  modalities=["text"],
84
86
  n_parameters=130_000_000,
87
+ n_embedding_parameters=50_333_696,
85
88
  memory_usage_mb=496,
86
89
  max_tokens=2048,
87
90
  embed_dim=1024,
@@ -392,6 +392,7 @@ cohere_mult_3 = ModelMeta(
392
392
  revision="1",
393
393
  release_date="2023-11-02",
394
394
  n_parameters=None,
395
+ n_embedding_parameters=None,
395
396
  memory_usage_mb=None,
396
397
  max_tokens=None,
397
398
  embed_dim=512,
@@ -418,6 +419,7 @@ cohere_eng_3 = ModelMeta(
418
419
  revision="1",
419
420
  release_date="2023-11-02",
420
421
  n_parameters=None,
422
+ n_embedding_parameters=None,
421
423
  memory_usage_mb=None,
422
424
  max_tokens=512,
423
425
  embed_dim=1024,
@@ -443,6 +445,7 @@ cohere_mult_light_3 = ModelMeta(
443
445
  reference="https://cohere.com/blog/introducing-embed-v3",
444
446
  release_date="2023-11-02",
445
447
  n_parameters=None,
448
+ n_embedding_parameters=None,
446
449
  memory_usage_mb=None,
447
450
  max_tokens=512,
448
451
  embed_dim=384,
@@ -468,6 +471,7 @@ cohere_eng_light_3 = ModelMeta(
468
471
  revision="1",
469
472
  release_date="2023-11-02",
470
473
  n_parameters=None,
474
+ n_embedding_parameters=None,
471
475
  memory_usage_mb=None,
472
476
  max_tokens=512,
473
477
  embed_dim=384,
@@ -391,6 +391,7 @@ cohere_mult_3 = ModelMeta(
391
391
  revision="1",
392
392
  release_date="2024-10-24",
393
393
  n_parameters=None,
394
+ n_embedding_parameters=None,
394
395
  memory_usage_mb=None,
395
396
  max_tokens=None,
396
397
  embed_dim=1024,
@@ -415,6 +416,7 @@ cohere_eng_3 = ModelMeta(
415
416
  revision="1",
416
417
  release_date="2024-10-24",
417
418
  n_parameters=None,
419
+ n_embedding_parameters=None,
418
420
  memory_usage_mb=None,
419
421
  max_tokens=None,
420
422
  embed_dim=1024,
@@ -439,6 +441,7 @@ cohere_embed_v4_multimodal = ModelMeta(
439
441
  revision="1",
440
442
  release_date="2024-12-01",
441
443
  n_parameters=None,
444
+ n_embedding_parameters=None,
442
445
  memory_usage_mb=None,
443
446
  max_tokens=128000,
444
447
  embed_dim=1536,
@@ -463,6 +466,7 @@ cohere_embed_v4_multimodal_binary = ModelMeta(
463
466
  revision="1",
464
467
  release_date="2024-12-01",
465
468
  n_parameters=None,
469
+ n_embedding_parameters=None,
466
470
  memory_usage_mb=None,
467
471
  max_tokens=128000,
468
472
  embed_dim=1536,
@@ -488,6 +492,7 @@ cohere_embed_v4_multimodal_int8 = ModelMeta(
488
492
  revision="1",
489
493
  release_date="2024-12-01",
490
494
  n_parameters=None,
495
+ n_embedding_parameters=None,
491
496
  memory_usage_mb=None,
492
497
  max_tokens=128000,
493
498
  embed_dim=1536,
@@ -220,6 +220,7 @@ colpali_v1_1 = ModelMeta(
220
220
  release_date="2024-08-21",
221
221
  modalities=["image", "text"],
222
222
  n_parameters=2_920_000_000,
223
+ n_embedding_parameters=None,
223
224
  memory_usage_mb=4700,
224
225
  max_tokens=16384,
225
226
  embed_dim=128,
@@ -247,6 +248,7 @@ colpali_v1_2 = ModelMeta(
247
248
  release_date="2024-08-26",
248
249
  modalities=["image", "text"],
249
250
  n_parameters=2_920_000_000,
251
+ n_embedding_parameters=None,
250
252
  memory_usage_mb=4700,
251
253
  max_tokens=16384,
252
254
  embed_dim=128,
@@ -274,6 +276,7 @@ colpali_v1_3 = ModelMeta(
274
276
  release_date="2024-11-01",
275
277
  modalities=["image", "text"],
276
278
  n_parameters=2_920_000_000,
279
+ n_embedding_parameters=None,
277
280
  memory_usage_mb=4700,
278
281
  max_tokens=16384,
279
282
  embed_dim=128,
@@ -224,6 +224,7 @@ colqwen2 = ModelMeta(
224
224
  release_date="2025-11-03",
225
225
  modalities=["image", "text"],
226
226
  n_parameters=2_210_000_000,
227
+ n_embedding_parameters=None,
227
228
  memory_usage_mb=7200,
228
229
  max_tokens=32768,
229
230
  embed_dim=128,
@@ -251,6 +252,7 @@ colqwen2_5 = ModelMeta(
251
252
  release_date="2025-01-31",
252
253
  modalities=["image", "text"],
253
254
  n_parameters=3_000_000_000,
255
+ n_embedding_parameters=None,
254
256
  memory_usage_mb=7200,
255
257
  max_tokens=128000,
256
258
  embed_dim=128,
@@ -295,6 +297,7 @@ colqwen3_8b = ModelMeta(
295
297
  release_date="2025-11-26",
296
298
  modalities=["image", "text"],
297
299
  n_parameters=8_000_000_000,
300
+ n_embedding_parameters=None,
298
301
  memory_usage_mb=16724,
299
302
  max_tokens=262144,
300
303
  embed_dim=320,
@@ -319,6 +322,7 @@ colqwen3_4b = ModelMeta(
319
322
  release_date="2025-11-26",
320
323
  modalities=["image", "text"],
321
324
  n_parameters=4_000_000_000,
325
+ n_embedding_parameters=None,
322
326
  memory_usage_mb=8466,
323
327
  max_tokens=262144,
324
328
  embed_dim=320,
@@ -365,6 +369,7 @@ colnomic_3b = ModelMeta(
365
369
  release_date="2025-03-31",
366
370
  modalities=["image", "text"],
367
371
  n_parameters=3_000_000_000,
372
+ n_embedding_parameters=None,
368
373
  memory_usage_mb=7200,
369
374
  max_tokens=128000,
370
375
  embed_dim=128,
@@ -430,6 +435,7 @@ evoqwen25_vl_retriever_3b_v1 = ModelMeta(
430
435
  release_date="2025-11-04",
431
436
  modalities=["image", "text"],
432
437
  n_parameters=3_000_000_000,
438
+ n_embedding_parameters=None,
433
439
  memory_usage_mb=7200,
434
440
  max_tokens=128000,
435
441
  embed_dim=128,
@@ -456,6 +462,7 @@ evoqwen25_vl_retriever_7b_v1 = ModelMeta(
456
462
  release_date="2025-11-04",
457
463
  modalities=["image", "text"],
458
464
  n_parameters=7_000_000_000,
465
+ n_embedding_parameters=None,
459
466
  memory_usage_mb=14400,
460
467
  max_tokens=128000,
461
468
  embed_dim=128,
@@ -60,6 +60,7 @@ colsmol_256m = ModelMeta(
60
60
  release_date="2025-01-22",
61
61
  modalities=["image", "text"],
62
62
  n_parameters=256_000_000,
63
+ n_embedding_parameters=None,
63
64
  memory_usage_mb=800,
64
65
  max_tokens=8192,
65
66
  embed_dim=128,
@@ -87,6 +88,7 @@ colsmol_500m = ModelMeta(
87
88
  release_date="2025-01-22",
88
89
  modalities=["image", "text"],
89
90
  n_parameters=500_000_000,
91
+ n_embedding_parameters=None,
90
92
  memory_usage_mb=1200,
91
93
  max_tokens=8192,
92
94
  embed_dim=128,
@@ -211,6 +211,7 @@ Conan_embedding_v2 = ModelMeta(
211
211
  embed_dim=3584,
212
212
  open_weights=False,
213
213
  n_parameters=None,
214
+ n_embedding_parameters=None,
214
215
  memory_usage_mb=None,
215
216
  license="apache-2.0",
216
217
  reference="https://huggingface.co/TencentBAC/Conan-embedding-v2",
@@ -117,6 +117,7 @@ dinov2_small = ModelMeta(
117
117
  release_date="2023-07-18",
118
118
  modalities=["image"],
119
119
  n_parameters=22_100_000,
120
+ n_embedding_parameters=None,
120
121
  memory_usage_mb=84,
121
122
  max_tokens=None,
122
123
  embed_dim=384,
@@ -148,6 +149,7 @@ dinov2_base = ModelMeta(
148
149
  release_date="2023-07-18",
149
150
  modalities=["image"],
150
151
  n_parameters=86_600_000,
152
+ n_embedding_parameters=None,
151
153
  memory_usage_mb=330,
152
154
  max_tokens=None,
153
155
  embed_dim=768,
@@ -179,6 +181,7 @@ dinov2_large = ModelMeta(
179
181
  release_date="2023-07-18",
180
182
  modalities=["image"],
181
183
  n_parameters=304_000_000,
184
+ n_embedding_parameters=None,
182
185
  memory_usage_mb=1161,
183
186
  max_tokens=None,
184
187
  embed_dim=1024,
@@ -210,6 +213,7 @@ dinov2_giant = ModelMeta(
210
213
  release_date="2023-07-18",
211
214
  modalities=["image"],
212
215
  n_parameters=1_140_000_000,
216
+ n_embedding_parameters=None,
213
217
  memory_usage_mb=4335,
214
218
  max_tokens=None,
215
219
  embed_dim=1536,
@@ -245,6 +249,7 @@ webssl_dino300m_full2b = ModelMeta(
245
249
  release_date="2025-04-24",
246
250
  modalities=["image"],
247
251
  n_parameters=304_000_000,
252
+ n_embedding_parameters=None,
248
253
  memory_usage_mb=1158,
249
254
  max_tokens=None,
250
255
  embed_dim=1024,
@@ -276,6 +281,7 @@ webssl_dino1b_full2b = ModelMeta(
276
281
  release_date="2025-04-24",
277
282
  modalities=["image"],
278
283
  n_parameters=1_130_000_000,
284
+ n_embedding_parameters=None,
279
285
  memory_usage_mb=4329,
280
286
  max_tokens=None,
281
287
  embed_dim=1536,
@@ -307,6 +313,7 @@ webssl_dino2b_full2b = ModelMeta(
307
313
  release_date="2025-04-24",
308
314
  modalities=["image"],
309
315
  n_parameters=2_080_000_000,
316
+ n_embedding_parameters=None,
310
317
  memory_usage_mb=7951,
311
318
  max_tokens=None,
312
319
  embed_dim=2688,
@@ -338,6 +345,7 @@ webssl_dino3b_full2b = ModelMeta(
338
345
  release_date="2025-04-24",
339
346
  modalities=["image"],
340
347
  n_parameters=3_000_000_000,
348
+ n_embedding_parameters=None,
341
349
  memory_usage_mb=11247,
342
350
  max_tokens=None,
343
351
  embed_dim=3072,
@@ -369,6 +377,7 @@ webssl_dino5b_full2b = ModelMeta(
369
377
  release_date="2025-04-24",
370
378
  modalities=["image"],
371
379
  n_parameters=5_000_000_000,
380
+ n_embedding_parameters=None,
372
381
  memory_usage_mb=18838,
373
382
  max_tokens=None,
374
383
  embed_dim=3584,
@@ -400,6 +409,7 @@ webssl_dino7b_full8b_224 = ModelMeta(
400
409
  release_date="2025-04-24",
401
410
  modalities=["image"],
402
411
  n_parameters=7_000_000_000,
412
+ n_embedding_parameters=None,
403
413
  memory_usage_mb=24605,
404
414
  max_tokens=None,
405
415
  embed_dim=4096,
@@ -431,6 +441,7 @@ webssl_dino7b_full8b_378 = ModelMeta(
431
441
  release_date="2025-04-24",
432
442
  modalities=["image"],
433
443
  n_parameters=7_000_000_000,
444
+ n_embedding_parameters=None,
434
445
  memory_usage_mb=24613,
435
446
  max_tokens=None,
436
447
  embed_dim=4096,
@@ -462,6 +473,7 @@ webssl_dino7b_full8b_518 = ModelMeta(
462
473
  release_date="2025-04-24",
463
474
  modalities=["image"],
464
475
  n_parameters=7_000_000_000,
476
+ n_embedding_parameters=None,
465
477
  memory_usage_mb=24623,
466
478
  max_tokens=None,
467
479
  embed_dim=4096,
@@ -494,6 +506,7 @@ webssl_dino2b_light2b = ModelMeta(
494
506
  release_date="2025-04-24",
495
507
  modalities=["image"],
496
508
  n_parameters=2_000_000_000,
509
+ n_embedding_parameters=None,
497
510
  memory_usage_mb=7951,
498
511
  max_tokens=None,
499
512
  embed_dim=2688,
@@ -525,6 +538,7 @@ webssl_dino2b_heavy2b = ModelMeta(
525
538
  release_date="2025-04-24",
526
539
  modalities=["image"],
527
540
  n_parameters=2_000_000_000,
541
+ n_embedding_parameters=None,
528
542
  memory_usage_mb=7951,
529
543
  max_tokens=None,
530
544
  embed_dim=2688,
@@ -556,6 +570,7 @@ webssl_dino3b_light2b = ModelMeta(
556
570
  release_date="2025-04-24",
557
571
  modalities=["image"],
558
572
  n_parameters=3_000_000_000,
573
+ n_embedding_parameters=None,
559
574
  memory_usage_mb=11247,
560
575
  max_tokens=None,
561
576
  embed_dim=3072,
@@ -587,6 +602,7 @@ webssl_dino3b_heavy2b = ModelMeta(
587
602
  release_date="2025-04-24",
588
603
  modalities=["image"],
589
604
  n_parameters=3_000_000_000,
605
+ n_embedding_parameters=None,
590
606
  memory_usage_mb=11247,
591
607
  max_tokens=None,
592
608
  embed_dim=3072,
@@ -618,6 +634,7 @@ webssl_mae300m_full2b = ModelMeta(
618
634
  release_date="2025-04-24",
619
635
  modalities=["image"],
620
636
  n_parameters=304_000_000,
637
+ n_embedding_parameters=None,
621
638
  memory_usage_mb=1161,
622
639
  max_tokens=None,
623
640
  embed_dim=1024,
@@ -649,6 +666,7 @@ webssl_mae700m_full2b = ModelMeta(
649
666
  release_date="2025-04-24",
650
667
  modalities=["image"],
651
668
  n_parameters=700_000_000,
669
+ n_embedding_parameters=None,
652
670
  memory_usage_mb=2412,
653
671
  max_tokens=None,
654
672
  embed_dim=1280,
@@ -680,6 +698,7 @@ webssl_mae1b_full2b = ModelMeta(
680
698
  release_date="2025-04-24",
681
699
  modalities=["image"],
682
700
  n_parameters=1_000_000_000,
701
+ n_embedding_parameters=None,
683
702
  memory_usage_mb=4337,
684
703
  max_tokens=None,
685
704
  embed_dim=1536,
@@ -57,6 +57,7 @@ e5_instruct = ModelMeta(
57
57
  use_instructions=True,
58
58
  reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct",
59
59
  n_parameters=560_000_000,
60
+ n_embedding_parameters=256_002_048,
60
61
  memory_usage_mb=1068,
61
62
  embed_dim=1024,
62
63
  license="mit",
@@ -102,6 +103,7 @@ e5_mistral = ModelMeta(
102
103
  use_instructions=True,
103
104
  reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct",
104
105
  n_parameters=7_111_000_000,
106
+ n_embedding_parameters=131_072_000,
105
107
  memory_usage_mb=13563,
106
108
  embed_dim=4096,
107
109
  license="mit",
@@ -145,6 +147,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
145
147
  release_date="2024-08-30",
146
148
  languages=["eng-Latn"],
147
149
  n_parameters=7110660096,
150
+ n_embedding_parameters=None,
148
151
  memory_usage_mb=13563,
149
152
  max_tokens=32768.0,
150
153
  embed_dim=4096,
@@ -228,6 +231,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
228
231
  release_date="2024-06-28",
229
232
  languages=["eng-Latn"],
230
233
  n_parameters=7241732096,
234
+ n_embedding_parameters=131_072_000,
231
235
  memory_usage_mb=27625,
232
236
  max_tokens=32768.0,
233
237
  embed_dim=4096,
@@ -76,6 +76,7 @@ e5_mult_small = ModelMeta(
76
76
  revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
77
77
  release_date=E5_PAPER_RELEASE_DATE,
78
78
  n_parameters=118_000_000,
79
+ n_embedding_parameters=96_014_208,
79
80
  memory_usage_mb=449,
80
81
  embed_dim=384,
81
82
  license="mit",
@@ -103,6 +104,7 @@ e5_mult_base = ModelMeta(
103
104
  revision="d13f1b27baf31030b7fd040960d60d909913633f",
104
105
  release_date=E5_PAPER_RELEASE_DATE,
105
106
  n_parameters=278_000_000,
107
+ n_embedding_parameters=192_001_536,
106
108
  memory_usage_mb=1061,
107
109
  embed_dim=768,
108
110
  license="mit",
@@ -130,6 +132,7 @@ e5_mult_large = ModelMeta(
130
132
  revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
131
133
  release_date=E5_PAPER_RELEASE_DATE,
132
134
  n_parameters=560_000_000,
135
+ n_embedding_parameters=256_002_048,
133
136
  memory_usage_mb=2136,
134
137
  embed_dim=1024,
135
138
  license="mit",
@@ -157,6 +160,7 @@ e5_eng_small_v2 = ModelMeta(
157
160
  revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
158
161
  release_date=E5_PAPER_RELEASE_DATE,
159
162
  n_parameters=33_000_000,
163
+ n_embedding_parameters=11_720_448,
160
164
  memory_usage_mb=127,
161
165
  embed_dim=384,
162
166
  license="mit",
@@ -184,6 +188,7 @@ e5_eng_small = ModelMeta(
184
188
  revision="e272f3049e853b47cb5ca3952268c6662abda68f",
185
189
  release_date=E5_PAPER_RELEASE_DATE,
186
190
  n_parameters=33_000_000,
191
+ n_embedding_parameters=11_720_448,
187
192
  memory_usage_mb=127,
188
193
  embed_dim=384,
189
194
  license="mit",
@@ -211,6 +216,7 @@ e5_eng_base_v2 = ModelMeta(
211
216
  revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
212
217
  release_date=E5_PAPER_RELEASE_DATE,
213
218
  n_parameters=109_000_000,
219
+ n_embedding_parameters=23_440_896,
214
220
  memory_usage_mb=418,
215
221
  embed_dim=768,
216
222
  license="mit",
@@ -239,6 +245,7 @@ e5_eng_large_v2 = ModelMeta(
239
245
  revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
240
246
  release_date=E5_PAPER_RELEASE_DATE,
241
247
  n_parameters=335_000_000,
248
+ n_embedding_parameters=31_254_528,
242
249
  memory_usage_mb=1278,
243
250
  embed_dim=1024,
244
251
  license="mit",
@@ -267,6 +274,7 @@ e5_large = ModelMeta(
267
274
  revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
268
275
  release_date="2022-12-26",
269
276
  n_parameters=335_000_000,
277
+ n_embedding_parameters=31_254_528,
270
278
  memory_usage_mb=1278,
271
279
  embed_dim=1024,
272
280
  license="apache-2.0",
@@ -295,6 +303,7 @@ e5_base = ModelMeta(
295
303
  revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
296
304
  release_date="2022-12-26",
297
305
  n_parameters=109_000_000,
306
+ n_embedding_parameters=23_440_896,
298
307
  memory_usage_mb=418,
299
308
  embed_dim=768,
300
309
  license="apache-2.0",
@@ -171,6 +171,7 @@ e5_v = ModelMeta(
171
171
  release_date="2024-07-17",
172
172
  modalities=["image", "text"],
173
173
  n_parameters=8_360_000_000,
174
+ n_embedding_parameters=None,
174
175
  memory_usage_mb=15936,
175
176
  max_tokens=8192,
176
177
  embed_dim=4096,
@@ -153,6 +153,7 @@ Eager_Embed_V1 = ModelMeta(
153
153
  release_date="2025-11-20",
154
154
  modalities=["image", "text"],
155
155
  n_parameters=4_000_000_000,
156
+ n_embedding_parameters=None,
156
157
  memory_usage_mb=16929,
157
158
  max_tokens=262144,
158
159
  embed_dim=2560,
@@ -10,6 +10,7 @@ embedding_gemma_300m_scandi = ModelMeta(
10
10
  revision="9f3307b9f601db564a9190cb475324d128dcfe86",
11
11
  release_date="2025-10-17",
12
12
  n_parameters=307_581_696,
13
+ n_embedding_parameters=None,
13
14
  embed_dim=768,
14
15
  max_tokens=2048,
15
16
  license="apache-2.0",
@@ -43,6 +44,7 @@ qwen_scandi = ModelMeta(
43
44
  revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
44
45
  release_date="2025-10-17",
45
46
  n_parameters=595776512,
47
+ n_embedding_parameters=None,
46
48
  memory_usage_mb=2272,
47
49
  embed_dim=1024,
48
50
  max_tokens=32768,
@@ -67,6 +69,7 @@ mmbert_scandi = ModelMeta(
67
69
  revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
68
70
  release_date="2025-10-17",
69
71
  n_parameters=306939648,
72
+ n_embedding_parameters=None,
70
73
  memory_usage_mb=1171,
71
74
  embed_dim=768,
72
75
  max_tokens=8192,