mteb 2.7.3__py3-none-any.whl → 2.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (434) hide show
  1. mteb/_create_dataloaders.py +47 -5
  2. mteb/_evaluators/any_sts_evaluator.py +2 -0
  3. mteb/_evaluators/clustering_evaluator.py +2 -0
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -0
  7. mteb/_evaluators/retrieval_evaluator.py +3 -0
  8. mteb/_evaluators/sklearn_evaluator.py +6 -1
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
  10. mteb/_evaluators/text/summarization_evaluator.py +2 -0
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
  12. mteb/abstasks/abstask.py +31 -12
  13. mteb/abstasks/classification.py +10 -3
  14. mteb/abstasks/clustering.py +6 -2
  15. mteb/abstasks/clustering_legacy.py +8 -2
  16. mteb/abstasks/image/image_text_pair_classification.py +6 -2
  17. mteb/abstasks/multilabel_classification.py +2 -0
  18. mteb/abstasks/pair_classification.py +8 -2
  19. mteb/abstasks/retrieval.py +27 -12
  20. mteb/abstasks/retrieval_dataset_loaders.py +29 -19
  21. mteb/abstasks/sts.py +10 -3
  22. mteb/abstasks/text/bitext_mining.py +9 -5
  23. mteb/abstasks/text/reranking.py +2 -2
  24. mteb/abstasks/text/summarization.py +2 -1
  25. mteb/abstasks/zeroshot_classification.py +8 -2
  26. mteb/benchmarks/benchmarks/__init__.py +2 -0
  27. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  28. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  29. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  30. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  31. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  32. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  33. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  34. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  35. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  36. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  37. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  38. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  39. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  40. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  41. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  42. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  43. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  44. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  48. mteb/evaluate.py +10 -2
  49. mteb/models/model_implementations/align_models.py +1 -0
  50. mteb/models/model_implementations/amazon_models.py +1 -0
  51. mteb/models/model_implementations/andersborges.py +2 -0
  52. mteb/models/model_implementations/ara_models.py +1 -0
  53. mteb/models/model_implementations/arctic_models.py +8 -0
  54. mteb/models/model_implementations/b1ade_models.py +1 -0
  55. mteb/models/model_implementations/bedrock_models.py +4 -0
  56. mteb/models/model_implementations/bge_models.py +40 -1
  57. mteb/models/model_implementations/bica_model.py +1 -0
  58. mteb/models/model_implementations/blip2_models.py +2 -0
  59. mteb/models/model_implementations/blip_models.py +8 -0
  60. mteb/models/model_implementations/bm25.py +10 -5
  61. mteb/models/model_implementations/bmretriever_models.py +4 -0
  62. mteb/models/model_implementations/cadet_models.py +1 -0
  63. mteb/models/model_implementations/cde_models.py +2 -0
  64. mteb/models/model_implementations/clip_models.py +3 -0
  65. mteb/models/model_implementations/clips_models.py +3 -0
  66. mteb/models/model_implementations/codefuse_models.py +5 -0
  67. mteb/models/model_implementations/codesage_models.py +3 -0
  68. mteb/models/model_implementations/cohere_models.py +4 -0
  69. mteb/models/model_implementations/cohere_v.py +5 -0
  70. mteb/models/model_implementations/colpali_models.py +3 -0
  71. mteb/models/model_implementations/colqwen_models.py +7 -0
  72. mteb/models/model_implementations/colsmol_models.py +2 -0
  73. mteb/models/model_implementations/conan_models.py +1 -0
  74. mteb/models/model_implementations/dino_models.py +19 -0
  75. mteb/models/model_implementations/e5_instruct.py +4 -0
  76. mteb/models/model_implementations/e5_models.py +9 -0
  77. mteb/models/model_implementations/e5_v.py +1 -0
  78. mteb/models/model_implementations/eagerworks_models.py +1 -0
  79. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  80. mteb/models/model_implementations/en_code_retriever.py +1 -0
  81. mteb/models/model_implementations/euler_models.py +1 -0
  82. mteb/models/model_implementations/evaclip_models.py +4 -0
  83. mteb/models/model_implementations/fa_models.py +9 -0
  84. mteb/models/model_implementations/facebookai.py +2 -0
  85. mteb/models/model_implementations/geogpt_models.py +1 -0
  86. mteb/models/model_implementations/gme_v_models.py +2 -0
  87. mteb/models/model_implementations/google_models.py +5 -0
  88. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  89. mteb/models/model_implementations/gritlm_models.py +2 -0
  90. mteb/models/model_implementations/gte_models.py +9 -0
  91. mteb/models/model_implementations/hinvec_models.py +1 -0
  92. mteb/models/model_implementations/human.py +1 -0
  93. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  94. mteb/models/model_implementations/inf_models.py +2 -0
  95. mteb/models/model_implementations/jasper_models.py +2 -0
  96. mteb/models/model_implementations/jina_clip.py +1 -0
  97. mteb/models/model_implementations/jina_models.py +7 -0
  98. mteb/models/model_implementations/kalm_models.py +6 -0
  99. mteb/models/model_implementations/kblab.py +1 -0
  100. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  101. mteb/models/model_implementations/kfst.py +1 -0
  102. mteb/models/model_implementations/kowshik24_models.py +1 -0
  103. mteb/models/model_implementations/lens_models.py +2 -0
  104. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  105. mteb/models/model_implementations/linq_models.py +1 -0
  106. mteb/models/model_implementations/listconranker.py +1 -0
  107. mteb/models/model_implementations/llm2clip_models.py +3 -0
  108. mteb/models/model_implementations/llm2vec_models.py +8 -0
  109. mteb/models/model_implementations/mcinext_models.py +3 -0
  110. mteb/models/model_implementations/mdbr_models.py +2 -0
  111. mteb/models/model_implementations/misc_models.py +63 -0
  112. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  113. mteb/models/model_implementations/mme5_models.py +2 -1
  114. mteb/models/model_implementations/moco_models.py +2 -0
  115. mteb/models/model_implementations/mod_models.py +1 -0
  116. mteb/models/model_implementations/model2vec_models.py +13 -0
  117. mteb/models/model_implementations/moka_models.py +3 -0
  118. mteb/models/model_implementations/nbailab.py +3 -0
  119. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  120. mteb/models/model_implementations/nomic_models.py +6 -0
  121. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  122. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  123. mteb/models/model_implementations/nvidia_models.py +3 -0
  124. mteb/models/model_implementations/octen_models.py +2 -0
  125. mteb/models/model_implementations/openai_models.py +5 -0
  126. mteb/models/model_implementations/openclip_models.py +8 -0
  127. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  128. mteb/models/model_implementations/ops_moa_models.py +2 -0
  129. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  130. mteb/models/model_implementations/pawan_models.py +1 -0
  131. mteb/models/model_implementations/piccolo_models.py +2 -0
  132. mteb/models/model_implementations/promptriever_models.py +4 -0
  133. mteb/models/model_implementations/pylate_models.py +13 -0
  134. mteb/models/model_implementations/qodo_models.py +2 -0
  135. mteb/models/model_implementations/qtack_models.py +1 -0
  136. mteb/models/model_implementations/qwen3_models.py +3 -0
  137. mteb/models/model_implementations/qzhou_models.py +2 -0
  138. mteb/models/model_implementations/rasgaard_models.py +1 -0
  139. mteb/models/model_implementations/reasonir_model.py +65 -0
  140. mteb/models/model_implementations/repllama_models.py +2 -0
  141. mteb/models/model_implementations/rerankers_custom.py +3 -0
  142. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  143. mteb/models/model_implementations/richinfoai_models.py +1 -0
  144. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  145. mteb/models/model_implementations/ruri_models.py +10 -0
  146. mteb/models/model_implementations/salesforce_models.py +3 -0
  147. mteb/models/model_implementations/samilpwc_models.py +1 -0
  148. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  149. mteb/models/model_implementations/searchmap_models.py +1 -0
  150. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  151. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  152. mteb/models/model_implementations/seed_models.py +1 -0
  153. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  154. mteb/models/model_implementations/shuu_model.py +1 -0
  155. mteb/models/model_implementations/siglip_models.py +10 -0
  156. mteb/models/model_implementations/sonar_models.py +2 -1
  157. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  158. mteb/models/model_implementations/stella_models.py +6 -0
  159. mteb/models/model_implementations/tarka_models.py +2 -0
  160. mteb/models/model_implementations/text2vec_models.py +3 -0
  161. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  162. mteb/models/model_implementations/uae_models.py +1 -0
  163. mteb/models/model_implementations/vdr_models.py +1 -0
  164. mteb/models/model_implementations/vi_vn_models.py +6 -0
  165. mteb/models/model_implementations/vista_models.py +2 -0
  166. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  167. mteb/models/model_implementations/voyage_models.py +15 -0
  168. mteb/models/model_implementations/voyage_v.py +1 -0
  169. mteb/models/model_implementations/xyz_models.py +1 -0
  170. mteb/models/model_implementations/youtu_models.py +1 -0
  171. mteb/models/model_implementations/yuan_models.py +1 -0
  172. mteb/models/model_implementations/yuan_models_en.py +1 -0
  173. mteb/models/model_meta.py +35 -2
  174. mteb/models/models_protocols.py +4 -0
  175. mteb/models/search_wrappers.py +12 -0
  176. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  177. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  178. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  179. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  180. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  181. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  182. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  183. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  184. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  185. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  186. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  187. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  188. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  189. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  190. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  191. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  192. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  193. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  194. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  195. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  196. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  197. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  198. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  199. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  200. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  201. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  202. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  203. mteb/tasks/classification/est/estonian_valence.py +1 -1
  204. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  205. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  206. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  207. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  208. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  209. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  210. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  211. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  212. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  213. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  214. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  215. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  216. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  217. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  218. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  219. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  220. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  221. mteb/tasks/classification/kor/klue_tc.py +2 -2
  222. mteb/tasks/classification/kor/kor_fin.py +1 -1
  223. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  224. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  225. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  226. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  227. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  228. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  229. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  230. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  231. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  232. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  233. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  234. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  235. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  236. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  237. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  238. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  239. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  240. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  241. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  242. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  243. mteb/tasks/classification/ron/moroco.py +1 -1
  244. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  245. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  246. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  247. mteb/tasks/classification/rus/headline_classification.py +2 -2
  248. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  249. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  250. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  251. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  252. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  253. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  254. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  255. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  256. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  257. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  258. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  259. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  260. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  261. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  262. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  263. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  264. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  265. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  266. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  267. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  268. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  269. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  270. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  271. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  272. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  273. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  274. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  275. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  276. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  277. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  278. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  279. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  280. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  281. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  282. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  283. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  284. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  285. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  286. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  287. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  288. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  289. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  290. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  291. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  292. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  293. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  294. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  295. mteb/tasks/clustering/nob/snl_clustering.py +1 -1
  296. mteb/tasks/clustering/nob/vg_clustering.py +1 -1
  297. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  298. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  299. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  300. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  301. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  302. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  303. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  304. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  305. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  306. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  307. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  308. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  309. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  310. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  311. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  312. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  313. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  314. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  315. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  316. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  317. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  318. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  319. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  320. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  321. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  322. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  323. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  324. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  325. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  326. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  327. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  328. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  329. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  330. mteb/tasks/pair_classification/rus/terra.py +2 -2
  331. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  332. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  333. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  334. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  335. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  336. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  337. mteb/tasks/retrieval/code/code_rag.py +4 -4
  338. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  339. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  340. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  341. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  342. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  343. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  344. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  345. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  346. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  347. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  348. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  349. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  350. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  351. mteb/tasks/retrieval/eng/__init__.py +42 -0
  352. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  353. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  354. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  355. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  356. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  357. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  358. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  359. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  360. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  361. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  362. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  363. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  364. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  365. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  366. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  367. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  368. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  369. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  370. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  371. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  372. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  373. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  374. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  375. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  376. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  377. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  378. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  379. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  380. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  381. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  382. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  383. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  384. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  385. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  386. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  387. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  388. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  389. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  390. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  391. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  392. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  393. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  394. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  395. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  396. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  397. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  398. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  399. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  400. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  401. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  402. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  403. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  404. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  405. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  406. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  407. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  408. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  409. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  410. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  411. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  412. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  413. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  414. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  415. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  416. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  417. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  418. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  419. mteb/tasks/retrieval/nob/norquad.py +1 -1
  420. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  421. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  422. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  423. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  424. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  425. mteb/tasks/sts/kor/klue_sts.py +1 -1
  426. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  427. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  428. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  429. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/METADATA +1 -1
  430. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/RECORD +434 -413
  431. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/WHEEL +0 -0
  432. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/entry_points.txt +0 -0
  433. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/licenses/LICENSE +0 -0
  434. {mteb-2.7.3.dist-info → mteb-2.7.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 107198,
4
+ "number_of_characters": 183652816,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 183501537,
7
+ "min_text_length": 1,
8
+ "average_text_length": 1713.6703710275399,
9
+ "max_text_length": 4000,
10
+ "unique_texts": 66270
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 151279,
15
+ "min_text_length": 185,
16
+ "average_text_length": 1292.982905982906,
17
+ "max_text_length": 12432,
18
+ "unique_texts": 117
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 819,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 7.0,
25
+ "max_relevant_docs_per_query": 59,
26
+ "unique_relevant_docs": 816
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 12528477,
30
+ "min_top_ranked_per_query": 107081,
31
+ "average_top_ranked_per_query": 107081.0,
32
+ "max_top_ranked_per_query": 107081
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 662,
4
+ "number_of_characters": 21154322,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 21080575,
7
+ "min_text_length": 30,
8
+ "average_text_length": 38051.579422382674,
9
+ "max_text_length": 5732344,
10
+ "unique_texts": 551
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 73747,
15
+ "min_text_length": 158,
16
+ "average_text_length": 682.8425925925926,
17
+ "max_text_length": 2843,
18
+ "unique_texts": 108
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 129,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1944444444444444,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 129
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 59832,
30
+ "min_top_ranked_per_query": 554,
31
+ "average_top_ranked_per_query": 554.0,
32
+ "max_top_ranked_per_query": 554
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 60900,
4
+ "number_of_characters": 20971763,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20898016,
7
+ "min_text_length": 1,
8
+ "average_text_length": 343.7626003421503,
9
+ "max_text_length": 158296,
10
+ "unique_texts": 50142
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 73747,
15
+ "min_text_length": 158,
16
+ "average_text_length": 682.8425925925926,
17
+ "max_text_length": 2843,
18
+ "unique_texts": 108
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 604,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 5.592592592592593,
25
+ "max_relevant_docs_per_query": 59,
26
+ "unique_relevant_docs": 604
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 6565536,
30
+ "min_top_ranked_per_query": 60792,
31
+ "average_top_ranked_per_query": 60792.0,
32
+ "max_top_ranked_per_query": 60792
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 188207,
4
+ "number_of_characters": 141817604,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 141734227,
7
+ "min_text_length": 58,
8
+ "average_text_length": 753.8974425803981,
9
+ "max_text_length": 7334,
10
+ "unique_texts": 176508
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 83377,
15
+ "min_text_length": 12,
16
+ "average_text_length": 406.7170731707317,
17
+ "max_text_length": 1255,
18
+ "unique_texts": 201
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 469,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 2.299019607843137,
25
+ "max_relevant_docs_per_query": 7,
26
+ "unique_relevant_docs": 234
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 37946536,
30
+ "min_top_ranked_per_query": 176970,
31
+ "average_top_ranked_per_query": 185105.05365853658,
32
+ "max_top_ranked_per_query": 188176
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 23904,
4
+ "number_of_characters": 20825122,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20797224,
7
+ "min_text_length": 74,
8
+ "average_text_length": 872.4033726246906,
9
+ "max_text_length": 19104,
10
+ "unique_texts": 23839
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 27898,
15
+ "min_text_length": 13,
16
+ "average_text_length": 429.2,
17
+ "max_text_length": 1255,
18
+ "unique_texts": 65
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 126,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.9384615384615385,
25
+ "max_relevant_docs_per_query": 6,
26
+ "unique_relevant_docs": 95
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 1549535,
30
+ "min_top_ranked_per_query": 23839,
31
+ "average_top_ranked_per_query": 23839.0,
32
+ "max_top_ranked_per_query": 23839
33
+ }
34
+ }
35
+ }
mteb/evaluate.py CHANGED
@@ -125,6 +125,7 @@ def _evaluate_task(
125
125
  co2_tracker=False,
126
126
  prediction_folder=prediction_folder,
127
127
  public_only=public_only,
128
+ num_proc=num_proc,
128
129
  )
129
130
  if isinstance(result, TaskResult):
130
131
  result.kg_co2_emissions = tracker.final_emissions
@@ -137,7 +138,7 @@ def _evaluate_task(
137
138
  data_preloaded = task.data_loaded
138
139
  if not data_preloaded:
139
140
  try:
140
- task.load_data()
141
+ task.load_data(num_proc=num_proc)
141
142
  except DatasetNotFoundError as e:
142
143
  if not task.metadata.is_public and public_only is None:
143
144
  msg = (
@@ -163,6 +164,7 @@ def _evaluate_task(
163
164
  subsets_to_run=hf_subsets,
164
165
  encode_kwargs=encode_kwargs,
165
166
  prediction_folder=prediction_folder,
167
+ num_proc=num_proc,
166
168
  )
167
169
  tock = time()
168
170
 
@@ -280,6 +282,7 @@ def evaluate(
280
282
  prediction_folder: Path | str | None = None,
281
283
  show_progress_bar: bool = True,
282
284
  public_only: bool | None = None,
285
+ num_proc: int = 1,
283
286
  ) -> ModelResult:
284
287
  """This function runs a model on a given task and returns the results.
285
288
 
@@ -288,7 +291,7 @@ def evaluate(
288
291
  tasks: A task to run.
289
292
  co2_tracker: If True, track the CO₂ emissions of the evaluation, required codecarbon to be installed, which can be installed using
290
293
  `pip install mteb[codecarbon]`. If none is passed co2 tracking will only be run if codecarbon is installed.
291
- encode_kwargs: Additional keyword arguments passed to the models `encode` method.
294
+ encode_kwargs: Additional keyword arguments passed to the models `encode` and `load_data` methods;
292
295
  raise_error: If True, raise an error if the task fails. If False, return an empty list.
293
296
  cache: The cache to use for loading the results. If None, then no cache will be used. The default cache saved the cache in the
294
297
  `~/.cache/mteb` directory. It can be overridden by setting the `MTEB_CACHE` environment variable to a different directory or by directly
@@ -304,6 +307,7 @@ def evaluate(
304
307
  show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
305
308
  `encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
306
309
  public_only: Run only public tasks. If None, it will attempt to run the private task.
310
+ num_proc: Number of processes to use during data loading and transformation. Defaults to 1.
307
311
 
308
312
  Returns:
309
313
  The results of the evaluation.
@@ -356,6 +360,7 @@ def evaluate(
356
360
  prediction_folder=prediction_folder,
357
361
  show_progress_bar=show_progress_bar,
358
362
  public_only=public_only,
363
+ num_proc=num_proc,
359
364
  )
360
365
  combined_results = aggregated_task.combine_task_results(results.task_results)
361
366
  return ModelResult(
@@ -388,6 +393,7 @@ def evaluate(
388
393
  prediction_folder=prediction_folder,
389
394
  show_progress_bar=False,
390
395
  public_only=public_only,
396
+ num_proc=num_proc,
391
397
  )
392
398
  evaluate_results.extend(_res.task_results)
393
399
  if _res.exceptions:
@@ -467,6 +473,7 @@ def evaluate(
467
473
  encode_kwargs=encode_kwargs,
468
474
  prediction_folder=prediction_folder,
469
475
  public_only=public_only,
476
+ num_proc=num_proc,
470
477
  )
471
478
  except Exception as e:
472
479
  logger.error(
@@ -482,6 +489,7 @@ def evaluate(
482
489
  encode_kwargs=encode_kwargs,
483
490
  prediction_folder=prediction_folder,
484
491
  public_only=public_only,
492
+ num_proc=num_proc,
485
493
  )
486
494
  logger.info(f"✓ Finished evaluation for {task.metadata.name}")
487
495
 
@@ -116,6 +116,7 @@ align_base = ModelMeta(
116
116
  release_date="2023-02-24",
117
117
  modalities=["image", "text"],
118
118
  n_parameters=176_000_000,
119
+ n_embedding_parameters=None,
119
120
  memory_usage_mb=671,
120
121
  max_tokens=64,
121
122
  embed_dim=768,
@@ -8,6 +8,7 @@ amazon_titan_text_embeddings_v2 = ModelMeta(
8
8
  release_date="2024-04-30",
9
9
  languages=["eng-Latn"],
10
10
  n_parameters=None,
11
+ n_embedding_parameters=None,
11
12
  memory_usage_mb=None,
12
13
  max_tokens=None,
13
14
  embed_dim=None,
@@ -12,6 +12,7 @@ model2vecdk = ModelMeta(
12
12
  revision="cb576c78dcc1b729e4612645f61db59929d69e61",
13
13
  release_date="2025-11-21",
14
14
  n_parameters=48042496,
15
+ n_embedding_parameters=None,
15
16
  memory_usage_mb=183,
16
17
  max_tokens=np.inf,
17
18
  embed_dim=256,
@@ -43,6 +44,7 @@ model2vecdk_stem = ModelMeta(
43
44
  revision="cb576c78dcc1b729e4612645f61db59929d69e61",
44
45
  release_date="2025-11-21",
45
46
  n_parameters=48578560,
47
+ n_embedding_parameters=None,
46
48
  memory_usage_mb=185,
47
49
  max_tokens=np.inf,
48
50
  embed_dim=256,
@@ -10,6 +10,7 @@ arabic_triplet_matryoshka = ModelMeta(
10
10
  revision="ed357f222f0b6ea6670d2c9b5a1cb93950d34200",
11
11
  release_date="2024-07-28",
12
12
  n_parameters=135_000_000,
13
+ n_embedding_parameters=49_152_000,
13
14
  memory_usage_mb=516,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
@@ -147,6 +147,7 @@ arctic_embed_xs = ModelMeta(
147
147
  open_weights=True,
148
148
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
149
149
  n_parameters=22_600_000,
150
+ n_embedding_parameters=11_720_448,
150
151
  memory_usage_mb=86,
151
152
  max_tokens=512,
152
153
  embed_dim=384,
@@ -173,6 +174,7 @@ arctic_embed_s = ModelMeta(
173
174
  open_weights=True,
174
175
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
175
176
  n_parameters=32_200_000,
177
+ n_embedding_parameters=11_720_448,
176
178
  memory_usage_mb=127,
177
179
  max_tokens=512,
178
180
  embed_dim=384,
@@ -199,6 +201,7 @@ arctic_embed_m = ModelMeta(
199
201
  open_weights=True,
200
202
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
201
203
  n_parameters=109_000_000,
204
+ n_embedding_parameters=23_440_896,
202
205
  memory_usage_mb=415,
203
206
  max_tokens=512,
204
207
  embed_dim=768,
@@ -225,6 +228,7 @@ arctic_embed_m_long = ModelMeta(
225
228
  open_weights=True,
226
229
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
227
230
  n_parameters=137_000_000,
231
+ n_embedding_parameters=None,
228
232
  memory_usage_mb=522,
229
233
  max_tokens=2048,
230
234
  embed_dim=768,
@@ -250,6 +254,7 @@ arctic_embed_l = ModelMeta(
250
254
  open_weights=True,
251
255
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
252
256
  n_parameters=335_000_000,
257
+ n_embedding_parameters=31_254_528,
253
258
  memory_usage_mb=1274,
254
259
  max_tokens=512,
255
260
  embed_dim=1024,
@@ -280,6 +285,7 @@ arctic_embed_m_v1_5 = ModelMeta(
280
285
  open_weights=True,
281
286
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
282
287
  n_parameters=109_000_000,
288
+ n_embedding_parameters=23_440_896,
283
289
  memory_usage_mb=415,
284
290
  max_tokens=512,
285
291
  embed_dim=768,
@@ -306,6 +312,7 @@ arctic_embed_m_v2_0 = ModelMeta(
306
312
  open_weights=True,
307
313
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
308
314
  n_parameters=305_000_000,
315
+ n_embedding_parameters=None,
309
316
  memory_usage_mb=1165,
310
317
  max_tokens=8192,
311
318
  embed_dim=768,
@@ -331,6 +338,7 @@ arctic_embed_l_v2_0 = ModelMeta(
331
338
  open_weights=True,
332
339
  framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
333
340
  n_parameters=568_000_000,
341
+ n_embedding_parameters=256_002_048,
334
342
  memory_usage_mb=2166,
335
343
  max_tokens=8192,
336
344
  embed_dim=1024,
@@ -16,6 +16,7 @@ b1ade_embed = ModelMeta(
16
16
  open_weights=True,
17
17
  release_date="2025-03-10",
18
18
  n_parameters=335_000_000,
19
+ n_embedding_parameters=31_254_528,
19
20
  memory_usage_mb=1278,
20
21
  embed_dim=1024,
21
22
  license="mit",
@@ -179,6 +179,7 @@ amazon_titan_embed_text_v1 = ModelMeta(
179
179
  embed_dim=1536,
180
180
  open_weights=False,
181
181
  n_parameters=None,
182
+ n_embedding_parameters=None,
182
183
  memory_usage_mb=None,
183
184
  public_training_code=None,
184
185
  public_training_data=None, # assumed
@@ -206,6 +207,7 @@ amazon_titan_embed_text_v2 = ModelMeta(
206
207
  embed_dim=1024,
207
208
  open_weights=False,
208
209
  n_parameters=None,
210
+ n_embedding_parameters=None,
209
211
  memory_usage_mb=None,
210
212
  public_training_code=None,
211
213
  public_training_data=None, # assumed
@@ -235,6 +237,7 @@ cohere_embed_english_v3 = ModelMeta(
235
237
  revision="1",
236
238
  release_date="2023-11-02",
237
239
  n_parameters=None,
240
+ n_embedding_parameters=None,
238
241
  memory_usage_mb=None,
239
242
  public_training_code=None,
240
243
  public_training_data=None, # assumed
@@ -263,6 +266,7 @@ cohere_embed_multilingual_v3 = ModelMeta(
263
266
  revision="1",
264
267
  release_date="2023-11-02",
265
268
  n_parameters=None,
269
+ n_embedding_parameters=None,
266
270
  memory_usage_mb=None,
267
271
  public_training_code=None,
268
272
  public_training_data=None, # assumed
@@ -6,7 +6,29 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
 
7
7
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
8
8
 
9
- model_prompts = {"query": "Represent this sentence for searching relevant passages: "}
9
+ model_prompts = {
10
+ "query": "Represent this sentence for searching relevant passages: ",
11
+ "BrightBiologyRetrieval-query": "Represent this biology post for searching relevant passages: ",
12
+ "BrightEarthScienceRetrieval-query": "Represent this earth_science post for searching relevant passages: ",
13
+ "BrightEconomicsRetrieval-query": "Represent this economics post for searching relevant passages: ",
14
+ "BrightPsychologyRetrieval-query": "Represent this psychology post for searching relevant passages: ",
15
+ "BrightRoboticsRetrieval-query": "Represent this robotics post for searching relevant passages: ",
16
+ "BrightStackoverflowRetrieval-query": "Represent this stackoverflow post for searching relevant passages: ",
17
+ "BrightSustainableLivingRetrieval-query": "Represent this sustainable_living post for searching relevant passages: ",
18
+ "BrightPonyRetrieval-query": "Represent this Pony question for searching relevant passages: ",
19
+ "BrightLeetcodeRetrieval-query": "Represent this Coding problem for searching relevant examples: ",
20
+ "BrightAopsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
21
+ "BrightTheoremQATheoremsRetrieval-query": "Represent this Math problem for searching relevant theorems: ",
22
+ "BrightTheoremQAQuestionsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
23
+ "BrightBiologyLongRetrieval-query": "Represent this biology post for searching relevant documents: ",
24
+ "BrightEarthScienceLongRetrieval-query": "Represent this earth_science post for searching relevant documents: ",
25
+ "BrightEconomicsLongRetrieval-query": "Represent this economics post for searching relevant documents: ",
26
+ "BrightPsychologyLongRetrieval-query": "Represent this psychology post for searching relevant documents: ",
27
+ "BrightRoboticsLongRetrieval-query": "Represent this robotics post for searching relevant document: ",
28
+ "BrightStackoverflowLongRetrieval-query": "Represent this stackoverflow post for searching relevant document: ",
29
+ "BrightSustainableLivingLongRetrieval-query": "Represent this sustainable_living post for searching relevant documents: ",
30
+ "BrightPonyLongRetrieval-query": "Represent this Pony question for searching relevant documents: ",
31
+ }
10
32
  BGE_15_CITATION = """@misc{bge_embedding,
11
33
  title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
12
34
  author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
@@ -325,6 +347,7 @@ bge_small_en_v1_5 = ModelMeta(
325
347
  revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
326
348
  release_date="2023-09-12", # initial commit of hf model.
327
349
  n_parameters=33_400_000,
350
+ n_embedding_parameters=11_720_448,
328
351
  memory_usage_mb=127,
329
352
  embed_dim=512,
330
353
  license="mit",
@@ -357,6 +380,7 @@ bge_base_en_v1_5 = ModelMeta(
357
380
  revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
358
381
  release_date="2023-09-11", # initial commit of hf model.
359
382
  n_parameters=109_000_000,
383
+ n_embedding_parameters=23_440_896,
360
384
  memory_usage_mb=390,
361
385
  embed_dim=768,
362
386
  license="mit",
@@ -389,6 +413,7 @@ bge_large_en_v1_5 = ModelMeta(
389
413
  revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
390
414
  release_date="2023-09-12", # initial commit of hf model.
391
415
  n_parameters=335_000_000,
416
+ n_embedding_parameters=31_254_528,
392
417
  memory_usage_mb=1242,
393
418
  embed_dim=1024,
394
419
  license="mit",
@@ -421,6 +446,7 @@ bge_small_zh = ModelMeta(
421
446
  revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8",
422
447
  release_date="2023-08-05", # initial commit of hf model.
423
448
  n_parameters=33_400_000,
449
+ n_embedding_parameters=10_817_536,
424
450
  memory_usage_mb=127,
425
451
  embed_dim=512,
426
452
  license="mit",
@@ -448,6 +474,7 @@ bge_base_zh = ModelMeta(
448
474
  revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6",
449
475
  release_date="2023-08-05", # initial commit of hf model.
450
476
  n_parameters=109_000_000,
477
+ n_embedding_parameters=16_226_304,
451
478
  memory_usage_mb=390,
452
479
  embed_dim=768,
453
480
  license="mit",
@@ -475,6 +502,7 @@ bge_large_zh = ModelMeta(
475
502
  revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8",
476
503
  release_date="2023-08-02", # initial commit of hf model.
477
504
  n_parameters=335_000_000,
505
+ n_embedding_parameters=21_635_072,
478
506
  memory_usage_mb=1242,
479
507
  embed_dim=1024,
480
508
  license="mit",
@@ -502,6 +530,7 @@ bge_small_en = ModelMeta(
502
530
  revision="4778d71a06863076696b03fd2777eb118712cad8",
503
531
  release_date="2023-08-05", # initial commit of hf model.
504
532
  n_parameters=33_400_000,
533
+ n_embedding_parameters=11_720_448,
505
534
  memory_usage_mb=127,
506
535
  embed_dim=512,
507
536
  license="mit",
@@ -529,6 +558,7 @@ bge_base_en = ModelMeta(
529
558
  revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8",
530
559
  release_date="2023-08-05", # initial commit of hf model.
531
560
  n_parameters=109_000_000,
561
+ n_embedding_parameters=23_440_896,
532
562
  memory_usage_mb=390,
533
563
  embed_dim=768,
534
564
  license="mit",
@@ -562,6 +592,7 @@ bge_large_en = ModelMeta(
562
592
  revision="abe7d9d814b775ca171121fb03f394dc42974275",
563
593
  release_date="2023-08-05", # initial commit of hf model.
564
594
  n_parameters=335_000_000,
595
+ n_embedding_parameters=31_254_528,
565
596
  memory_usage_mb=1242,
566
597
  embed_dim=1024,
567
598
  license="mit",
@@ -590,6 +621,7 @@ bge_small_zh_v1_5 = ModelMeta(
590
621
  revision="7999e1d3359715c523056ef9478215996d62a620",
591
622
  release_date="2023-09-12", # initial commit of hf model.
592
623
  n_parameters=33_400_000,
624
+ n_embedding_parameters=10_817_536,
593
625
  memory_usage_mb=91,
594
626
  embed_dim=512,
595
627
  license="mit",
@@ -616,6 +648,7 @@ bge_base_zh_v1_5 = ModelMeta(
616
648
  revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65",
617
649
  release_date="2023-09-11", # initial commit of hf model.
618
650
  n_parameters=109_000_000,
651
+ n_embedding_parameters=16_226_304,
619
652
  memory_usage_mb=416,
620
653
  embed_dim=768,
621
654
  license="mit",
@@ -642,6 +675,7 @@ bge_large_zh_v1_5 = ModelMeta(
642
675
  revision="79e7739b6ab944e86d6171e44d24c997fc1e0116",
643
676
  release_date="2023-09-12", # initial commit of hf model.
644
677
  n_parameters=335_000_000,
678
+ n_embedding_parameters=21_635_072,
645
679
  memory_usage_mb=1278,
646
680
  embed_dim=1024,
647
681
  license="mit",
@@ -665,6 +699,7 @@ bge_m3 = ModelMeta(
665
699
  revision="5617a9f61b028005a4858fdac845db406aefb181",
666
700
  release_date="2024-06-28",
667
701
  n_parameters=568_000_000,
702
+ n_embedding_parameters=256_002_048,
668
703
  memory_usage_mb=2167,
669
704
  embed_dim=1024,
670
705
  license="mit",
@@ -761,6 +796,7 @@ bge_multilingual_gemma2 = ModelMeta(
761
796
  revision="992e13d8984fde2c31ef8a3cb2c038aeec513b8a",
762
797
  release_date="2024-07-25", # initial commit of hf model.
763
798
  n_parameters=int(9.24 * 1e9),
799
+ n_embedding_parameters=917_511_168,
764
800
  memory_usage_mb=35254,
765
801
  embed_dim=3584, # from old C-MTEB leaderboard
766
802
  license="https://ai.google.dev/gemma/terms",
@@ -808,6 +844,7 @@ bge_en_icl = ModelMeta(
808
844
  revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5",
809
845
  release_date="2024-07-25", # initial commit of hf model.
810
846
  n_parameters=int(7.11 * 1e9),
847
+ n_embedding_parameters=131_084_288,
811
848
  memory_usage_mb=27125,
812
849
  embed_dim=4096,
813
850
  license="apache-2.0",
@@ -842,6 +879,7 @@ bge_m3_unsupervised = ModelMeta(
842
879
  revision="46f03bc86361cf88102b0b517b36c8259f2946b1",
843
880
  release_date="2024-01-30", # January 30, 2024 - BGE-M3 release date
844
881
  n_parameters=568_000_000,
882
+ n_embedding_parameters=256_002_048,
845
883
  memory_usage_mb=2167,
846
884
  embed_dim=1024,
847
885
  license="mit",
@@ -871,6 +909,7 @@ manu__bge_m3_custom_fr = ModelMeta(
871
909
  languages=None,
872
910
  loader=sentence_transformers_loader,
873
911
  n_parameters=567754752,
912
+ n_embedding_parameters=256_002_048,
874
913
  memory_usage_mb=2166,
875
914
  max_tokens=8194.0,
876
915
  embed_dim=1024,
@@ -9,6 +9,7 @@ bica_base = ModelMeta(
9
9
  revision="31237a836e5ae908c308a256573e5f0986498574",
10
10
  release_date="2025-11-14",
11
11
  n_parameters=110_000_000,
12
+ n_embedding_parameters=23_440_896,
12
13
  memory_usage_mb=418,
13
14
  embed_dim=768,
14
15
  license="mit",
@@ -177,6 +177,7 @@ blip2_opt_2_7b = ModelMeta(
177
177
  release_date="2024-03-22",
178
178
  modalities=["image", "text"],
179
179
  n_parameters=3_740_000_000,
180
+ n_embedding_parameters=None,
180
181
  memory_usage_mb=14285,
181
182
  max_tokens=None,
182
183
  embed_dim=768,
@@ -201,6 +202,7 @@ blip2_opt_6_7b_coco = ModelMeta(
201
202
  release_date="2024-03-31",
202
203
  modalities=["image", "text"],
203
204
  n_parameters=7_750_000_000,
205
+ n_embedding_parameters=None,
204
206
  memory_usage_mb=29577,
205
207
  max_tokens=None,
206
208
  embed_dim=768,
@@ -141,6 +141,7 @@ blip_image_captioning_large = ModelMeta(
141
141
  release_date="2023-12-07",
142
142
  modalities=["image", "text"],
143
143
  n_parameters=470_000_000,
144
+ n_embedding_parameters=23_442_432,
144
145
  memory_usage_mb=1792,
145
146
  max_tokens=512,
146
147
  embed_dim=768,
@@ -169,6 +170,7 @@ blip_image_captioning_base = ModelMeta(
169
170
  release_date="2023-08-01",
170
171
  modalities=["image", "text"],
171
172
  n_parameters=247_000_000,
173
+ n_embedding_parameters=23_442_432,
172
174
  memory_usage_mb=942,
173
175
  max_tokens=512,
174
176
  embed_dim=768,
@@ -198,6 +200,7 @@ blip_vqa_base = ModelMeta(
198
200
  release_date="2023-12-07",
199
201
  modalities=["image", "text"],
200
202
  n_parameters=247_000_000,
203
+ n_embedding_parameters=23_442_432,
201
204
  memory_usage_mb=1467,
202
205
  max_tokens=512,
203
206
  embed_dim=768,
@@ -225,6 +228,7 @@ blip_vqa_capfilt_large = ModelMeta(
225
228
  release_date="2023-01-22",
226
229
  modalities=["image", "text"],
227
230
  n_parameters=247_000_000,
231
+ n_embedding_parameters=23_442_432,
228
232
  memory_usage_mb=942,
229
233
  max_tokens=512,
230
234
  embed_dim=768,
@@ -252,6 +256,7 @@ blip_itm_base_coco = ModelMeta(
252
256
  release_date="2023-08-01",
253
257
  modalities=["image", "text"],
254
258
  n_parameters=247_000_000,
259
+ n_embedding_parameters=23_442_432,
255
260
  memory_usage_mb=942,
256
261
  max_tokens=512,
257
262
  embed_dim=768,
@@ -279,6 +284,7 @@ blip_itm_large_coco = ModelMeta(
279
284
  release_date="2023-08-01",
280
285
  modalities=["image", "text"],
281
286
  n_parameters=470_000_000,
287
+ n_embedding_parameters=23_442_432,
282
288
  memory_usage_mb=1793,
283
289
  max_tokens=512,
284
290
  embed_dim=768,
@@ -307,6 +313,7 @@ blip_itm_base_flickr = ModelMeta(
307
313
  release_date="2023-08-01",
308
314
  modalities=["image", "text"],
309
315
  n_parameters=247_000_000,
316
+ n_embedding_parameters=23_442_432,
310
317
  memory_usage_mb=942,
311
318
  max_tokens=512,
312
319
  embed_dim=768,
@@ -335,6 +342,7 @@ blip_itm_large_flickr = ModelMeta(
335
342
  release_date="2023-08-01",
336
343
  modalities=["image", "text"],
337
344
  n_parameters=470_000_000,
345
+ n_embedding_parameters=23_442_432,
338
346
  memory_usage_mb=1793,
339
347
  max_tokens=512,
340
348
  embed_dim=768,