mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,29 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
 
7
7
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
8
8
 
9
- model_prompts = {"query": "Represent this sentence for searching relevant passages: "}
9
+ model_prompts = {
10
+ "query": "Represent this sentence for searching relevant passages: ",
11
+ "BrightBiologyRetrieval-query": "Represent this biology post for searching relevant passages: ",
12
+ "BrightEarthScienceRetrieval-query": "Represent this earth_science post for searching relevant passages: ",
13
+ "BrightEconomicsRetrieval-query": "Represent this economics post for searching relevant passages: ",
14
+ "BrightPsychologyRetrieval-query": "Represent this psychology post for searching relevant passages: ",
15
+ "BrightRoboticsRetrieval-query": "Represent this robotics post for searching relevant passages: ",
16
+ "BrightStackoverflowRetrieval-query": "Represent this stackoverflow post for searching relevant passages: ",
17
+ "BrightSustainableLivingRetrieval-query": "Represent this sustainable_living post for searching relevant passages: ",
18
+ "BrightPonyRetrieval-query": "Represent this Pony question for searching relevant passages: ",
19
+ "BrightLeetcodeRetrieval-query": "Represent this Coding problem for searching relevant examples: ",
20
+ "BrightAopsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
21
+ "BrightTheoremQATheoremsRetrieval-query": "Represent this Math problem for searching relevant theorems: ",
22
+ "BrightTheoremQAQuestionsRetrieval-query": "Represent this Math problem for searching relevant examples: ",
23
+ "BrightBiologyLongRetrieval-query": "Represent this biology post for searching relevant documents: ",
24
+ "BrightEarthScienceLongRetrieval-query": "Represent this earth_science post for searching relevant documents: ",
25
+ "BrightEconomicsLongRetrieval-query": "Represent this economics post for searching relevant documents: ",
26
+ "BrightPsychologyLongRetrieval-query": "Represent this psychology post for searching relevant documents: ",
27
+ "BrightRoboticsLongRetrieval-query": "Represent this robotics post for searching relevant document: ",
28
+ "BrightStackoverflowLongRetrieval-query": "Represent this stackoverflow post for searching relevant document: ",
29
+ "BrightSustainableLivingLongRetrieval-query": "Represent this sustainable_living post for searching relevant documents: ",
30
+ "BrightPonyLongRetrieval-query": "Represent this Pony question for searching relevant documents: ",
31
+ }
10
32
  BGE_15_CITATION = """@misc{bge_embedding,
11
33
  title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
12
34
  author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
@@ -325,13 +347,20 @@ bge_small_en_v1_5 = ModelMeta(
325
347
  revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
326
348
  release_date="2023-09-12", # initial commit of hf model.
327
349
  n_parameters=33_400_000,
350
+ n_embedding_parameters=11_720_448,
328
351
  memory_usage_mb=127,
329
352
  embed_dim=512,
330
353
  license="mit",
331
354
  max_tokens=512,
332
355
  reference="https://huggingface.co/BAAI/bge-small-en-v1.5",
333
356
  similarity_fn_name=ScoringFunction.COSINE,
334
- framework=["Sentence Transformers", "PyTorch"],
357
+ framework=[
358
+ "Sentence Transformers",
359
+ "PyTorch",
360
+ "ONNX",
361
+ "safetensors",
362
+ "Transformers",
363
+ ],
335
364
  use_instructions=True,
336
365
  public_training_code=None,
337
366
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -351,13 +380,20 @@ bge_base_en_v1_5 = ModelMeta(
351
380
  revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
352
381
  release_date="2023-09-11", # initial commit of hf model.
353
382
  n_parameters=109_000_000,
383
+ n_embedding_parameters=23_440_896,
354
384
  memory_usage_mb=390,
355
385
  embed_dim=768,
356
386
  license="mit",
357
387
  max_tokens=512,
358
388
  reference="https://huggingface.co/BAAI/bge-base-en-v1.5",
359
389
  similarity_fn_name=ScoringFunction.COSINE,
360
- framework=["Sentence Transformers", "PyTorch"],
390
+ framework=[
391
+ "Sentence Transformers",
392
+ "PyTorch",
393
+ "ONNX",
394
+ "safetensors",
395
+ "Transformers",
396
+ ],
361
397
  use_instructions=True,
362
398
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
363
399
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -377,13 +413,20 @@ bge_large_en_v1_5 = ModelMeta(
377
413
  revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
378
414
  release_date="2023-09-12", # initial commit of hf model.
379
415
  n_parameters=335_000_000,
416
+ n_embedding_parameters=31_254_528,
380
417
  memory_usage_mb=1242,
381
418
  embed_dim=1024,
382
419
  license="mit",
383
420
  max_tokens=512,
384
421
  reference="https://huggingface.co/BAAI/bge-large-en-v1.5",
385
422
  similarity_fn_name=ScoringFunction.COSINE,
386
- framework=["Sentence Transformers", "PyTorch"],
423
+ framework=[
424
+ "Sentence Transformers",
425
+ "PyTorch",
426
+ "ONNX",
427
+ "safetensors",
428
+ "Transformers",
429
+ ],
387
430
  use_instructions=True,
388
431
  citation=BGE_15_CITATION,
389
432
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
@@ -403,13 +446,14 @@ bge_small_zh = ModelMeta(
403
446
  revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8",
404
447
  release_date="2023-08-05", # initial commit of hf model.
405
448
  n_parameters=33_400_000,
449
+ n_embedding_parameters=10_817_536,
406
450
  memory_usage_mb=127,
407
451
  embed_dim=512,
408
452
  license="mit",
409
453
  max_tokens=512,
410
454
  reference="https://huggingface.co/BAAI/bge-small-zh",
411
455
  similarity_fn_name=ScoringFunction.COSINE,
412
- framework=["Sentence Transformers", "PyTorch"],
456
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
413
457
  use_instructions=True,
414
458
  public_training_code=None,
415
459
  public_training_data=None,
@@ -430,13 +474,14 @@ bge_base_zh = ModelMeta(
430
474
  revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6",
431
475
  release_date="2023-08-05", # initial commit of hf model.
432
476
  n_parameters=109_000_000,
477
+ n_embedding_parameters=16_226_304,
433
478
  memory_usage_mb=390,
434
479
  embed_dim=768,
435
480
  license="mit",
436
481
  max_tokens=512,
437
482
  reference="https://huggingface.co/BAAI/bge-base-zh",
438
483
  similarity_fn_name=ScoringFunction.COSINE,
439
- framework=["Sentence Transformers", "PyTorch"],
484
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
440
485
  use_instructions=True,
441
486
  public_training_code=None,
442
487
  public_training_data=None,
@@ -457,13 +502,14 @@ bge_large_zh = ModelMeta(
457
502
  revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8",
458
503
  release_date="2023-08-02", # initial commit of hf model.
459
504
  n_parameters=335_000_000,
505
+ n_embedding_parameters=21_635_072,
460
506
  memory_usage_mb=1242,
461
507
  embed_dim=1024,
462
508
  license="mit",
463
509
  max_tokens=512,
464
510
  reference="https://huggingface.co/BAAI/bge-large-zh",
465
511
  similarity_fn_name=ScoringFunction.COSINE,
466
- framework=["Sentence Transformers", "PyTorch"],
512
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
467
513
  use_instructions=True,
468
514
  public_training_code=None,
469
515
  public_training_data=None,
@@ -484,13 +530,14 @@ bge_small_en = ModelMeta(
484
530
  revision="4778d71a06863076696b03fd2777eb118712cad8",
485
531
  release_date="2023-08-05", # initial commit of hf model.
486
532
  n_parameters=33_400_000,
533
+ n_embedding_parameters=11_720_448,
487
534
  memory_usage_mb=127,
488
535
  embed_dim=512,
489
536
  license="mit",
490
537
  max_tokens=512,
491
538
  reference="https://huggingface.co/BAAI/bge-small-en",
492
539
  similarity_fn_name=ScoringFunction.COSINE,
493
- framework=["Sentence Transformers", "PyTorch"],
540
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
494
541
  use_instructions=True,
495
542
  public_training_code=None,
496
543
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -511,13 +558,20 @@ bge_base_en = ModelMeta(
511
558
  revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8",
512
559
  release_date="2023-08-05", # initial commit of hf model.
513
560
  n_parameters=109_000_000,
561
+ n_embedding_parameters=23_440_896,
514
562
  memory_usage_mb=390,
515
563
  embed_dim=768,
516
564
  license="mit",
517
565
  max_tokens=512,
518
566
  reference="https://huggingface.co/BAAI/bge-base-en",
519
567
  similarity_fn_name=ScoringFunction.COSINE,
520
- framework=["Sentence Transformers", "PyTorch"],
568
+ framework=[
569
+ "Sentence Transformers",
570
+ "PyTorch",
571
+ "Transformers",
572
+ "ONNX",
573
+ "safetensors",
574
+ ],
521
575
  use_instructions=True,
522
576
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
523
577
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -538,13 +592,14 @@ bge_large_en = ModelMeta(
538
592
  revision="abe7d9d814b775ca171121fb03f394dc42974275",
539
593
  release_date="2023-08-05", # initial commit of hf model.
540
594
  n_parameters=335_000_000,
595
+ n_embedding_parameters=31_254_528,
541
596
  memory_usage_mb=1242,
542
597
  embed_dim=1024,
543
598
  license="mit",
544
599
  max_tokens=512,
545
600
  reference="https://huggingface.co/BAAI/bge-large-en",
546
601
  similarity_fn_name=ScoringFunction.COSINE,
547
- framework=["Sentence Transformers", "PyTorch"],
602
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
548
603
  use_instructions=True,
549
604
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
550
605
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -566,13 +621,14 @@ bge_small_zh_v1_5 = ModelMeta(
566
621
  revision="7999e1d3359715c523056ef9478215996d62a620",
567
622
  release_date="2023-09-12", # initial commit of hf model.
568
623
  n_parameters=33_400_000,
624
+ n_embedding_parameters=10_817_536,
569
625
  memory_usage_mb=91,
570
626
  embed_dim=512,
571
627
  license="mit",
572
628
  max_tokens=512,
573
629
  reference="https://huggingface.co/BAAI/bge-small-zh-v1.5",
574
630
  similarity_fn_name=ScoringFunction.COSINE,
575
- framework=["Sentence Transformers", "PyTorch"],
631
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
576
632
  use_instructions=True,
577
633
  public_training_code=None,
578
634
  public_training_data=None,
@@ -592,13 +648,14 @@ bge_base_zh_v1_5 = ModelMeta(
592
648
  revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65",
593
649
  release_date="2023-09-11", # initial commit of hf model.
594
650
  n_parameters=109_000_000,
651
+ n_embedding_parameters=16_226_304,
595
652
  memory_usage_mb=416,
596
653
  embed_dim=768,
597
654
  license="mit",
598
655
  max_tokens=512,
599
656
  reference="https://huggingface.co/BAAI/bge-base-zh-v1.5",
600
657
  similarity_fn_name=ScoringFunction.COSINE,
601
- framework=["Sentence Transformers", "PyTorch"],
658
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
602
659
  use_instructions=True,
603
660
  public_training_code=None,
604
661
  public_training_data=None,
@@ -618,13 +675,14 @@ bge_large_zh_v1_5 = ModelMeta(
618
675
  revision="79e7739b6ab944e86d6171e44d24c997fc1e0116",
619
676
  release_date="2023-09-12", # initial commit of hf model.
620
677
  n_parameters=335_000_000,
678
+ n_embedding_parameters=21_635_072,
621
679
  memory_usage_mb=1278,
622
680
  embed_dim=1024,
623
681
  license="mit",
624
682
  max_tokens=512,
625
683
  reference="https://huggingface.co/BAAI/bge-large-zh-v1.5",
626
684
  similarity_fn_name=ScoringFunction.COSINE,
627
- framework=["Sentence Transformers", "PyTorch"],
685
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
628
686
  use_instructions=True,
629
687
  public_training_code=None,
630
688
  public_training_data=None,
@@ -641,19 +699,20 @@ bge_m3 = ModelMeta(
641
699
  revision="5617a9f61b028005a4858fdac845db406aefb181",
642
700
  release_date="2024-06-28",
643
701
  n_parameters=568_000_000,
702
+ n_embedding_parameters=256_002_048,
644
703
  memory_usage_mb=2167,
645
704
  embed_dim=1024,
646
705
  license="mit",
647
706
  max_tokens=8194,
648
707
  reference="https://huggingface.co/BAAI/bge-m3",
649
708
  similarity_fn_name=ScoringFunction.COSINE,
650
- framework=["Sentence Transformers", "PyTorch"],
709
+ framework=["Sentence Transformers", "PyTorch", "ONNX"],
651
710
  use_instructions=False,
652
711
  public_training_code=None,
653
712
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
654
713
  training_datasets=bge_m3_training_data,
655
714
  citation="""@misc{bge-m3,
656
- title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
715
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
657
716
  author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
658
717
  year={2024},
659
718
  eprint={2402.03216},
@@ -737,13 +796,14 @@ bge_multilingual_gemma2 = ModelMeta(
737
796
  revision="992e13d8984fde2c31ef8a3cb2c038aeec513b8a",
738
797
  release_date="2024-07-25", # initial commit of hf model.
739
798
  n_parameters=int(9.24 * 1e9),
799
+ n_embedding_parameters=917_511_168,
740
800
  memory_usage_mb=35254,
741
801
  embed_dim=3584, # from old C-MTEB leaderboard
742
802
  license="https://ai.google.dev/gemma/terms",
743
803
  max_tokens=8192, # from old C-MTEB leaderboard
744
804
  reference="https://huggingface.co/BAAI/bge-multilingual-gemma2",
745
805
  similarity_fn_name=ScoringFunction.COSINE,
746
- framework=["Sentence Transformers", "PyTorch"],
806
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
747
807
  use_instructions=False,
748
808
  public_training_code=None,
749
809
  public_training_data=None,
@@ -754,7 +814,7 @@ bge_multilingual_gemma2 = ModelMeta(
754
814
  | bge_full_data
755
815
  | bge_m3_training_data,
756
816
  citation="""@misc{bge-m3,
757
- title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
817
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
758
818
  author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
759
819
  year={2024},
760
820
  eprint={2402.03216},
@@ -764,7 +824,7 @@ bge_multilingual_gemma2 = ModelMeta(
764
824
 
765
825
 
766
826
  @misc{bge_embedding,
767
- title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
827
+ title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
768
828
  author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
769
829
  year={2023},
770
830
  eprint={2309.07597},
@@ -784,13 +844,14 @@ bge_en_icl = ModelMeta(
784
844
  revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5",
785
845
  release_date="2024-07-25", # initial commit of hf model.
786
846
  n_parameters=int(7.11 * 1e9),
847
+ n_embedding_parameters=131_084_288,
787
848
  memory_usage_mb=27125,
788
849
  embed_dim=4096,
789
850
  license="apache-2.0",
790
851
  max_tokens=32768,
791
852
  reference="https://huggingface.co/BAAI/bge-en-icl",
792
853
  similarity_fn_name=ScoringFunction.COSINE,
793
- framework=["Sentence Transformers", "PyTorch"],
854
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
794
855
  use_instructions=False,
795
856
  public_training_code="https://github.com/FlagOpen/FlagEmbedding",
796
857
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
@@ -818,19 +879,20 @@ bge_m3_unsupervised = ModelMeta(
818
879
  revision="46f03bc86361cf88102b0b517b36c8259f2946b1",
819
880
  release_date="2024-01-30", # January 30, 2024 - BGE-M3 release date
820
881
  n_parameters=568_000_000,
882
+ n_embedding_parameters=256_002_048,
821
883
  memory_usage_mb=2167,
822
884
  embed_dim=1024,
823
885
  license="mit",
824
886
  max_tokens=8192,
825
887
  reference="https://huggingface.co/BAAI/bge-m3-unsupervised",
826
888
  similarity_fn_name="cosine",
827
- framework=["Sentence Transformers", "PyTorch"],
889
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
828
890
  use_instructions=False,
829
891
  public_training_code="https://github.com/FlagOpen/FlagEmbedding",
830
892
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
831
893
  training_datasets=bge_m3_training_data,
832
894
  citation="""@misc{bge-m3,
833
- title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
895
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
834
896
  author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
835
897
  year={2024},
836
898
  eprint={2402.03216},
@@ -847,6 +909,7 @@ manu__bge_m3_custom_fr = ModelMeta(
847
909
  languages=None,
848
910
  loader=sentence_transformers_loader,
849
911
  n_parameters=567754752,
912
+ n_embedding_parameters=256_002_048,
850
913
  memory_usage_mb=2166,
851
914
  max_tokens=8194.0,
852
915
  embed_dim=1024,
@@ -854,7 +917,7 @@ manu__bge_m3_custom_fr = ModelMeta(
854
917
  open_weights=True,
855
918
  public_training_code=None,
856
919
  public_training_data=None,
857
- framework=["PyTorch", "Sentence Transformers"],
920
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
858
921
  reference="https://huggingface.co/manu/bge-m3-custom-fr",
859
922
  similarity_fn_name=ScoringFunction.COSINE,
860
923
  use_instructions=None,
@@ -9,26 +9,27 @@ bica_base = ModelMeta(
9
9
  revision="31237a836e5ae908c308a256573e5f0986498574",
10
10
  release_date="2025-11-14",
11
11
  n_parameters=110_000_000,
12
+ n_embedding_parameters=23_440_896,
12
13
  memory_usage_mb=418,
13
14
  embed_dim=768,
14
15
  license="mit",
15
16
  max_tokens=512,
16
17
  reference="https://huggingface.co/bisectgroup/BiCA-base",
17
18
  similarity_fn_name="cosine",
18
- framework=["Sentence Transformers", "PyTorch"],
19
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
19
20
  use_instructions=False,
20
21
  public_training_code="https://github.com/NiravBhattLab/BiCA",
21
22
  public_training_data="https://huggingface.co/datasets/bisectgroup/hard-negatives-traversal",
22
23
  adapted_from="thenlper/gte-base",
23
24
  citation="""
24
25
  @misc{sinha2025bicaeffectivebiomedicaldense,
25
- title={BiCA: Effective Biomedical Dense Retrieval with Citation-Aware Hard Negatives},
26
+ title={BiCA: Effective Biomedical Dense Retrieval with Citation-Aware Hard Negatives},
26
27
  author={Aarush Sinha and Pavan Kumar S and Roshan Balaji and Nirav Pravinbhai Bhatt},
27
28
  year={2025},
28
29
  eprint={2511.08029},
29
30
  archivePrefix={arXiv},
30
31
  primaryClass={cs.IR},
31
- url={https://arxiv.org/abs/2511.08029},
32
+ url={https://arxiv.org/abs/2511.08029},
32
33
  }
33
34
  """,
34
35
  training_datasets=set(),
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  BLIP2_CITATION = """@inproceedings{li2023blip2,
14
19
  title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
@@ -172,6 +177,7 @@ blip2_opt_2_7b = ModelMeta(
172
177
  release_date="2024-03-22",
173
178
  modalities=["image", "text"],
174
179
  n_parameters=3_740_000_000,
180
+ n_embedding_parameters=None,
175
181
  memory_usage_mb=14285,
176
182
  max_tokens=None,
177
183
  embed_dim=768,
@@ -179,7 +185,7 @@ blip2_opt_2_7b = ModelMeta(
179
185
  open_weights=True,
180
186
  public_training_code="https://github.com/salesforce/LAVIS/tree/main/projects/blip2",
181
187
  public_training_data=None,
182
- framework=["PyTorch"],
188
+ framework=["PyTorch", "Transformers", "safetensors"],
183
189
  reference="https://huggingface.co/Salesforce/blip2-opt-2.7b",
184
190
  similarity_fn_name=ScoringFunction.COSINE,
185
191
  use_instructions=False,
@@ -196,6 +202,7 @@ blip2_opt_6_7b_coco = ModelMeta(
196
202
  release_date="2024-03-31",
197
203
  modalities=["image", "text"],
198
204
  n_parameters=7_750_000_000,
205
+ n_embedding_parameters=None,
199
206
  memory_usage_mb=29577,
200
207
  max_tokens=None,
201
208
  embed_dim=768,
@@ -203,7 +210,7 @@ blip2_opt_6_7b_coco = ModelMeta(
203
210
  open_weights=True,
204
211
  public_training_code="https://github.com/salesforce/LAVIS/tree/main/projects/blip2",
205
212
  public_training_data=None,
206
- framework=["PyTorch"],
213
+ framework=["PyTorch", "Transformers", "safetensors"],
207
214
  reference="https://huggingface.co/Salesforce/blip2-opt-6.7b-coco",
208
215
  similarity_fn_name=ScoringFunction.COSINE,
209
216
  use_instructions=False,
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
6
  from torch.nn.functional import normalize
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
14
19
  doi = {10.48550/ARXIV.2201.12086},
@@ -128,7 +133,7 @@ class BLIPModel(AbsEncoder):
128
133
 
129
134
  # in descending order of usage (downloads from huggingface)
130
135
  blip_image_captioning_large = ModelMeta(
131
- loader=BLIPModel, # type: ignore
136
+ loader=BLIPModel,
132
137
  name="Salesforce/blip-image-captioning-large",
133
138
  model_type=["dense"],
134
139
  languages=["eng-Latn"],
@@ -136,6 +141,7 @@ blip_image_captioning_large = ModelMeta(
136
141
  release_date="2023-12-07",
137
142
  modalities=["image", "text"],
138
143
  n_parameters=470_000_000,
144
+ n_embedding_parameters=23_442_432,
139
145
  memory_usage_mb=1792,
140
146
  max_tokens=512,
141
147
  embed_dim=768,
@@ -143,7 +149,7 @@ blip_image_captioning_large = ModelMeta(
143
149
  open_weights=True,
144
150
  public_training_code="https://github.com/salesforce/BLIP",
145
151
  public_training_data="https://github.com/salesforce/BLIP",
146
- framework=["PyTorch"],
152
+ framework=["PyTorch", "Transformers", "safetensors"],
147
153
  reference="https://huggingface.co/Salesforce/blip-image-captioning-large",
148
154
  similarity_fn_name=ScoringFunction.COSINE,
149
155
  use_instructions=False,
@@ -156,7 +162,7 @@ blip_image_captioning_large = ModelMeta(
156
162
  )
157
163
 
158
164
  blip_image_captioning_base = ModelMeta(
159
- loader=BLIPModel, # type: ignore
165
+ loader=BLIPModel,
160
166
  name="Salesforce/blip-image-captioning-base",
161
167
  model_type=["dense"],
162
168
  languages=["eng-Latn"],
@@ -164,6 +170,7 @@ blip_image_captioning_base = ModelMeta(
164
170
  release_date="2023-08-01",
165
171
  modalities=["image", "text"],
166
172
  n_parameters=247_000_000,
173
+ n_embedding_parameters=23_442_432,
167
174
  memory_usage_mb=942,
168
175
  max_tokens=512,
169
176
  embed_dim=768,
@@ -171,7 +178,7 @@ blip_image_captioning_base = ModelMeta(
171
178
  open_weights=True,
172
179
  public_training_code="https://github.com/salesforce/BLIP",
173
180
  public_training_data="https://github.com/salesforce/BLIP",
174
- framework=["PyTorch"],
181
+ framework=["PyTorch", "Transformers"],
175
182
  reference="https://huggingface.co/Salesforce/blip-image-captioning-base",
176
183
  similarity_fn_name=ScoringFunction.COSINE,
177
184
  use_instructions=False,
@@ -185,7 +192,7 @@ blip_image_captioning_base = ModelMeta(
185
192
 
186
193
 
187
194
  blip_vqa_base = ModelMeta(
188
- loader=BLIPModel, # type: ignore
195
+ loader=BLIPModel,
189
196
  name="Salesforce/blip-vqa-base",
190
197
  model_type=["dense"],
191
198
  languages=["eng-Latn"],
@@ -193,6 +200,7 @@ blip_vqa_base = ModelMeta(
193
200
  release_date="2023-12-07",
194
201
  modalities=["image", "text"],
195
202
  n_parameters=247_000_000,
203
+ n_embedding_parameters=23_442_432,
196
204
  memory_usage_mb=1467,
197
205
  max_tokens=512,
198
206
  embed_dim=768,
@@ -200,7 +208,7 @@ blip_vqa_base = ModelMeta(
200
208
  open_weights=True,
201
209
  public_training_code="https://github.com/salesforce/BLIP",
202
210
  public_training_data="https://github.com/salesforce/BLIP",
203
- framework=["PyTorch"],
211
+ framework=["PyTorch", "Transformers", "safetensors"],
204
212
  reference="https://huggingface.co/Salesforce/blip-vqa-base",
205
213
  similarity_fn_name=ScoringFunction.COSINE,
206
214
  use_instructions=False,
@@ -212,7 +220,7 @@ blip_vqa_base = ModelMeta(
212
220
  )
213
221
 
214
222
  blip_vqa_capfilt_large = ModelMeta(
215
- loader=BLIPModel, # type: ignore
223
+ loader=BLIPModel,
216
224
  name="Salesforce/blip-vqa-capfilt-large",
217
225
  model_type=["dense"],
218
226
  languages=["eng-Latn"],
@@ -220,6 +228,7 @@ blip_vqa_capfilt_large = ModelMeta(
220
228
  release_date="2023-01-22",
221
229
  modalities=["image", "text"],
222
230
  n_parameters=247_000_000,
231
+ n_embedding_parameters=23_442_432,
223
232
  memory_usage_mb=942,
224
233
  max_tokens=512,
225
234
  embed_dim=768,
@@ -227,7 +236,7 @@ blip_vqa_capfilt_large = ModelMeta(
227
236
  open_weights=True,
228
237
  public_training_code="https://github.com/salesforce/BLIP",
229
238
  public_training_data="https://github.com/salesforce/BLIP",
230
- framework=["PyTorch"],
239
+ framework=["PyTorch", "Transformers"],
231
240
  reference="https://huggingface.co/Salesforce/blip-vqa-capfilt-large",
232
241
  similarity_fn_name=ScoringFunction.COSINE,
233
242
  use_instructions=False,
@@ -239,7 +248,7 @@ blip_vqa_capfilt_large = ModelMeta(
239
248
  )
240
249
 
241
250
  blip_itm_base_coco = ModelMeta(
242
- loader=BLIPModel, # type: ignore
251
+ loader=BLIPModel,
243
252
  name="Salesforce/blip-itm-base-coco",
244
253
  model_type=["dense"],
245
254
  languages=["eng-Latn"],
@@ -247,6 +256,7 @@ blip_itm_base_coco = ModelMeta(
247
256
  release_date="2023-08-01",
248
257
  modalities=["image", "text"],
249
258
  n_parameters=247_000_000,
259
+ n_embedding_parameters=23_442_432,
250
260
  memory_usage_mb=942,
251
261
  max_tokens=512,
252
262
  embed_dim=768,
@@ -254,7 +264,7 @@ blip_itm_base_coco = ModelMeta(
254
264
  open_weights=True,
255
265
  public_training_code="https://github.com/salesforce/BLIP",
256
266
  public_training_data="https://github.com/salesforce/BLIP",
257
- framework=["PyTorch"],
267
+ framework=["PyTorch", "Transformers"],
258
268
  reference="https://huggingface.co/Salesforce/blip-itm-base-coco",
259
269
  similarity_fn_name=ScoringFunction.COSINE,
260
270
  use_instructions=False,
@@ -266,7 +276,7 @@ blip_itm_base_coco = ModelMeta(
266
276
  )
267
277
 
268
278
  blip_itm_large_coco = ModelMeta(
269
- loader=BLIPModel, # type: ignore
279
+ loader=BLIPModel,
270
280
  name="Salesforce/blip-itm-large-coco",
271
281
  model_type=["dense"],
272
282
  languages=["eng-Latn"],
@@ -274,6 +284,7 @@ blip_itm_large_coco = ModelMeta(
274
284
  release_date="2023-08-01",
275
285
  modalities=["image", "text"],
276
286
  n_parameters=470_000_000,
287
+ n_embedding_parameters=23_442_432,
277
288
  memory_usage_mb=1793,
278
289
  max_tokens=512,
279
290
  embed_dim=768,
@@ -281,7 +292,7 @@ blip_itm_large_coco = ModelMeta(
281
292
  open_weights=True,
282
293
  public_training_code="https://github.com/salesforce/BLIP",
283
294
  public_training_data="https://github.com/salesforce/BLIP",
284
- framework=["PyTorch"],
295
+ framework=["PyTorch", "Transformers"],
285
296
  reference="https://huggingface.co/Salesforce/blip-itm-large-coco",
286
297
  similarity_fn_name=ScoringFunction.COSINE,
287
298
  use_instructions=False,
@@ -294,7 +305,7 @@ blip_itm_large_coco = ModelMeta(
294
305
  )
295
306
 
296
307
  blip_itm_base_flickr = ModelMeta(
297
- loader=BLIPModel, # type: ignore
308
+ loader=BLIPModel,
298
309
  name="Salesforce/blip-itm-base-flickr",
299
310
  model_type=["dense"],
300
311
  languages=["eng-Latn"],
@@ -302,6 +313,7 @@ blip_itm_base_flickr = ModelMeta(
302
313
  release_date="2023-08-01",
303
314
  modalities=["image", "text"],
304
315
  n_parameters=247_000_000,
316
+ n_embedding_parameters=23_442_432,
305
317
  memory_usage_mb=942,
306
318
  max_tokens=512,
307
319
  embed_dim=768,
@@ -309,7 +321,7 @@ blip_itm_base_flickr = ModelMeta(
309
321
  open_weights=True,
310
322
  public_training_code="https://github.com/salesforce/BLIP",
311
323
  public_training_data="https://github.com/salesforce/BLIP",
312
- framework=["PyTorch"],
324
+ framework=["PyTorch", "Transformers"],
313
325
  reference="https://huggingface.co/Salesforce/blip-itm-base-flickr",
314
326
  similarity_fn_name=ScoringFunction.COSINE,
315
327
  use_instructions=False,
@@ -322,7 +334,7 @@ blip_itm_base_flickr = ModelMeta(
322
334
  )
323
335
 
324
336
  blip_itm_large_flickr = ModelMeta(
325
- loader=BLIPModel, # type: ignore
337
+ loader=BLIPModel,
326
338
  name="Salesforce/blip-itm-large-flickr",
327
339
  model_type=["dense"],
328
340
  languages=["eng-Latn"],
@@ -330,6 +342,7 @@ blip_itm_large_flickr = ModelMeta(
330
342
  release_date="2023-08-01",
331
343
  modalities=["image", "text"],
332
344
  n_parameters=470_000_000,
345
+ n_embedding_parameters=23_442_432,
333
346
  memory_usage_mb=1793,
334
347
  max_tokens=512,
335
348
  embed_dim=768,
@@ -337,7 +350,7 @@ blip_itm_large_flickr = ModelMeta(
337
350
  open_weights=True,
338
351
  public_training_code="https://github.com/salesforce/BLIP",
339
352
  public_training_data="https://github.com/salesforce/BLIP",
340
- framework=["PyTorch"],
353
+ framework=["PyTorch", "Transformers"],
341
354
  reference="https://huggingface.co/Salesforce/blip-itm-large-flickr",
342
355
  similarity_fn_name=ScoringFunction.COSINE,
343
356
  use_instructions=False,