mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
1
+ from mteb.models.model_implementations.pylate_models import MultiVectorModel
2
+ from mteb.models.model_meta import (
3
+ ModelMeta,
4
+ ScoringFunction,
5
+ )
6
+ from mteb.models.sentence_transformer_wrapper import (
7
+ CrossEncoderWrapper,
8
+ sentence_transformers_loader,
9
+ )
10
+
11
+ mixedbread_training_data = {
12
+ # from correspondence:
13
+ # as mentioned in our blog post
14
+ # (https://www.mixedbread.com/blog/mxbai-embed-large-v1#built-for-rag-and-real-world-use-cases:~:text=During%20the%20whole,related%20use%20cases.)
15
+ # We do not train on any data (except the MSMarco training split) of MTEB. We have a strong filtering process to ensure the OOD setting. That's true
16
+ # for all of our models. Keep up the good work and let me know if you have any questions.
17
+ "MSMARCO",
18
+ }
19
+
20
+ mxbai_embed_large_v1 = ModelMeta(
21
+ loader=sentence_transformers_loader,
22
+ loader_kwargs=dict(
23
+ model_prompts={
24
+ "query": "Represent this sentence for searching relevant passages: "
25
+ },
26
+ ),
27
+ name="mixedbread-ai/mxbai-embed-large-v1",
28
+ model_type=["dense"],
29
+ languages=["eng-Latn"],
30
+ open_weights=True,
31
+ revision="990580e27d329c7408b3741ecff85876e128e203",
32
+ release_date="2024-03-07", # initial commit of hf model.
33
+ n_parameters=335_000_000,
34
+ n_embedding_parameters=31_254_528,
35
+ memory_usage_mb=639,
36
+ max_tokens=512,
37
+ embed_dim=1024,
38
+ license="apache-2.0",
39
+ reference="https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1",
40
+ similarity_fn_name=ScoringFunction.COSINE,
41
+ framework=[
42
+ "Sentence Transformers",
43
+ "PyTorch",
44
+ "ONNX",
45
+ "safetensors",
46
+ "GGUF",
47
+ "Transformers",
48
+ ],
49
+ use_instructions=True,
50
+ citation="""
51
+ @online{emb2024mxbai,
52
+ title={Open Source Strikes Bread - New Fluffy Embeddings Model},
53
+ author={Sean Lee and Aamir Shakir and Darius Koenig and Julius Lipp},
54
+ year={2024},
55
+ url={https://www.mixedbread.ai/blog/mxbai-embed-large-v1},
56
+ }
57
+
58
+ @article{li2023angle,
59
+ title={AnglE-optimized Text Embeddings},
60
+ author={Li, Xianming and Li, Jing},
61
+ journal={arXiv preprint arXiv:2309.12871},
62
+ year={2023}
63
+ }
64
+ """,
65
+ public_training_code=None,
66
+ public_training_data=None,
67
+ training_datasets=mixedbread_training_data,
68
+ )
69
+
70
+ mxbai_embed_2d_large_v1 = ModelMeta(
71
+ loader=sentence_transformers_loader,
72
+ name="mixedbread-ai/mxbai-embed-2d-large-v1",
73
+ model_type=["dense"],
74
+ languages=["eng-Latn"],
75
+ open_weights=True,
76
+ revision="7e639ca8e344af398876ead3b19ec3c0b9068f49",
77
+ release_date="2024-03-04", # initial commit of hf model.
78
+ n_parameters=335_000_000,
79
+ n_embedding_parameters=31_254_528,
80
+ memory_usage_mb=None,
81
+ max_tokens=512,
82
+ embed_dim=768,
83
+ license="apache-2.0",
84
+ reference="https://huggingface.co/mixedbread-ai/mxbai-embed-2d-large-v1",
85
+ similarity_fn_name=ScoringFunction.COSINE,
86
+ framework=[
87
+ "Sentence Transformers",
88
+ "PyTorch",
89
+ "ONNX",
90
+ "safetensors",
91
+ "Transformers",
92
+ ],
93
+ use_instructions=True,
94
+ adapted_from=None,
95
+ superseded_by=None,
96
+ public_training_code=None,
97
+ public_training_data=None,
98
+ training_datasets=None,
99
+ )
100
+
101
+
102
+ mxbai_embed_xsmall_v1 = ModelMeta(
103
+ loader=sentence_transformers_loader,
104
+ name="mixedbread-ai/mxbai-embed-xsmall-v1",
105
+ model_type=["dense"],
106
+ languages=["eng-Latn"],
107
+ open_weights=True,
108
+ revision="2f741ec33328bb57e4704e1238fc59a4a5745705",
109
+ release_date="2024-08-13", # initial commit of hf model.
110
+ n_parameters=24_100_000,
111
+ n_embedding_parameters=11_720_448,
112
+ memory_usage_mb=None,
113
+ max_tokens=512,
114
+ embed_dim=384,
115
+ license="apache-2.0",
116
+ reference="https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1",
117
+ similarity_fn_name=ScoringFunction.COSINE,
118
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
119
+ use_instructions=True,
120
+ adapted_from="sentence-transformers/all-MiniLM-L6-v2",
121
+ superseded_by=None,
122
+ public_training_code=None,
123
+ public_training_data=None,
124
+ training_datasets=mixedbread_training_data,
125
+ citation="""@online{xsmall2024mxbai,
126
+ title={Every Byte Matters: Introducing mxbai-embed-xsmall-v1},
127
+ author={Sean Lee and Julius Lipp and Rui Huang and Darius Koenig},
128
+ year={2024},
129
+ url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
130
+ }""",
131
+ )
132
+
133
+ mxbai_rerank_xsmall_v1 = ModelMeta(
134
+ loader=CrossEncoderWrapper,
135
+ name="mixedbread-ai/mxbai-rerank-xsmall-v1",
136
+ revision="b5c6e9da73abc3711f593f705371cdbe9e0fe422",
137
+ release_date="2024-02-29",
138
+ languages=["eng-Latn"],
139
+ n_parameters=70830337,
140
+ memory_usage_mb=135.0,
141
+ max_tokens=512,
142
+ embed_dim=None,
143
+ license="apache-2.0",
144
+ open_weights=True,
145
+ public_training_code=None,
146
+ public_training_data=None,
147
+ framework=[
148
+ "PyTorch",
149
+ "Sentence Transformers",
150
+ "Transformers",
151
+ "ONNX",
152
+ "safetensors",
153
+ ],
154
+ reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1",
155
+ similarity_fn_name=None,
156
+ use_instructions=None,
157
+ training_datasets=None,
158
+ adapted_from=None,
159
+ superseded_by=None,
160
+ modalities=["text"],
161
+ model_type=["cross-encoder"],
162
+ citation="""@online{rerank2024mxbai,
163
+ title={Boost Your Search With The Crispy Mixedbread Rerank Models},
164
+ author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
165
+ year={2024},
166
+ url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
167
+ }""",
168
+ contacts=None,
169
+ )
170
+
171
+ mxbai_rerank_base_v1 = ModelMeta(
172
+ loader=CrossEncoderWrapper,
173
+ name="mixedbread-ai/mxbai-rerank-base-v1",
174
+ revision="800f24c113213a187e65bde9db00c15a2bb12738",
175
+ release_date="2024-02-29",
176
+ languages=["eng-Latn"],
177
+ n_parameters=184422913,
178
+ memory_usage_mb=352.0,
179
+ max_tokens=512,
180
+ embed_dim=None,
181
+ license="apache-2.0",
182
+ open_weights=True,
183
+ public_training_code=None,
184
+ public_training_data=None,
185
+ framework=[
186
+ "PyTorch",
187
+ "Sentence Transformers",
188
+ "Transformers",
189
+ "ONNX",
190
+ "safetensors",
191
+ ],
192
+ reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1",
193
+ similarity_fn_name=None,
194
+ use_instructions=None,
195
+ training_datasets=None,
196
+ adapted_from=None,
197
+ superseded_by=None,
198
+ modalities=["text"],
199
+ model_type=["cross-encoder"],
200
+ citation="""@online{rerank2024mxbai,
201
+ title={Boost Your Search With The Crispy Mixedbread Rerank Models},
202
+ author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
203
+ year={2024},
204
+ url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
205
+ }""",
206
+ contacts=None,
207
+ )
208
+
209
+ mxbai_rerank_large_v1 = ModelMeta(
210
+ loader=CrossEncoderWrapper,
211
+ name="mixedbread-ai/mxbai-rerank-large-v1",
212
+ revision="98f655841d5caf0b16eaff79c2b4ca109d920d17",
213
+ release_date="2024-02-29",
214
+ languages=["eng-Latn"],
215
+ n_parameters=435062785,
216
+ memory_usage_mb=830.0,
217
+ max_tokens=512,
218
+ embed_dim=None,
219
+ license="apache-2.0",
220
+ open_weights=True,
221
+ public_training_code=None,
222
+ public_training_data=None,
223
+ framework=[
224
+ "PyTorch",
225
+ "Sentence Transformers",
226
+ "Transformers",
227
+ "ONNX",
228
+ "safetensors",
229
+ ],
230
+ reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1",
231
+ similarity_fn_name=None,
232
+ use_instructions=None,
233
+ training_datasets=None,
234
+ adapted_from=None,
235
+ superseded_by=None,
236
+ modalities=["text"],
237
+ model_type=["cross-encoder"],
238
+ citation="""@online{rerank2024mxbai,
239
+ title={Boost Your Search With The Crispy Mixedbread Rerank Models},
240
+ author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
241
+ year={2024},
242
+ url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
243
+ }""",
244
+ contacts=None,
245
+ )
246
+
247
+ mxbai_edge_colbert_v0_17m = ModelMeta(
248
+ loader=MultiVectorModel,
249
+ name="mixedbread-ai/mxbai-edge-colbert-v0-17m",
250
+ model_type=["late-interaction"],
251
+ languages=["eng-Latn"],
252
+ open_weights=True,
253
+ revision="23ae07f5bf028bc0d1f80c82e6e2dd2311f13a46",
254
+ public_training_code=None,
255
+ public_training_data=None,
256
+ release_date="2025-10-16",
257
+ n_parameters=int(17 * 1e6),
258
+ memory_usage_mb=64,
259
+ max_tokens=7999,
260
+ embed_dim=None,
261
+ license="apache-2.0",
262
+ similarity_fn_name=ScoringFunction.MAX_SIM,
263
+ framework=["PyLate", "ColBERT", "Transformers", "safetensors"],
264
+ reference="https://huggingface.co/mixedbread-ai/mxbai-edge-colbert-v0-17m",
265
+ use_instructions=False,
266
+ adapted_from="https://huggingface.co/jhu-clsp/ettin-encoder-17m",
267
+ superseded_by=None,
268
+ training_datasets={
269
+ "CornStack",
270
+ "MSMARCO",
271
+ "NQ",
272
+ "HotpotQA",
273
+ "AmazonQA",
274
+ "LoTTE",
275
+ "MultiLongDocRetrieval",
276
+ # "FineWeb",
277
+ # "PubMedQA",
278
+ # "TriviaQA",
279
+ },
280
+ citation="""@misc{takehi2025fantasticsmallretrieverstrain,
281
+ title={Fantastic (small) Retrievers and How to Train Them: mxbai-edge-colbert-v0 Tech Report},
282
+ author={Rikiya Takehi and Benjamin Clavié and Sean Lee and Aamir Shakir},
283
+ year={2025},
284
+ eprint={2510.14880},
285
+ archivePrefix={arXiv},
286
+ primaryClass={cs.IR},
287
+ url={https://arxiv.org/abs/2510.14880},
288
+ }""",
289
+ contacts=None,
290
+ )
291
+
292
+ mxbai_edge_colbert_v0_32m = ModelMeta(
293
+ loader=MultiVectorModel,
294
+ name="mixedbread-ai/mxbai-edge-colbert-v0-32m",
295
+ model_type=["late-interaction"],
296
+ languages=["eng-Latn"],
297
+ open_weights=True,
298
+ revision="2f12870a85dae80680b9babc59992c9a2bc59e4a",
299
+ public_training_code=None,
300
+ public_training_data=None,
301
+ release_date="2025-10-16",
302
+ n_parameters=int(32 * 1e6),
303
+ memory_usage_mb=122,
304
+ max_tokens=511,
305
+ embed_dim=None,
306
+ license="apache-2.0",
307
+ similarity_fn_name=ScoringFunction.MAX_SIM,
308
+ framework=["PyLate", "ColBERT", "Transformers", "safetensors"],
309
+ reference="https://huggingface.co/mixedbread-ai/mxbai-edge-colbert-v0-32m",
310
+ use_instructions=False,
311
+ adapted_from="https://huggingface.co/jhu-clsp/ettin-encoder-32m",
312
+ superseded_by=None,
313
+ training_datasets={
314
+ "CornStack",
315
+ "MSMARCO",
316
+ "NQ",
317
+ "HotpotQA",
318
+ "AmazonQA",
319
+ "LoTTE",
320
+ "MultiLongDocRetrieval",
321
+ # "FineWeb",
322
+ # "PubMedQA",
323
+ # "TriviaQA",
324
+ },
325
+ citation="""@misc{takehi2025fantasticsmallretrieverstrain,
326
+ title={Fantastic (small) Retrievers and How to Train Them: mxbai-edge-colbert-v0 Tech Report},
327
+ author={Rikiya Takehi and Benjamin Clavié and Sean Lee and Aamir Shakir},
328
+ year={2025},
329
+ eprint={2510.14880},
330
+ archivePrefix={arXiv},
331
+ primaryClass={cs.IR},
332
+ url={https://arxiv.org/abs/2510.14880},
333
+ }""",
334
+ contacts=None,
335
+ )
@@ -16,7 +16,8 @@ mme5_mllama = ModelMeta(
16
16
  revision="cbb328b9bf9ff5362c852c3166931903226d46f1",
17
17
  release_date="2025-02-12",
18
18
  languages=["eng-Latn"],
19
- n_parameters=10_600_000_000, # 10.6B
19
+ n_parameters=10_600_000_000,
20
+ n_embedding_parameters=None, # 10.6B
20
21
  memory_usage_mb=20300,
21
22
  max_tokens=128_000,
22
23
  embed_dim=4096,
@@ -25,7 +26,7 @@ mme5_mllama = ModelMeta(
25
26
  open_weights=True,
26
27
  public_training_code=None,
27
28
  public_training_data="https://huggingface.co/datasets/intfloat/mmE5-MMEB-hardneg, https://huggingface.co/datasets/intfloat/mmE5-synthetic",
28
- framework=["Sentence Transformers", "PyTorch"],
29
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
29
30
  reference="https://huggingface.co/intfloat/mmE5-mllama-11b-instruct",
30
31
  similarity_fn_name=ScoringFunction.COSINE,
31
32
  use_instructions=True,
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies, requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  MOCOV3_CITATION = """@Article{chen2021mocov3,
14
19
  author = {Xinlei Chen* and Saining Xie* and Kaiming He},
@@ -117,7 +122,7 @@ mocov3_training_datasets = set(
117
122
  )
118
123
 
119
124
  mocov3_vit_base = ModelMeta(
120
- loader=mocov3_loader, # type: ignore
125
+ loader=mocov3_loader,
121
126
  name="nyu-visionx/moco-v3-vit-b",
122
127
  model_type=["dense"],
123
128
  languages=["eng-Latn"],
@@ -125,6 +130,7 @@ mocov3_vit_base = ModelMeta(
125
130
  release_date="2024-06-03",
126
131
  modalities=["image"],
127
132
  n_parameters=86_600_000,
133
+ n_embedding_parameters=None,
128
134
  memory_usage_mb=330,
129
135
  max_tokens=None,
130
136
  embed_dim=768,
@@ -132,7 +138,7 @@ mocov3_vit_base = ModelMeta(
132
138
  open_weights=True,
133
139
  public_training_code="https://github.com/facebookresearch/moco-v3",
134
140
  public_training_data=None,
135
- framework=["PyTorch"],
141
+ framework=["PyTorch", "Transformers", "safetensors"],
136
142
  reference="https://github.com/facebookresearch/moco-v3",
137
143
  similarity_fn_name=ScoringFunction.COSINE,
138
144
  use_instructions=False,
@@ -141,7 +147,7 @@ mocov3_vit_base = ModelMeta(
141
147
  )
142
148
 
143
149
  mocov3_vit_large = ModelMeta(
144
- loader=mocov3_loader, # type: ignore
150
+ loader=mocov3_loader,
145
151
  name="nyu-visionx/moco-v3-vit-l",
146
152
  model_type=["dense"],
147
153
  languages=["eng-Latn"],
@@ -149,6 +155,7 @@ mocov3_vit_large = ModelMeta(
149
155
  release_date="2024-06-03",
150
156
  modalities=["image"],
151
157
  n_parameters=304_000_000,
158
+ n_embedding_parameters=None,
152
159
  memory_usage_mb=1161,
153
160
  max_tokens=None,
154
161
  embed_dim=1024,
@@ -156,7 +163,7 @@ mocov3_vit_large = ModelMeta(
156
163
  open_weights=True,
157
164
  public_training_code="https://github.com/facebookresearch/moco-v3",
158
165
  public_training_data=None,
159
- framework=["PyTorch"],
166
+ framework=["PyTorch", "Transformers", "safetensors"],
160
167
  reference="https://github.com/facebookresearch/moco-v3",
161
168
  similarity_fn_name=ScoringFunction.COSINE,
162
169
  use_instructions=False,
@@ -1,6 +1,6 @@
1
1
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
2
  from mteb.models.model_meta import ModelMeta
3
- from mteb.models.models_protocols import PromptType
3
+ from mteb.types import PromptType
4
4
 
5
5
 
6
6
  def instruction_template(
@@ -175,13 +175,14 @@ MoD_Embedding = ModelMeta(
175
175
  revision="acbb5b70fdab262226a6af2bc62001de8021b05c",
176
176
  release_date="2025-12-14",
177
177
  n_parameters=4021774336,
178
+ n_embedding_parameters=None,
178
179
  memory_usage_mb=7671,
179
180
  embed_dim=2560,
180
181
  max_tokens=32768,
181
182
  license="apache-2.0",
182
183
  reference="https://huggingface.co/bflhc/MoD-Embedding",
183
184
  similarity_fn_name="cosine",
184
- framework=["Sentence Transformers", "PyTorch"],
185
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
185
186
  use_instructions=True,
186
187
  public_training_code=None,
187
188
  public_training_data=None,
@@ -1,17 +1,23 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import numpy as np
5
- from torch.utils.data import DataLoader
6
7
 
7
8
  from mteb._requires_package import requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
12
11
 
13
12
  from .bge_models import bge_training_data
14
13
 
14
+ if TYPE_CHECKING:
15
+ from torch.utils.data import DataLoader
16
+
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.types import Array, BatchedInput, PromptType
19
+
20
+
15
21
  logger = logging.getLogger(__name__)
16
22
 
17
23
  MODEL2VEC_CITATION = """@software{minishlab2024model2vec,
@@ -139,7 +145,7 @@ class Model2VecModel(AbsEncoder):
139
145
  **kwargs: Additional arguments to pass to the wrapper.
140
146
  """
141
147
  requires_package(self, "model2vec", model_name, "pip install 'mteb[model2vec]'")
142
- from model2vec import StaticModel # type: ignore
148
+ from model2vec import StaticModel
143
149
 
144
150
  self.model_name = model_name
145
151
  self.model = StaticModel.from_pretrained(self.model_name)
@@ -167,12 +173,13 @@ m2v_base_glove_subword = ModelMeta(
167
173
  revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4",
168
174
  release_date="2024-09-21",
169
175
  n_parameters=int(103 * 1e6),
176
+ n_embedding_parameters=int(103 * 1e6),
170
177
  memory_usage_mb=391,
171
178
  max_tokens=np.inf, # Theoretically infinite
172
179
  embed_dim=256,
173
180
  license="mit",
174
181
  similarity_fn_name=ScoringFunction.COSINE,
175
- framework=["NumPy", "Sentence Transformers"],
182
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
176
183
  reference="https://huggingface.co/minishlab/M2V_base_glove_subword",
177
184
  use_instructions=False,
178
185
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -193,12 +200,13 @@ m2v_base_glove = ModelMeta(
193
200
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b",
194
201
  release_date="2024-09-21",
195
202
  n_parameters=int(102 * 1e6),
203
+ n_embedding_parameters=int(102 * 1e6),
196
204
  memory_usage_mb=391,
197
205
  max_tokens=np.inf,
198
206
  embed_dim=256,
199
207
  license="mit",
200
208
  similarity_fn_name=ScoringFunction.COSINE,
201
- framework=["NumPy", "Sentence Transformers"],
209
+ framework=["NumPy", "Sentence Transformers", "safetensors"],
202
210
  reference="https://huggingface.co/minishlab/M2V_base_glove",
203
211
  use_instructions=False,
204
212
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -218,12 +226,13 @@ m2v_base_output = ModelMeta(
218
226
  revision="02460ae401a22b09d2c6652e23371398329551e2",
219
227
  release_date="2024-09-21",
220
228
  n_parameters=int(7.56 * 1e6),
229
+ n_embedding_parameters=int(7.56 * 1e6),
221
230
  memory_usage_mb=29,
222
231
  max_tokens=np.inf,
223
232
  embed_dim=256,
224
233
  license="mit",
225
234
  similarity_fn_name=ScoringFunction.COSINE,
226
- framework=["NumPy", "Sentence Transformers"],
235
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
227
236
  reference="https://huggingface.co/minishlab/M2V_base_output",
228
237
  use_instructions=False,
229
238
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -243,12 +252,13 @@ m2v_multilingual_output = ModelMeta(
243
252
  revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305",
244
253
  release_date="2024-09-21",
245
254
  n_parameters=int(128 * 1e6),
255
+ n_embedding_parameters=int(128 * 1e6),
246
256
  memory_usage_mb=489,
247
257
  max_tokens=np.inf,
248
258
  embed_dim=256,
249
259
  license="mit",
250
260
  similarity_fn_name=ScoringFunction.COSINE,
251
- framework=["NumPy", "Sentence Transformers"],
261
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
252
262
  reference="https://huggingface.co/minishlab/M2V_multilingual_output",
253
263
  use_instructions=False,
254
264
  adapted_from="sentence-transformers/LaBSE",
@@ -268,12 +278,13 @@ potion_base_2m = ModelMeta(
268
278
  revision="86db093558fbced2072b929eb1690bce5272bd4b",
269
279
  release_date="2024-10-29",
270
280
  n_parameters=int(2 * 1e6),
281
+ n_embedding_parameters=int(2 * 1e6),
271
282
  memory_usage_mb=7,
272
283
  max_tokens=np.inf,
273
284
  embed_dim=64,
274
285
  license="mit",
275
286
  similarity_fn_name=ScoringFunction.COSINE,
276
- framework=["NumPy", "Sentence Transformers"],
287
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
277
288
  reference="https://huggingface.co/minishlab/potion-base-2M",
278
289
  use_instructions=False,
279
290
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -293,12 +304,13 @@ potion_base_4m = ModelMeta(
293
304
  revision="81b1802ada41afcd0987a37dc15e569c9fa76f04",
294
305
  release_date="2024-10-29",
295
306
  n_parameters=int(3.78 * 1e6),
307
+ n_embedding_parameters=int(3.78 * 1e6),
296
308
  memory_usage_mb=14,
297
309
  max_tokens=np.inf,
298
310
  embed_dim=128,
299
311
  license="mit",
300
312
  similarity_fn_name=ScoringFunction.COSINE,
301
- framework=["NumPy", "Sentence Transformers"],
313
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
302
314
  reference="https://huggingface.co/minishlab/potion-base-4M",
303
315
  use_instructions=False,
304
316
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -318,12 +330,13 @@ potion_base_8m = ModelMeta(
318
330
  revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce",
319
331
  release_date="2024-10-29",
320
332
  n_parameters=int(7.56 * 1e6),
333
+ n_embedding_parameters=int(7.56 * 1e6),
321
334
  memory_usage_mb=29,
322
335
  max_tokens=np.inf,
323
336
  embed_dim=256,
324
337
  license="mit",
325
338
  similarity_fn_name=ScoringFunction.COSINE,
326
- framework=["NumPy", "Sentence Transformers"],
339
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
327
340
  reference="https://huggingface.co/minishlab/potion-base-8M",
328
341
  use_instructions=False,
329
342
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -343,12 +356,13 @@ potion_multilingual_128m = ModelMeta(
343
356
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2a",
344
357
  release_date="2025-05-23",
345
358
  n_parameters=128 * 1e6,
359
+ n_embedding_parameters=128 * 1e6,
346
360
  memory_usage_mb=489,
347
361
  max_tokens=np.inf,
348
362
  embed_dim=256,
349
363
  license="mit",
350
364
  similarity_fn_name="cosine",
351
- framework=["NumPy"],
365
+ framework=["NumPy", "ONNX", "safetensors", "Sentence Transformers"],
352
366
  reference="https://huggingface.co/minishlab/potion-multilingual-128M",
353
367
  use_instructions=False,
354
368
  adapted_from="BAAI/bge-m3",
@@ -368,12 +382,13 @@ pubmed_bert_100k = ModelMeta(
368
382
  revision="bac5e3b12fb8c650e92a19c41b436732c4f16e9e",
369
383
  release_date="2025-01-03",
370
384
  n_parameters=1 * 1e5,
385
+ n_embedding_parameters=1 * 1e5,
371
386
  memory_usage_mb=0,
372
387
  max_tokens=np.inf,
373
388
  embed_dim=64,
374
389
  license="apache-2.0",
375
390
  similarity_fn_name="cosine",
376
- framework=["NumPy"],
391
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
377
392
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-100K",
378
393
  use_instructions=False,
379
394
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -392,12 +407,13 @@ pubmed_bert_500k = ModelMeta(
392
407
  revision="34ba71e35c393fdad7ed695113f653feb407b16b",
393
408
  release_date="2025-01-03",
394
409
  n_parameters=5 * 1e5,
410
+ n_embedding_parameters=5 * 1e5,
395
411
  memory_usage_mb=2,
396
412
  max_tokens=np.inf,
397
413
  embed_dim=64,
398
414
  license="apache-2.0",
399
415
  similarity_fn_name="cosine",
400
- framework=["NumPy"],
416
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
401
417
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-500K",
402
418
  use_instructions=False,
403
419
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -416,12 +432,13 @@ pubmed_bert_1m = ModelMeta(
416
432
  revision="2b7fed222594708da6d88bcda92ae9b434b7ddd1",
417
433
  release_date="2025-01-03",
418
434
  n_parameters=1 * 1e6,
435
+ n_embedding_parameters=1 * 1e6,
419
436
  memory_usage_mb=2,
420
437
  max_tokens=np.inf,
421
438
  embed_dim=64,
422
439
  license="apache-2.0",
423
440
  similarity_fn_name="cosine",
424
- framework=["NumPy"],
441
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
425
442
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-1M",
426
443
  use_instructions=False,
427
444
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -440,12 +457,13 @@ pubmed_bert_2m = ModelMeta(
440
457
  revision="1d7bbe04d6713e425161146bfdc71473cbed498a",
441
458
  release_date="2025-01-03",
442
459
  n_parameters=1.95 * 1e6,
460
+ n_embedding_parameters=1.95 * 1e6,
443
461
  memory_usage_mb=7,
444
462
  max_tokens=np.inf,
445
463
  embed_dim=64,
446
464
  license="apache-2.0",
447
465
  similarity_fn_name="cosine",
448
- framework=["NumPy"],
466
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
449
467
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-2M",
450
468
  use_instructions=False,
451
469
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -464,12 +482,13 @@ pubmed_bert_8m = ModelMeta(
464
482
  revision="387d350015e963744f4fafe56a574b7cd48646c9",
465
483
  release_date="2025-01-03",
466
484
  n_parameters=7.81 * 1e6,
485
+ n_embedding_parameters=7.81 * 1e6,
467
486
  memory_usage_mb=30,
468
487
  max_tokens=np.inf,
469
488
  embed_dim=256,
470
489
  license="apache-2.0",
471
490
  similarity_fn_name="cosine",
472
- framework=["NumPy"],
491
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
473
492
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-8M",
474
493
  use_instructions=False,
475
494
  adapted_from="NeuML/pubmedbert-base-embeddings",