mteb-2.5.2-py3-none-any.whl → mteb-2.7.9-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  """Implementation of Sentence Transformers model validated in MTEB."""
2
2
 
3
+ import numpy as np
4
+
3
5
  from mteb.models.model_meta import ModelMeta, ScoringFunction
4
6
  from mteb.models.sentence_transformer_wrapper import (
5
7
  SentenceTransformerEncoderWrapper,
@@ -119,13 +121,20 @@ all_minilm_l6_v2 = ModelMeta(
119
121
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
120
122
  release_date="2021-08-30",
121
123
  n_parameters=22_700_000,
124
+ n_embedding_parameters=11_720_448,
122
125
  memory_usage_mb=87,
123
126
  embed_dim=384,
124
127
  license="apache-2.0",
125
128
  max_tokens=256,
126
129
  reference="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
127
130
  similarity_fn_name=ScoringFunction.COSINE,
128
- framework=["Sentence Transformers", "PyTorch"],
131
+ framework=[
132
+ "Sentence Transformers",
133
+ "PyTorch",
134
+ "ONNX",
135
+ "safetensors",
136
+ "Transformers",
137
+ ],
129
138
  use_instructions=False,
130
139
  superseded_by=None,
131
140
  adapted_from=None,
@@ -144,13 +153,20 @@ all_minilm_l12_v2 = ModelMeta(
144
153
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
145
154
  release_date="2021-08-30",
146
155
  n_parameters=33_400_000,
156
+ n_embedding_parameters=11_720_448,
147
157
  memory_usage_mb=127,
148
158
  embed_dim=384,
149
159
  license="apache-2.0",
150
160
  max_tokens=256,
151
161
  reference="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
152
162
  similarity_fn_name=ScoringFunction.COSINE,
153
- framework=["Sentence Transformers", "PyTorch"],
163
+ framework=[
164
+ "Sentence Transformers",
165
+ "PyTorch",
166
+ "ONNX",
167
+ "safetensors",
168
+ "Transformers",
169
+ ],
154
170
  use_instructions=False,
155
171
  superseded_by=None,
156
172
  adapted_from=None,
@@ -169,13 +185,20 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
169
185
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
170
186
  release_date="2019-11-01", # release date of paper
171
187
  n_parameters=118_000_000,
188
+ n_embedding_parameters=96_014_208,
172
189
  memory_usage_mb=449,
173
190
  embed_dim=768,
174
191
  license="apache-2.0",
175
192
  max_tokens=512,
176
193
  reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
177
194
  similarity_fn_name=ScoringFunction.COSINE,
178
- framework=["Sentence Transformers", "PyTorch"],
195
+ framework=[
196
+ "Sentence Transformers",
197
+ "PyTorch",
198
+ "ONNX",
199
+ "safetensors",
200
+ "Transformers",
201
+ ],
179
202
  use_instructions=False,
180
203
  superseded_by=None,
181
204
  adapted_from=None,
@@ -194,13 +217,20 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
194
217
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
195
218
  release_date="2019-11-01", # release date of paper
196
219
  n_parameters=278_000_000,
220
+ n_embedding_parameters=192_001_536,
197
221
  memory_usage_mb=1061,
198
222
  embed_dim=768,
199
223
  license="apache-2.0",
200
224
  max_tokens=512,
201
225
  reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
202
226
  similarity_fn_name=ScoringFunction.COSINE,
203
- framework=["Sentence Transformers", "PyTorch"],
227
+ framework=[
228
+ "Sentence Transformers",
229
+ "PyTorch",
230
+ "ONNX",
231
+ "safetensors",
232
+ "Transformers",
233
+ ],
204
234
  use_instructions=False,
205
235
  superseded_by=None,
206
236
  adapted_from=None,
@@ -230,13 +260,14 @@ labse = ModelMeta(
230
260
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
231
261
  release_date="2019-11-01", # release date of paper
232
262
  n_parameters=471_000_000,
263
+ n_embedding_parameters=384_885_504,
233
264
  memory_usage_mb=1796,
234
265
  embed_dim=768,
235
266
  license="apache-2.0",
236
267
  max_tokens=512,
237
268
  reference="https://huggingface.co/sentence-transformers/LaBSE",
238
269
  similarity_fn_name=ScoringFunction.COSINE,
239
- framework=["Sentence Transformers", "PyTorch"],
270
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
240
271
  use_instructions=False,
241
272
  superseded_by=None,
242
273
  adapted_from=None,
@@ -268,13 +299,20 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
268
299
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
269
300
  release_date="2021-08-30",
270
301
  n_parameters=22_700_000,
302
+ n_embedding_parameters=11_720_448,
271
303
  memory_usage_mb=87,
272
304
  embed_dim=384,
273
305
  license="apache-2.0",
274
306
  max_tokens=512,
275
307
  reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
276
308
  similarity_fn_name=ScoringFunction.COSINE,
277
- framework=["Sentence Transformers", "PyTorch"],
309
+ framework=[
310
+ "Sentence Transformers",
311
+ "PyTorch",
312
+ "ONNX",
313
+ "safetensors",
314
+ "Transformers",
315
+ ],
278
316
  use_instructions=False,
279
317
  superseded_by=None,
280
318
  adapted_from="nreimers/MiniLM-L6-H384-uncased",
@@ -293,13 +331,20 @@ all_mpnet_base_v2 = ModelMeta(
293
331
  revision="9a3225965996d404b775526de6dbfe85d3368642",
294
332
  release_date="2021-08-30",
295
333
  n_parameters=109_000_000,
334
+ n_embedding_parameters=23_444_736,
296
335
  memory_usage_mb=418,
297
336
  embed_dim=768,
298
337
  license="apache-2.0",
299
338
  max_tokens=384,
300
339
  reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
301
340
  similarity_fn_name=ScoringFunction.COSINE,
302
- framework=["Sentence Transformers", "PyTorch"],
341
+ framework=[
342
+ "Sentence Transformers",
343
+ "PyTorch",
344
+ "ONNX",
345
+ "safetensors",
346
+ "Transformers",
347
+ ],
303
348
  use_instructions=False,
304
349
  superseded_by=None,
305
350
  adapted_from=None,
@@ -397,13 +442,14 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
397
442
  revision="7264ea07c5365a11d7e6d87dbb6195889a13054f",
398
443
  release_date="2025-01-15",
399
444
  n_parameters=108_420_096,
445
+ n_embedding_parameters=None,
400
446
  memory_usage_mb=413,
401
447
  embed_dim=1024,
402
448
  license="apache-2.0",
403
449
  max_tokens=None,
404
450
  reference="https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1",
405
451
  similarity_fn_name="cosine",
406
- framework=["Sentence Transformers", "PyTorch"],
452
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
407
453
  use_instructions=False,
408
454
  superseded_by=None,
409
455
  adapted_from=None,
@@ -430,13 +476,14 @@ contriever = ModelMeta(
430
476
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
431
477
  release_date="2022-06-25", # release date of model on HF
432
478
  n_parameters=150_000_000,
479
+ n_embedding_parameters=23_440_896,
433
480
  memory_usage_mb=572,
434
481
  embed_dim=768,
435
482
  license=None,
436
483
  max_tokens=512,
437
484
  reference="https://huggingface.co/facebook/contriever-msmarco",
438
485
  similarity_fn_name=ScoringFunction.DOT_PRODUCT,
439
- framework=["Sentence Transformers", "PyTorch"],
486
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
440
487
  use_instructions=False,
441
488
  citation="""
442
489
  @misc{izacard2021contriever,
@@ -460,13 +507,14 @@ microllama_text_embedding = ModelMeta(
460
507
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
461
508
  release_date="2024-11-10",
462
509
  n_parameters=272_000_000,
510
+ n_embedding_parameters=32_769_024,
463
511
  memory_usage_mb=1037,
464
512
  embed_dim=1024,
465
513
  license="apache-2.0",
466
514
  max_tokens=2048,
467
515
  reference="https://huggingface.co/keeeeenw/MicroLlama-text-embedding",
468
516
  similarity_fn_name=ScoringFunction.COSINE,
469
- framework=["Sentence Transformers", "PyTorch"],
517
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
470
518
  use_instructions=False,
471
519
  superseded_by=None,
472
520
  adapted_from=None,
@@ -488,13 +536,13 @@ microllama_text_embedding = ModelMeta(
488
536
 
489
537
  SENTENCE_T5_CITATION = """
490
538
  @misc{ni2021sentencet5scalablesentenceencoders,
491
- title={Sentence-T5: Scalable Sentence Encoders from Pre-trained Text-to-Text Models},
539
+ title={Sentence-T5: Scalable Sentence Encoders from Pre-trained Text-to-Text Models},
492
540
  author={Jianmo Ni and Gustavo Hernández Ábrego and Noah Constant and Ji Ma and Keith B. Hall and Daniel Cer and Yinfei Yang},
493
541
  year={2021},
494
542
  eprint={2108.08877},
495
543
  archivePrefix={arXiv},
496
544
  primaryClass={cs.CL},
497
- url={https://arxiv.org/abs/2108.08877},
545
+ url={https://arxiv.org/abs/2108.08877},
498
546
  }
499
547
  """
500
548
  sentence_t5_base = ModelMeta(
@@ -506,13 +554,14 @@ sentence_t5_base = ModelMeta(
506
554
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
507
555
  release_date="2022-02-09",
508
556
  n_parameters=110_000_000,
557
+ n_embedding_parameters=24_674_304,
509
558
  memory_usage_mb=209,
510
559
  embed_dim=768,
511
560
  license="apache-2.0",
512
561
  max_tokens=512,
513
562
  reference="https://huggingface.co/sentence-transformers/sentence-t5-base",
514
563
  similarity_fn_name=ScoringFunction.COSINE,
515
- framework=["Sentence Transformers", "PyTorch"],
564
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
516
565
  use_instructions=False,
517
566
  public_training_code=None,
518
567
  public_training_data=None,
@@ -529,13 +578,14 @@ sentence_t5_large = ModelMeta(
529
578
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
530
579
  release_date="2022-02-09",
531
580
  n_parameters=335_000_000,
581
+ n_embedding_parameters=32_899_072,
532
582
  memory_usage_mb=639,
533
583
  embed_dim=768,
534
584
  license="apache-2.0",
535
585
  max_tokens=512,
536
586
  reference="https://huggingface.co/sentence-transformers/sentence-t5-large",
537
587
  similarity_fn_name=ScoringFunction.COSINE,
538
- framework=["Sentence Transformers", "PyTorch"],
588
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
539
589
  use_instructions=False,
540
590
  public_training_code=None,
541
591
  public_training_data=None,
@@ -552,13 +602,14 @@ sentence_t5_xl = ModelMeta(
552
602
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
553
603
  release_date="2024-03-27",
554
604
  n_parameters=3_000_000_000,
605
+ n_embedding_parameters=32_899_072,
555
606
  memory_usage_mb=2367,
556
607
  embed_dim=768,
557
608
  license="apache-2.0",
558
609
  max_tokens=512,
559
610
  reference="https://huggingface.co/sentence-transformers/sentence-t5-xl",
560
611
  similarity_fn_name=ScoringFunction.COSINE,
561
- framework=["Sentence Transformers", "PyTorch"],
612
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
562
613
  use_instructions=False,
563
614
  public_training_code=None,
564
615
  public_training_data=None,
@@ -575,13 +626,14 @@ sentence_t5_xxl = ModelMeta(
575
626
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
576
627
  release_date="2024-03-27",
577
628
  n_parameters=11_000_000_000,
629
+ n_embedding_parameters=None,
578
630
  memory_usage_mb=9279,
579
631
  embed_dim=768,
580
632
  license="apache-2.0",
581
633
  max_tokens=512,
582
634
  reference="https://huggingface.co/sentence-transformers/sentence-t5-xxl",
583
635
  similarity_fn_name=ScoringFunction.COSINE,
584
- framework=["Sentence Transformers", "PyTorch"],
636
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
585
637
  use_instructions=False,
586
638
  public_training_code=None,
587
639
  public_training_data=None,
@@ -590,13 +642,13 @@ sentence_t5_xxl = ModelMeta(
590
642
  )
591
643
  GTR_CITATION = """
592
644
  @misc{ni2021largedualencodersgeneralizable,
593
- title={Large Dual Encoders Are Generalizable Retrievers},
645
+ title={Large Dual Encoders Are Generalizable Retrievers},
594
646
  author={Jianmo Ni and Chen Qu and Jing Lu and Zhuyun Dai and Gustavo Hernández Ábrego and Ji Ma and Vincent Y. Zhao and Yi Luan and Keith B. Hall and Ming-Wei Chang and Yinfei Yang},
595
647
  year={2021},
596
648
  eprint={2112.07899},
597
649
  archivePrefix={arXiv},
598
650
  primaryClass={cs.IR},
599
- url={https://arxiv.org/abs/2112.07899},
651
+ url={https://arxiv.org/abs/2112.07899},
600
652
  }
601
653
  """
602
654
  gtr_t5_large = ModelMeta(
@@ -608,13 +660,14 @@ gtr_t5_large = ModelMeta(
608
660
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
609
661
  release_date="2022-02-09",
610
662
  n_parameters=335_000_000,
663
+ n_embedding_parameters=32_899_072,
611
664
  memory_usage_mb=639,
612
665
  embed_dim=768,
613
666
  license="apache-2.0",
614
667
  max_tokens=512,
615
668
  reference="https://huggingface.co/sentence-transformers/gtr-t5-large",
616
669
  similarity_fn_name=ScoringFunction.COSINE,
617
- framework=["Sentence Transformers", "PyTorch"],
670
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
618
671
  use_instructions=False,
619
672
  public_training_code=None,
620
673
  public_training_data=None,
@@ -643,13 +696,14 @@ gtr_t5_xl = ModelMeta(
643
696
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
644
697
  release_date="2022-02-09",
645
698
  n_parameters=1_240_000_000,
699
+ n_embedding_parameters=32_899_072,
646
700
  memory_usage_mb=2367,
647
701
  embed_dim=768,
648
702
  license="apache-2.0",
649
703
  max_tokens=512,
650
704
  reference="https://huggingface.co/sentence-transformers/gtr-t5-xl",
651
705
  similarity_fn_name=ScoringFunction.COSINE,
652
- framework=["Sentence Transformers", "PyTorch"],
706
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
653
707
  use_instructions=False,
654
708
  public_training_code=None,
655
709
  public_training_data=None,
@@ -677,13 +731,14 @@ gtr_t5_xxl = ModelMeta(
677
731
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
678
732
  release_date="2022-02-09",
679
733
  n_parameters=4_860_000_000,
734
+ n_embedding_parameters=None,
680
735
  memory_usage_mb=9279,
681
736
  embed_dim=768,
682
737
  license="apache-2.0",
683
738
  max_tokens=512,
684
739
  reference="https://huggingface.co/sentence-transformers/gtr-t5-xxl",
685
740
  similarity_fn_name=ScoringFunction.COSINE,
686
- framework=["Sentence Transformers", "PyTorch"],
741
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
687
742
  use_instructions=False,
688
743
  public_training_code=None,
689
744
  public_training_data=None,
@@ -712,13 +767,14 @@ gtr_t5_base = ModelMeta(
712
767
  revision="7027e9594267928589816394bdd295273ddc0739",
713
768
  release_date="2022-02-09",
714
769
  n_parameters=110_000_000,
770
+ n_embedding_parameters=24_674_304,
715
771
  memory_usage_mb=209,
716
772
  embed_dim=768,
717
773
  license="apache-2.0",
718
774
  max_tokens=512,
719
775
  reference="https://huggingface.co/sentence-transformers/gtr-t5-base",
720
776
  similarity_fn_name=ScoringFunction.COSINE,
721
- framework=["Sentence Transformers", "PyTorch"],
777
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
722
778
  use_instructions=False,
723
779
  public_training_code=None,
724
780
  public_training_data=None,
@@ -737,3 +793,67 @@ gtr_t5_base = ModelMeta(
737
793
  },
738
794
  citation=GTR_CITATION,
739
795
  )
796
+
797
+ static_retrieval_mrl_en_v1 = ModelMeta(
798
+ loader=sentence_transformers_loader,
799
+ name="sentence-transformers/static-retrieval-mrl-en-v1",
800
+ revision="f60985c706f192d45d218078e49e5a8b6f15283a",
801
+ release_date="2024-10-24",
802
+ languages=["eng-Latn"],
803
+ n_parameters=3_125_4528,
804
+ memory_usage_mb=119,
805
+ max_tokens=np.inf,
806
+ embed_dim=1024,
807
+ license="apache-2.0",
808
+ open_weights=True,
809
+ public_training_code="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1/blob/main/train.py",
810
+ public_training_data=None,
811
+ framework=["PyTorch", "Sentence Transformers"],
812
+ reference="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1",
813
+ similarity_fn_name=ScoringFunction.COSINE,
814
+ use_instructions=False,
815
+ training_datasets={
816
+ "MSMARCO",
817
+ # gooaq
818
+ # s2orc
819
+ # allnli
820
+ # paq
821
+ # trivia-qa
822
+ # swim-ir-monolingual
823
+ # PubMedQA
824
+ # swim
825
+ "MIRACLRetrieval",
826
+ "MultiLongDocRetrieval",
827
+ "MrTidyRetrieval",
828
+ },
829
+ modalities=["text"],
830
+ model_type=["dense"],
831
+ )
832
+
833
+ multi_qa_mpnet_base_dot_v1 = ModelMeta(
834
+ loader=sentence_transformers_loader,
835
+ name="sentence-transformers/multi-qa-mpnet-base-dot-v1",
836
+ revision="3af7c6da5b3e1bea796ef6c97fe237538cbe6e7f",
837
+ release_date="2021-08-23",
838
+ languages=["eng-Latn"],
839
+ n_parameters=109486978,
840
+ memory_usage_mb=418.0,
841
+ max_tokens=512,
842
+ embed_dim=768,
843
+ license=None,
844
+ open_weights=True,
845
+ public_training_code="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1/blob/main/train_script.py",
846
+ public_training_data=None,
847
+ framework=["PyTorch", "Sentence Transformers"],
848
+ reference="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1",
849
+ similarity_fn_name=ScoringFunction.DOT_PRODUCT,
850
+ use_instructions=False,
851
+ training_datasets={
852
+ "MSMARCO",
853
+ "YahooAnswersTopicsClassification",
854
+ "NQ",
855
+ },
856
+ adapted_from="microsoft/mpnet-base",
857
+ modalities=["text"],
858
+ model_type=["dense"],
859
+ )
@@ -10,13 +10,14 @@ codemodernbert_crow_meta = ModelMeta(
10
10
  revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
11
  release_date="2025-04-21",
12
12
  n_parameters=151668480,
13
+ n_embedding_parameters=None,
13
14
  memory_usage_mb=607,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
16
17
  max_tokens=1024,
17
18
  reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
18
19
  similarity_fn_name="cosine",
19
- framework=["Sentence Transformers", "PyTorch"],
20
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
20
21
  use_instructions=False,
21
22
  public_training_code=None,
22
23
  public_training_data=None,