mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -244,13 +244,14 @@ rubert_tiny = ModelMeta(
244
244
  revision="5441c5ea8026d4f6d7505ec004845409f1259fb1",
245
245
  release_date="2021-05-24",
246
246
  n_parameters=11_900_000,
247
+ n_embedding_parameters=9_223_968,
247
248
  memory_usage_mb=45,
248
249
  embed_dim=312,
249
250
  license="mit",
250
251
  max_tokens=512,
251
252
  reference="https://huggingface.co/cointegrated/rubert-tiny",
252
253
  similarity_fn_name=ScoringFunction.COSINE,
253
- framework=["Sentence Transformers", "PyTorch"],
254
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
254
255
  use_instructions=False,
255
256
  public_training_code="https://gist.github.com/avidale/7bc6350f26196918bf339c01261f5c60",
256
257
  training_datasets={
@@ -270,13 +271,14 @@ rubert_tiny2 = ModelMeta(
270
271
  revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3",
271
272
  release_date="2021-10-28",
272
273
  n_parameters=29_400_000,
274
+ n_embedding_parameters=26_154_336,
273
275
  memory_usage_mb=112,
274
276
  embed_dim=312,
275
277
  license="mit",
276
278
  max_tokens=2048,
277
279
  reference="https://huggingface.co/cointegrated/rubert-tiny2",
278
280
  similarity_fn_name=ScoringFunction.COSINE,
279
- framework=["Sentence Transformers", "PyTorch"],
281
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
280
282
  use_instructions=False,
281
283
  public_training_code="https://colab.research.google.com/drive/1mSWfIQ6PIlteLVZ9DKKpcorycgLIKZLf?usp=sharing",
282
284
  training_datasets=set(
@@ -297,13 +299,14 @@ sbert_large_nlu_ru = ModelMeta(
297
299
  revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a",
298
300
  release_date="2020-11-20",
299
301
  n_parameters=427_000_000,
302
+ n_embedding_parameters=123_021_312,
300
303
  memory_usage_mb=1629,
301
304
  embed_dim=1024,
302
305
  license="mit",
303
306
  max_tokens=512, # best guess
304
307
  reference="https://huggingface.co/ai-forever/sbert_large_nlu_ru",
305
308
  similarity_fn_name=ScoringFunction.COSINE,
306
- framework=["Sentence Transformers", "PyTorch"],
309
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
307
310
  use_instructions=False,
308
311
  public_training_code=None,
309
312
  public_training_data=None,
@@ -323,13 +326,14 @@ sbert_large_mt_nlu_ru = ModelMeta(
323
326
  revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0",
324
327
  release_date="2021-05-18",
325
328
  n_parameters=427_000_000,
329
+ n_embedding_parameters=123_021_312,
326
330
  memory_usage_mb=1629,
327
331
  embed_dim=1024,
328
332
  license="not specified",
329
333
  max_tokens=512, # best guess
330
334
  reference="https://huggingface.co/ai-forever/sbert_large_mt_nlu_ru",
331
335
  similarity_fn_name=ScoringFunction.COSINE,
332
- framework=["Sentence Transformers", "PyTorch"],
336
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
333
337
  use_instructions=False,
334
338
  public_training_code=None,
335
339
  public_training_data=None,
@@ -351,13 +355,14 @@ user_base_ru = ModelMeta(
351
355
  revision="436a489a2087d61aa670b3496a9915f84e46c861",
352
356
  release_date="2024-06-10",
353
357
  n_parameters=427_000_000,
358
+ n_embedding_parameters=38_603_520,
354
359
  memory_usage_mb=473,
355
360
  embed_dim=768,
356
361
  license="apache-2.0",
357
362
  max_tokens=512,
358
363
  reference="https://huggingface.co/deepvk/USER-base",
359
364
  similarity_fn_name=ScoringFunction.COSINE,
360
- framework=["Sentence Transformers", "PyTorch"],
365
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
361
366
  adapted_from="https://huggingface.co/deepvk/deberta-v1-base",
362
367
  use_instructions=True,
363
368
  citation="""@misc{deepvk2024user,
@@ -412,13 +417,14 @@ user_bge_m3 = ModelMeta(
412
417
  revision="0cc6cfe48e260fb0474c753087a69369e88709ae",
413
418
  release_date="2024-07-05",
414
419
  n_parameters=359_026_688,
420
+ n_embedding_parameters=47_273_984,
415
421
  memory_usage_mb=1370,
416
422
  embed_dim=1024,
417
423
  license="apache-2.0",
418
424
  max_tokens=8194,
419
425
  reference="https://huggingface.co/deepvk/USER-base",
420
426
  similarity_fn_name=ScoringFunction.COSINE,
421
- framework=["Sentence Transformers", "PyTorch"],
427
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
422
428
  adapted_from="BAAI/bge-m3",
423
429
  use_instructions=False,
424
430
  training_datasets={
@@ -463,13 +469,14 @@ deberta_v1_ru = ModelMeta(
463
469
  revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4",
464
470
  release_date="2023-02-07",
465
471
  n_parameters=124_000_000,
472
+ n_embedding_parameters=38_603_520,
466
473
  memory_usage_mb=473,
467
474
  embed_dim=768,
468
475
  license="apache-2.0",
469
476
  max_tokens=512,
470
477
  reference="https://huggingface.co/deepvk/deberta-v1-base",
471
478
  similarity_fn_name=ScoringFunction.COSINE,
472
- framework=["Sentence Transformers", "PyTorch"],
479
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
473
480
  use_instructions=False,
474
481
  # Wikipedia, Books, Twitter comments, Pikabu, Proza.ru, Film subtitles, News websites, and Social corpus
475
482
  public_training_code=None,
@@ -494,13 +501,14 @@ rubert_base_cased = ModelMeta(
494
501
  revision="4036cab694767a299f2b9e6492909664d9414229",
495
502
  release_date="2020-03-04",
496
503
  n_parameters=1280_000_000,
504
+ n_embedding_parameters=91_812_096,
497
505
  memory_usage_mb=4883,
498
506
  embed_dim=768,
499
507
  license="not specified",
500
508
  max_tokens=512,
501
509
  reference="https://huggingface.co/DeepPavlov/rubert-base-cased",
502
510
  similarity_fn_name=ScoringFunction.COSINE,
503
- framework=["Sentence Transformers", "PyTorch"],
511
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
504
512
  use_instructions=False,
505
513
  public_training_code=None,
506
514
  public_training_data=None,
@@ -530,13 +538,14 @@ distilrubert_small_cased_conversational = ModelMeta(
530
538
  revision="e348066b4a7279b97138038299bddc6580a9169a",
531
539
  release_date="2022-06-28",
532
540
  n_parameters=107_000_000,
541
+ n_embedding_parameters=91_812_096,
533
542
  memory_usage_mb=408,
534
543
  embed_dim=768,
535
544
  license="not specified",
536
545
  max_tokens=512,
537
546
  reference="https://huggingface.co/DeepPavlov/distilrubert-small-cased-conversational",
538
547
  similarity_fn_name=ScoringFunction.COSINE,
539
- framework=["Sentence Transformers", "PyTorch"],
548
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
540
549
  use_instructions=False,
541
550
  public_training_code=None,
542
551
  public_training_data=None,
@@ -565,13 +574,14 @@ rubert_base_cased_sentence = ModelMeta(
565
574
  revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8",
566
575
  release_date="2020-03-04",
567
576
  n_parameters=107_000_000,
577
+ n_embedding_parameters=91_812_096,
568
578
  memory_usage_mb=408,
569
579
  embed_dim=768,
570
580
  license="not specified",
571
581
  max_tokens=512,
572
582
  reference="https://huggingface.co/DeepPavlov/rubert-base-cased-sentence",
573
583
  similarity_fn_name=ScoringFunction.COSINE,
574
- framework=["Sentence Transformers", "PyTorch"],
584
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
575
585
  use_instructions=False,
576
586
  public_training_code=None,
577
587
  public_training_data=None,
@@ -590,13 +600,14 @@ labse_en_ru = ModelMeta(
590
600
  revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e",
591
601
  release_date="2021-06-10",
592
602
  n_parameters=129_000_000,
603
+ n_embedding_parameters=42_303_744,
593
604
  memory_usage_mb=492,
594
605
  embed_dim=768,
595
606
  license="not specified",
596
607
  max_tokens=512,
597
608
  reference="https://huggingface.co/cointegrated/LaBSE-en-ru",
598
609
  similarity_fn_name=ScoringFunction.COSINE,
599
- framework=["Sentence Transformers", "PyTorch"],
610
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
600
611
  use_instructions=False,
601
612
  public_training_code="https://colab.research.google.com/drive/1dnPRn0-ugj3vZgSpyCC9sgslM2SuSfHy?usp=sharing",
602
613
  public_training_data=None,
@@ -618,13 +629,14 @@ rubert_tiny_turbo = ModelMeta(
618
629
  revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054",
619
630
  release_date="2024-06-21",
620
631
  n_parameters=29_200_000,
632
+ n_embedding_parameters=26_154_336,
621
633
  memory_usage_mb=111,
622
634
  embed_dim=312,
623
635
  license="mit",
624
636
  max_tokens=2048,
625
637
  reference="https://huggingface.co/sergeyzh/rubert-tiny-turbo",
626
638
  similarity_fn_name=ScoringFunction.COSINE,
627
- framework=["Sentence Transformers", "PyTorch"],
639
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
628
640
  use_instructions=False,
629
641
  public_training_code=None,
630
642
  public_training_data=None,
@@ -641,13 +653,14 @@ rubert_mini_frida = ModelMeta(
641
653
  revision="19b279b78afd945b5ccae78f63e284909814adc2",
642
654
  release_date="2025-03-02",
643
655
  n_parameters=32_300_000,
656
+ n_embedding_parameters=26_154_336,
644
657
  memory_usage_mb=123,
645
658
  embed_dim=312,
646
659
  license="mit",
647
660
  max_tokens=2048,
648
661
  reference="https://huggingface.co/sergeyzh/rubert-mini-frida",
649
662
  similarity_fn_name=ScoringFunction.COSINE,
650
- framework=["Sentence Transformers", "PyTorch"],
663
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
651
664
  use_instructions=True,
652
665
  public_training_code=None,
653
666
  public_training_data=None,
@@ -669,13 +682,14 @@ labse_ru_turbo = ModelMeta(
669
682
  revision="1940b046c6b5e125df11722b899130329d0a46da",
670
683
  release_date="2024-06-27",
671
684
  n_parameters=129_000_000,
685
+ n_embedding_parameters=42_303_744,
672
686
  memory_usage_mb=490,
673
687
  embed_dim=768,
674
688
  license="mit",
675
689
  max_tokens=512,
676
690
  reference="https://huggingface.co/sergeyzh/LaBSE-ru-turbo",
677
691
  similarity_fn_name=ScoringFunction.COSINE,
678
- framework=["Sentence Transformers", "PyTorch"],
692
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
679
693
  use_instructions=False,
680
694
  training_datasets=turbo_models_datasets,
681
695
  public_training_code=None,
@@ -720,6 +734,7 @@ rosberta_ru_en = ModelMeta(
720
734
  use_instructions=True,
721
735
  reference="https://huggingface.co/ai-forever/ru-en-RoSBERTa",
722
736
  n_parameters=404_000_000,
737
+ n_embedding_parameters=100_869_120,
723
738
  memory_usage_mb=1540,
724
739
  max_tokens=512,
725
740
  embed_dim=1024,
@@ -745,7 +760,7 @@ rosberta_ru_en = ModelMeta(
745
760
  },
746
761
  public_training_data=None,
747
762
  public_training_code=None,
748
- framework=["Sentence Transformers", "PyTorch"],
763
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
749
764
  citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb,
750
765
  title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
751
766
  author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
@@ -886,6 +901,7 @@ frida = ModelMeta(
886
901
  use_instructions=True,
887
902
  reference="https://huggingface.co/ai-forever/FRIDA",
888
903
  n_parameters=823_000_000,
904
+ n_embedding_parameters=143_847_936,
889
905
  memory_usage_mb=3141,
890
906
  max_tokens=512,
891
907
  embed_dim=1536,
@@ -895,7 +911,7 @@ frida = ModelMeta(
895
911
  training_datasets=frida_training_datasets,
896
912
  public_training_data=None,
897
913
  public_training_code=None,
898
- framework=["Sentence Transformers", "PyTorch"],
914
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
899
915
  citation=None,
900
916
  )
901
917
 
@@ -918,13 +934,14 @@ giga_embeddings = ModelMeta(
918
934
  revision="0ad5b29bfecd806cecc9d66b927d828a736594dc",
919
935
  release_date="2025-09-23",
920
936
  n_parameters=3_227_176_961,
937
+ n_embedding_parameters=None,
921
938
  memory_usage_mb=12865,
922
939
  embed_dim=2048,
923
940
  license="mit",
924
941
  max_tokens=4096,
925
942
  reference="https://huggingface.co/ai-sage/Giga-Embeddings-instruct",
926
943
  similarity_fn_name=ScoringFunction.COSINE,
927
- framework=["Sentence Transformers", "PyTorch"],
944
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
928
945
  use_instructions=True,
929
946
  public_training_code=None,
930
947
  public_training_data=None,
@@ -950,13 +967,14 @@ berta = ModelMeta(
950
967
  revision="914c8c8aed14042ed890fc2c662d5e9e66b2faa7",
951
968
  release_date="2025-03-10",
952
969
  n_parameters=128_000_000,
970
+ n_embedding_parameters=42_303_744,
953
971
  memory_usage_mb=489,
954
972
  embed_dim=768,
955
973
  license="mit",
956
974
  max_tokens=512,
957
975
  reference="https://huggingface.co/sergeyzh/BERTA",
958
976
  similarity_fn_name=ScoringFunction.COSINE,
959
- framework=["Sentence Transformers", "PyTorch"],
977
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
960
978
  use_instructions=True,
961
979
  training_datasets=berta_training_datasets,
962
980
  public_training_code=None,
@@ -1025,6 +1043,7 @@ user2_small = ModelMeta(
1025
1043
  use_instructions=True,
1026
1044
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1027
1045
  n_parameters=34_400_000,
1046
+ n_embedding_parameters=None,
1028
1047
  memory_usage_mb=131,
1029
1048
  max_tokens=8192,
1030
1049
  embed_dim=384,
@@ -1034,7 +1053,7 @@ user2_small = ModelMeta(
1034
1053
  training_datasets=user2_training_data,
1035
1054
  public_training_data=None,
1036
1055
  public_training_code="https://github.com/BlessedTatonka/some_code/tree/2899f27d51efdf4217fc6453799ff197e9792f1e",
1037
- framework=["Sentence Transformers", "PyTorch"],
1056
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
1038
1057
  citation="""@misc{deepvk2025user,
1039
1058
  title={USER2},
1040
1059
  author={Malashenko, Boris and Spirin, Egor and Sokolov Andrey},
@@ -1058,6 +1077,7 @@ user2_base = ModelMeta(
1058
1077
  use_instructions=True,
1059
1078
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1060
1079
  n_parameters=149_000_000,
1080
+ n_embedding_parameters=None,
1061
1081
  memory_usage_mb=568,
1062
1082
  max_tokens=8192,
1063
1083
  embed_dim=768,
@@ -1067,7 +1087,7 @@ user2_base = ModelMeta(
1067
1087
  training_datasets=user2_training_data,
1068
1088
  public_training_data=None,
1069
1089
  public_training_code="https://github.com/BlessedTatonka/some_code/tree/2899f27d51efdf4217fc6453799ff197e9792f1e",
1070
- framework=["Sentence Transformers", "PyTorch"],
1090
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
1071
1091
  citation="""@misc{deepvk2025user,
1072
1092
  title={USER2},
1073
1093
  author={Malashenko, Boris and Spirin, Egor and Sokolov Andrey},
@@ -38,13 +38,14 @@ cl_nagoya_ruri_v3_30m = ModelMeta(
38
38
  revision="24899e5de370b56d179604a007c0d727bf144504",
39
39
  release_date="2025-04-07",
40
40
  n_parameters=36_705_536,
41
+ n_embedding_parameters=None,
41
42
  memory_usage_mb=140,
42
43
  embed_dim=256,
43
44
  license="apache-2.0",
44
45
  max_tokens=8192,
45
46
  reference="https://huggingface.co/cl-nagoya/ruri-v3-30m",
46
47
  similarity_fn_name="cosine",
47
- framework=["PyTorch", "Sentence Transformers"],
48
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
48
49
  use_instructions=True,
49
50
  superseded_by=None,
50
51
  training_datasets={
@@ -69,13 +70,14 @@ cl_nagoya_ruri_v3_70m = ModelMeta(
69
70
  revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
70
71
  release_date="2025-04-09",
71
72
  n_parameters=36_705_536,
73
+ n_embedding_parameters=None,
72
74
  memory_usage_mb=140,
73
75
  embed_dim=256,
74
76
  license="apache-2.0",
75
77
  max_tokens=8192,
76
78
  reference="https://huggingface.co/cl-nagoya/ruri-v3-70m",
77
79
  similarity_fn_name="cosine",
78
- framework=["PyTorch", "Sentence Transformers"],
80
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
79
81
  use_instructions=True,
80
82
  superseded_by=None,
81
83
  training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
@@ -98,13 +100,14 @@ cl_nagoya_ruri_v3_130m = ModelMeta(
98
100
  revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
99
101
  release_date="2025-04-09",
100
102
  n_parameters=132_140_544,
103
+ n_embedding_parameters=None,
101
104
  memory_usage_mb=504,
102
105
  embed_dim=512,
103
106
  license="apache-2.0",
104
107
  max_tokens=8192,
105
108
  reference="https://huggingface.co/cl-nagoya/ruri-v3-130m",
106
109
  similarity_fn_name="cosine",
107
- framework=["PyTorch", "Sentence Transformers"],
110
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
108
111
  use_instructions=True,
109
112
  superseded_by=None,
110
113
  training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
@@ -127,13 +130,14 @@ cl_nagoya_ruri_v3_310m = ModelMeta(
127
130
  revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
128
131
  release_date="2025-04-09",
129
132
  n_parameters=314_611_968,
133
+ n_embedding_parameters=None,
130
134
  memory_usage_mb=1200,
131
135
  embed_dim=768,
132
136
  license="apache-2.0",
133
137
  max_tokens=8192,
134
138
  reference="https://huggingface.co/cl-nagoya/ruri-v3-310m",
135
139
  similarity_fn_name="cosine",
136
- framework=["PyTorch", "Sentence Transformers"],
140
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
137
141
  use_instructions=True,
138
142
  superseded_by=None,
139
143
  training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
@@ -157,13 +161,14 @@ cl_nagoya_ruri_small_v2 = ModelMeta(
157
161
  revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
158
162
  release_date="2024-12-05",
159
163
  n_parameters=68_087_808,
164
+ n_embedding_parameters=25_165_824,
160
165
  memory_usage_mb=260,
161
166
  embed_dim=768,
162
167
  license="apache-2.0",
163
168
  max_tokens=512,
164
169
  reference="https://huggingface.co/cl-nagoya/ruri-small-v2",
165
170
  similarity_fn_name="cosine",
166
- framework=["PyTorch", "Sentence Transformers"],
171
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
167
172
  use_instructions=True,
168
173
  adapted_from="line-corporation/line-distilbert-base-japanese",
169
174
  superseded_by=None,
@@ -186,13 +191,14 @@ cl_nagoya_ruri_base_v2 = ModelMeta(
186
191
  revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
187
192
  release_date="2024-12-05",
188
193
  n_parameters=111_207_168,
194
+ n_embedding_parameters=25_165_824,
189
195
  memory_usage_mb=424,
190
196
  embed_dim=768,
191
197
  license="apache-2.0",
192
198
  max_tokens=512,
193
199
  reference="https://huggingface.co/cl-nagoya/ruri-base-v2",
194
200
  similarity_fn_name="cosine",
195
- framework=["PyTorch", "Sentence Transformers"],
201
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
196
202
  use_instructions=True,
197
203
  adapted_from="tohoku-nlp/bert-base-japanese-v3",
198
204
  superseded_by=None,
@@ -215,13 +221,14 @@ cl_nagoya_ruri_large_v2 = ModelMeta(
215
221
  revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
216
222
  release_date="2024-12-06",
217
223
  n_parameters=337_441_792,
224
+ n_embedding_parameters=33_554_432,
218
225
  memory_usage_mb=1287,
219
226
  embed_dim=1024,
220
227
  license="apache-2.0",
221
228
  max_tokens=512,
222
229
  reference="https://huggingface.co/cl-nagoya/ruri-large-v2",
223
230
  similarity_fn_name="cosine",
224
- framework=["PyTorch", "Sentence Transformers"],
231
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
225
232
  use_instructions=True,
226
233
  adapted_from="tohoku-nlp/bert-large-japanese-v2",
227
234
  superseded_by=None,
@@ -245,13 +252,14 @@ cl_nagoya_ruri_small_v1 = ModelMeta(
245
252
  revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
246
253
  release_date="2024-08-28",
247
254
  n_parameters=68_087_808,
255
+ n_embedding_parameters=25_165_824,
248
256
  memory_usage_mb=130,
249
257
  embed_dim=768,
250
258
  license="apache-2.0",
251
259
  max_tokens=512,
252
260
  reference="https://huggingface.co/cl-nagoya/ruri-small",
253
261
  similarity_fn_name="cosine",
254
- framework=["PyTorch", "Sentence Transformers"],
262
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
255
263
  use_instructions=True,
256
264
  adapted_from="line-corporation/line-distilbert-base-japanese",
257
265
  superseded_by="cl-nagoya/ruri-small-v2",
@@ -274,13 +282,14 @@ cl_nagoya_ruri_base_v1 = ModelMeta(
274
282
  revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
275
283
  release_date="2024-08-28",
276
284
  n_parameters=111_207_168,
285
+ n_embedding_parameters=25_165_824,
277
286
  memory_usage_mb=212,
278
287
  embed_dim=768,
279
288
  license="apache-2.0",
280
289
  max_tokens=512,
281
290
  reference="https://huggingface.co/cl-nagoya/ruri-base",
282
291
  similarity_fn_name="cosine",
283
- framework=["PyTorch", "Sentence Transformers"],
292
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
284
293
  use_instructions=True,
285
294
  adapted_from="tohoku-nlp/bert-base-japanese-v3",
286
295
  superseded_by="cl-nagoya/ruri-base-v2",
@@ -304,13 +313,14 @@ cl_nagoya_ruri_large_v1 = ModelMeta(
304
313
  revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
305
314
  release_date="2024-08-28",
306
315
  n_parameters=337_441_792,
316
+ n_embedding_parameters=33_554_432,
307
317
  memory_usage_mb=644,
308
318
  embed_dim=1024,
309
319
  license="apache-2.0",
310
320
  max_tokens=512,
311
321
  reference="https://huggingface.co/cl-nagoya/ruri-large",
312
322
  similarity_fn_name="cosine",
313
- framework=["PyTorch", "Sentence Transformers"],
323
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
314
324
  use_instructions=True,
315
325
  adapted_from="tohoku-nlp/bert-large-japanese-v2",
316
326
  superseded_by="cl-nagoya/ruri-large-v2",
@@ -1,12 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  from mteb.models.instruct_wrapper import (
2
6
  InstructSentenceTransformerModel,
3
7
  instruct_wrapper,
4
8
  )
5
9
  from mteb.models.model_meta import ModelMeta, ScoringFunction
6
- from mteb.types import PromptType
7
10
 
8
11
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
9
12
 
13
+ if TYPE_CHECKING:
14
+ from mteb.types import PromptType
15
+
10
16
 
11
17
  def instruction_template(
12
18
  instruction: str, prompt_type: PromptType | None = None
@@ -52,13 +58,14 @@ SFR_Embedding_2_R = ModelMeta(
52
58
  revision="91762139d94ed4371a9fa31db5551272e0b83818",
53
59
  release_date="2024-06-14", # initial commit of hf model.
54
60
  n_parameters=7_110_000_000,
61
+ n_embedding_parameters=None,
55
62
  memory_usage_mb=13563,
56
63
  embed_dim=4096,
57
64
  license="cc-by-nc-4.0",
58
65
  max_tokens=32768,
59
66
  reference="https://huggingface.co/Salesforce/SFR-Embedding-2_R",
60
67
  similarity_fn_name=ScoringFunction.COSINE,
61
- framework=["Sentence Transformers", "PyTorch"],
68
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
62
69
  use_instructions=True,
63
70
  adapted_from="intfloat/e5-mistral-7b-instruct",
64
71
  public_training_code=None,
@@ -90,13 +97,14 @@ SFR_Embedding_Code_2B_R = ModelMeta(
90
97
  revision="c73d8631a005876ed5abde34db514b1fb6566973",
91
98
  release_date="2025-01-17", # initial commit of hf model.
92
99
  n_parameters=2_610_000_000,
100
+ n_embedding_parameters=None,
93
101
  memory_usage_mb=4986,
94
102
  embed_dim=2304,
95
103
  license="cc-by-nc-4.0",
96
104
  max_tokens=8192,
97
105
  reference="https://huggingface.co/Salesforce/SFR-Embedding-Code-2B_R",
98
106
  similarity_fn_name=ScoringFunction.COSINE,
99
- framework=["Sentence Transformers", "PyTorch"],
107
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
100
108
  use_instructions=True,
101
109
  adapted_from="google/gemma-2-2b-it",
102
110
  public_training_code=None,
@@ -128,13 +136,14 @@ SFR_Embedding_Mistral = ModelMeta(
128
136
  revision="938c560d1c236aa563b2dbdf084f28ab28bccb11",
129
137
  release_date="2024-01-24", # initial commit of hf model.
130
138
  n_parameters=7_110_000_000,
139
+ n_embedding_parameters=None,
131
140
  memory_usage_mb=13563,
132
141
  embed_dim=4096,
133
142
  license="cc-by-nc-4.0",
134
143
  max_tokens=32768,
135
144
  reference="https://huggingface.co/Salesforce/SFR-Embedding-Mistral",
136
145
  similarity_fn_name=ScoringFunction.COSINE,
137
- framework=["Sentence Transformers", "PyTorch"],
146
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
138
147
  use_instructions=True,
139
148
  public_training_code=None,
140
149
  public_training_data=None,
@@ -51,13 +51,14 @@ samilpwc_expr = ModelMeta(
51
51
  revision="33358978be40f36491045f9c2a359d38c3f50047",
52
52
  release_date="2025-08-12",
53
53
  n_parameters=560_000_000,
54
+ n_embedding_parameters=256_002_048,
54
55
  memory_usage_mb=2136,
55
56
  embed_dim=1024,
56
57
  license="apache-2.0",
57
58
  max_tokens=514,
58
59
  reference="https://huggingface.co/SamilPwC-AXNode-GenAI/PwC-Embedding_expr",
59
60
  similarity_fn_name="cosine",
60
- framework=["Sentence Transformers", "PyTorch"],
61
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
61
62
  use_instructions=True,
62
63
  public_training_code=None,
63
64
  public_training_data=None,
@@ -124,13 +124,14 @@ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
124
124
  revision="1f3408afaa7b617e3445d891310a9c26dd0c68a5",
125
125
  release_date="2025-07-30",
126
126
  n_parameters=1_224_038_144,
127
+ n_embedding_parameters=183_500_800,
127
128
  memory_usage_mb=4669,
128
129
  embed_dim=1792,
129
130
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b/blob/main/LICENSE",
130
131
  max_tokens=8192,
131
132
  reference="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b",
132
133
  similarity_fn_name="cosine",
133
- framework=["Sentence Transformers", "PyTorch"],
134
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
134
135
  use_instructions=True,
135
136
  adapted_from="sbintuitions/sarashina2.2-1b",
136
137
  superseded_by=None,
@@ -150,13 +151,14 @@ sbintuitions_sarashina_embedding_v1_1b = ModelMeta(
150
151
  revision="d060fcd8984075071e7fad81baff035cbb3b6c7e",
151
152
  release_date="2024-11-22",
152
153
  n_parameters=1_224_038_144,
154
+ n_embedding_parameters=183_500_800,
153
155
  memory_usage_mb=4669,
154
156
  embed_dim=1792,
155
157
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b/blob/main/LICENSE",
156
158
  max_tokens=8192,
157
159
  reference="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b",
158
160
  similarity_fn_name="cosine",
159
- framework=["Sentence Transformers", "PyTorch"],
161
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
160
162
  use_instructions=False,
161
163
  adapted_from="sbintuitions/sarashina2.1-1b",
162
164
  superseded_by="sbintuitions/sarashina-embedding-v2-1b",
@@ -27,13 +27,14 @@ searchmap_preview = ModelMeta(
27
27
  use_instructions=True,
28
28
  release_date="2025-03-05",
29
29
  n_parameters=435_000_000,
30
+ n_embedding_parameters=None,
30
31
  memory_usage_mb=1660,
31
32
  embed_dim=4096,
32
33
  license="mit",
33
34
  max_tokens=8192,
34
35
  reference="https://huggingface.co/VPLabs/SearchMap_Preview",
35
36
  similarity_fn_name="cosine",
36
- framework=["Sentence Transformers", "PyTorch"],
37
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
37
38
  public_training_code=None,
38
39
  public_training_data=None,
39
40
  training_datasets=None,
@@ -13,16 +13,18 @@ import torch
13
13
  from torch.utils.data import DataLoader
14
14
 
15
15
  from mteb._requires_package import requires_package
16
- from mteb.abstasks.task_metadata import TaskMetadata
17
16
  from mteb.models.abs_encoder import AbsEncoder
18
17
  from mteb.models.model_implementations.bge_models import bge_chinese_training_data
19
18
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
20
19
  from mteb.models.model_meta import ModelMeta
21
- from mteb.types import Array, BatchedInput, PromptType
20
+ from mteb.types import PromptType
22
21
 
23
22
  if TYPE_CHECKING:
24
23
  from PIL import Image
25
24
 
25
+ from mteb.abstasks.task_metadata import TaskMetadata
26
+ from mteb.types import Array, BatchedInput
27
+
26
28
 
27
29
  logger = logging.getLogger(__name__)
28
30
 
@@ -429,6 +431,7 @@ seed_embedding = ModelMeta(
429
431
  embed_dim=2048,
430
432
  open_weights=False,
431
433
  n_parameters=None,
434
+ n_embedding_parameters=None,
432
435
  memory_usage_mb=None,
433
436
  license=None,
434
437
  reference="https://seed1-6-embedding.github.io/",