mteb-2.5.2-py3-none-any.whl → mteb-2.7.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,35 +1,55 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
7
  import torch.nn.functional as F
6
8
  from packaging.version import Version
7
- from torch.utils.data import DataLoader
8
9
  from tqdm import tqdm
9
10
  from transformers import AutoModel, AutoTokenizer
10
11
  from transformers import __version__ as transformers_version
11
12
 
12
- from mteb import TaskMetadata
13
13
  from mteb._requires_package import requires_package
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.models import CrossEncoderWrapper
14
16
  from mteb.models.abs_encoder import AbsEncoder
15
17
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
16
18
  from mteb.models.model_meta import ModelMeta, ScoringFunction
17
- from mteb.types import Array, BatchedInput, PromptType
19
+ from mteb.types import PromptType
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+
24
+ from torch.utils.data import DataLoader
25
+
26
+ from mteb import TaskMetadata
27
+ from mteb.types import Array, BatchedInput
18
28
 
19
29
  logger = logging.getLogger(__name__)
20
30
 
21
- NV_RETRIEVER_CITATION = """@misc{moreira2025nvretrieverimprovingtextembedding,
22
- title={NV-Retriever: Improving text embedding models with effective hard-negative mining},
23
- author={Gabriel de Souza P. Moreira and Radek Osmulski and Mengyao Xu and Ronay Ak and Benedikt Schifferer and Even Oldridge},
31
+ NV_RETRIEVER_CITATION = """@misc{lee2025nvembedimprovedtechniquestraining,
32
+ title={NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models},
33
+ author={Chankyu Lee and Rajarshi Roy and Mengyao Xu and Jonathan Raiman and Mohammad Shoeybi and Bryan Catanzaro and Wei Ping},
34
+ year={2025},
35
+ eprint={2405.17428},
36
+ archivePrefix={arXiv},
37
+ primaryClass={cs.CL},
38
+ url={https://arxiv.org/abs/2405.17428},
39
+ }"""
40
+
41
+ LlamaEmbedNemotron_CITATION = """@misc{babakhin2025llamaembednemotron8buniversaltextembedding,
42
+ title={Llama-Embed-Nemotron-8B: A Universal Text Embedding Model for Multilingual and Cross-Lingual Tasks},
43
+ author={Yauhen Babakhin and Radek Osmulski and Ronay Ak and Gabriel Moreira and Mengyao Xu and Benedikt Schifferer and Bo Liu and Even Oldridge},
24
44
  year={2025},
25
- eprint={2407.15831},
45
+ eprint={2511.07025},
26
46
  archivePrefix={arXiv},
27
- primaryClass={cs.IR},
28
- url={https://arxiv.org/abs/2407.15831}
47
+ primaryClass={cs.CL},
48
+ url={https://arxiv.org/abs/2511.07025},
29
49
  }"""
30
50
 
31
51
 
32
- def instruction_template(
52
+ def _instruction_template(
33
53
  instruction: str, prompt_type: PromptType | None = None
34
54
  ) -> str:
35
55
  return f"Instruct: {instruction}\nQuery: " if instruction else ""
@@ -100,10 +120,77 @@ nvidia_training_datasets = {
100
120
  "MrTidyRetrieval",
101
121
  }
102
122
 
123
+
124
class _NVEmbedWrapper(InstructSentenceTransformerModel):
    """Loader for NV-Embed checkpoints that need pinned library versions.

    Inherited, because nvembed requires `sbert==2`, but it doesn't have
    tokenizers kwargs. For that reason this initializer does not call the
    parent ``__init__``; it loads the model itself and sets the tokenizer
    options as attributes on the loaded tokenizer.
    """

    def __init__(
        self,
        model_name: str,
        revision: str,
        instruction_template: str
        | Callable[[str, PromptType | None], str]
        | None = None,
        max_seq_length: int | None = None,
        apply_instruction_to_passages: bool = True,
        padding_side: str | None = None,
        add_eos_token: bool = False,
        prompts_dict: dict[str, str] | None = None,
        **kwargs: Any,
    ):
        """Validate the environment and load the SentenceTransformer model.

        Args:
            model_name: Hub name of the model to load.
            revision: Model revision passed to ``SentenceTransformer``.
            instruction_template: Either a string containing the
                ``{instruction}`` placeholder, a callable that builds the
                prompt, or ``None`` (instructions are then used as-is).
            max_seq_length: If truthy, assigned to ``model.max_seq_length``.
            apply_instruction_to_passages: Stored on the instance unchanged.
            padding_side: Tokenizer padding side. When ``None`` the
                tokenizer's own setting is left untouched.
            add_eos_token: Assigned to ``tokenizer.add_eos_token``.
            prompts_dict: Stored on the instance unchanged.
            **kwargs: Forwarded to ``SentenceTransformer``.

        Raises:
            RuntimeError: If the installed ``transformers`` or
                ``sentence-transformers`` version differs from the pinned one.
            ValueError: If a string ``instruction_template`` lacks the
                ``{instruction}`` placeholder.
        """
        from sentence_transformers import __version__ as sbert_version

        required_transformers_version = "4.42.4"
        required_sbert_version = "2.7.0"

        # The messages name the model actually being loaded: this wrapper is
        # shared by more than one NV-Embed checkpoint.
        if Version(transformers_version) != Version(required_transformers_version):
            raise RuntimeError(
                f"transformers version {transformers_version} does not match the required "
                f"version {required_transformers_version} needed to run `{model_name}`"
            )

        if Version(sbert_version) != Version(required_sbert_version):
            raise RuntimeError(
                f"sbert version {sbert_version} does not match the required "
                f"version {required_sbert_version} needed to run `{model_name}`"
            )

        requires_package(
            self, "flash_attn", model_name, "pip install 'mteb[flash_attention]'"
        )

        from sentence_transformers import SentenceTransformer

        if (
            isinstance(instruction_template, str)
            and "{instruction}" not in instruction_template
        ):
            raise ValueError(
                "Instruction template must contain the string '{instruction}'."
            )
        if instruction_template is None:
            logger.warning(
                "No instruction template provided. Instructions will be used as-is."
            )

        self.instruction_template = instruction_template

        self.model_name = model_name
        self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
        # Only override the padding side when the caller asked for one;
        # assigning None would discard the tokenizer's configured value.
        if padding_side is not None:
            self.model.tokenizer.padding_side = padding_side
        self.model.tokenizer.add_eos_token = add_eos_token

        if max_seq_length:
            # https://github.com/huggingface/sentence-transformers/issues/3575
            self.model.max_seq_length = max_seq_length
        self.apply_instruction_to_passages = apply_instruction_to_passages
        self.prompts_dict = prompts_dict
188
+
189
+
103
190
  NV_embed_v2 = ModelMeta(
104
- loader=InstructSentenceTransformerModel,
191
+ loader=_NVEmbedWrapper,
105
192
  loader_kwargs=dict(
106
- instruction_template=instruction_template,
193
+ instruction_template=_instruction_template,
107
194
  trust_remote_code=True,
108
195
  max_seq_length=32768,
109
196
  padding_side="right",
@@ -117,13 +204,14 @@ NV_embed_v2 = ModelMeta(
117
204
  revision="7604d305b621f14095a1aa23d351674c2859553a",
118
205
  release_date="2024-09-09", # initial commit of hf model.
119
206
  n_parameters=7_850_000_000,
207
+ n_embedding_parameters=None,
120
208
  memory_usage_mb=14975,
121
209
  embed_dim=4096,
122
210
  license="cc-by-nc-4.0",
123
211
  max_tokens=32768,
124
212
  reference="https://huggingface.co/nvidia/NV-Embed-v2",
125
213
  similarity_fn_name=ScoringFunction.COSINE,
126
- framework=["Sentence Transformers", "PyTorch"],
214
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
127
215
  use_instructions=True,
128
216
  training_datasets=nvidia_training_datasets,
129
217
  public_training_code=None,
@@ -132,9 +220,9 @@ NV_embed_v2 = ModelMeta(
132
220
  )
133
221
 
134
222
  NV_embed_v1 = ModelMeta(
135
- loader=InstructSentenceTransformerModel,
223
+ loader=_NVEmbedWrapper,
136
224
  loader_kwargs=dict(
137
- instruction_template=instruction_template,
225
+ instruction_template=_instruction_template,
138
226
  trust_remote_code=True,
139
227
  max_seq_length=32768,
140
228
  padding_side="right",
@@ -148,13 +236,14 @@ NV_embed_v1 = ModelMeta(
148
236
  revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
149
237
  release_date="2024-09-13", # initial commit of hf model.
150
238
  n_parameters=7_850_000_000,
239
+ n_embedding_parameters=None,
151
240
  memory_usage_mb=14975,
152
241
  embed_dim=4096,
153
242
  license="cc-by-nc-4.0",
154
243
  max_tokens=32768,
155
244
  reference="https://huggingface.co/nvidia/NV-Embed-v1",
156
245
  similarity_fn_name=ScoringFunction.COSINE,
157
- framework=["Sentence Transformers", "PyTorch"],
246
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
158
247
  use_instructions=True,
159
248
  training_datasets=nvidia_training_datasets,
160
249
  public_training_code=None,
@@ -337,6 +426,7 @@ class LlamaEmbedNemotron(AbsEncoder):
337
426
  self,
338
427
  model_name: str,
339
428
  revision: str,
429
+ device: str | None = None,
340
430
  ) -> None:
341
431
  required_transformers_version = "4.51.0"
342
432
  if Version(transformers_version) != Version(required_transformers_version):
@@ -355,7 +445,7 @@ class LlamaEmbedNemotron(AbsEncoder):
355
445
  self.attn_implementation = (
356
446
  "flash_attention_2" if torch.cuda.is_available() else "eager"
357
447
  )
358
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
448
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
359
449
  self.task_prompts = TASK_PROMPTS
360
450
  self.instruction_template = self._instruction_template
361
451
 
@@ -536,17 +626,70 @@ llama_embed_nemotron_8b = ModelMeta(
536
626
  revision="84a375593d27d3528beb4e104822515659e093b4",
537
627
  release_date="2025-10-23",
538
628
  n_parameters=7_504_924_672,
629
+ n_embedding_parameters=None,
539
630
  memory_usage_mb=28629,
540
631
  embed_dim=4096,
541
632
  license="https://huggingface.co/nvidia/llama-embed-nemotron-8b/blob/main/LICENSE",
542
633
  max_tokens=32768,
543
634
  reference="https://huggingface.co/nvidia/llama-embed-nemotron-8b",
544
635
  similarity_fn_name="cosine",
545
- framework=["PyTorch"],
636
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
546
637
  use_instructions=True,
547
638
  training_datasets=llama_embed_nemotron_training_datasets,
548
- public_training_code=None, # Will be released later
549
- public_training_data=None, # Will be released later
639
+ public_training_code="https://github.com/NVIDIA-NeMo/Automodel/tree/main/examples/biencoder/llama_embed_nemotron_8b",
640
+ public_training_data="https://huggingface.co/datasets/nvidia/embed-nemotron-dataset-v1",
550
641
  contacts=["ybabakhin"],
551
- citation=NV_RETRIEVER_CITATION,
642
+ citation=LlamaEmbedNemotron_CITATION,
643
+ )
644
+
645
+
646
def _nemotron_rerank_model(model: str, revision: str, **kwargs) -> CrossEncoderWrapper:
    """Build the cross-encoder wrapper after checking the transformers pin.

    Args:
        model: Model identifier forwarded to ``CrossEncoderWrapper``.
        revision: Model revision forwarded to ``CrossEncoderWrapper``.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        The constructed ``CrossEncoderWrapper``.

    Raises:
        RuntimeError: If the installed ``transformers`` version is not
            exactly the pinned one.
    """
    pinned_version = "4.47.1"
    installed = Version(transformers_version)

    if installed != Version(pinned_version):
        message = (
            f"transformers version {transformers_version} is not match with required "
            f"install version {pinned_version} to run `nvidia/llama-nemotron-rerank-1b-v2`"
        )
        raise RuntimeError(message)

    wrapper = CrossEncoderWrapper(model=model, revision=revision, **kwargs)
    return wrapper
660
+
661
+
662
# Registry metadata for the `nvidia/llama-nemotron-rerank-1b-v2` cross-encoder.
nemotron_rerank_1b_v2 = ModelMeta(
    loader=_nemotron_rerank_model,
    loader_kwargs=dict(
        trust_remote_code=True,
        # Prefixes forwarded to the loader for the query and passage texts.
        query_prefix="question:",
        passage_prefix=" \n \n passage:",
        model_kwargs={"torch_dtype": torch.float32},
    ),
    name="nvidia/llama-nemotron-rerank-1b-v2",
    revision="78efcfdc23b53a753f6c73f2d78b18132a34ac4d",
    release_date="2025-10-16",
    languages=["eng-Latn"],
    n_parameters=1235816448,
    # Set explicitly, matching the other model entries in this file.
    n_embedding_parameters=None,
    memory_usage_mb=2357.0,
    max_tokens=4096,
    embed_dim=2048,
    license="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["PyTorch", "Sentence Transformers"],
    reference="https://huggingface.co/nvidia/llama-nemotron-rerank-1b-v2",
    similarity_fn_name=ScoringFunction.COSINE,
    use_instructions=None,
    training_datasets=set(
        # private
    ),
    adapted_from="meta-llama/Llama-3.2-1B",
    superseded_by=None,
    modalities=["text"],
    model_type=["cross-encoder"],
    citation=None,
    contacts=None,
)
@@ -1,6 +1,6 @@
1
1
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
2
  from mteb.models.model_meta import ModelMeta
3
- from mteb.models.models_protocols import PromptType
3
+ from mteb.types import PromptType
4
4
 
5
5
 
6
6
  def instruction_template(
@@ -163,6 +163,66 @@ _PREDEFINED_PROMPTS = {
163
163
  "German1Retrieval": "Given a query, retrieve relevant passages",
164
164
  }
165
165
 
166
# Registry metadata for `bflhc/Octen-Embedding-0.6B`.
Octen_Embedding_0B6 = ModelMeta(
    loader=InstructSentenceTransformerModel,
    loader_kwargs=dict(
        instruction_template=instruction_template,
        apply_instruction_to_passages=True,
        prompts_dict=_PREDEFINED_PROMPTS,
        max_seq_length=18480,
        model_kwargs={"torch_dtype": "bfloat16"},
    ),
    name="bflhc/Octen-Embedding-0.6B",
    languages=multilingual_langs,
    open_weights=True,
    revision="1a00a4e837bd788f6f8d91bc43201a5e52cf8ef8",
    release_date="2026-01-10",
    n_parameters=595776512,
    # Set explicitly, matching the 4B and 8B Octen entries.
    n_embedding_parameters=None,
    memory_usage_mb=1136,
    embed_dim=1024,
    max_tokens=32768,
    license="apache-2.0",
    reference="https://huggingface.co/bflhc/Octen-Embedding-0.6B",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    use_instructions=True,
    public_training_code=None,
    public_training_data=None,
    training_datasets=training_data,
    citation=OCTEN_CITATION,
    adapted_from="Qwen/Qwen3-Embedding-0.6B",
)
195
+
196
# Registry metadata for `bflhc/Octen-Embedding-4B`.
Octen_Embedding_4B = ModelMeta(
    # Identity and provenance.
    name="bflhc/Octen-Embedding-4B",
    revision="6e188e3b072c3e3678b235ad84e6e97bcbb71e8f",
    release_date="2025-12-30",
    reference="https://huggingface.co/bflhc/Octen-Embedding-4B",
    adapted_from="Qwen/Qwen3-Embedding-4B",
    citation=OCTEN_CITATION,
    license="apache-2.0",
    open_weights=True,
    languages=multilingual_langs,
    # How the model is loaded.
    loader=InstructSentenceTransformerModel,
    loader_kwargs={
        "instruction_template": instruction_template,
        "apply_instruction_to_passages": True,
        "prompts_dict": _PREDEFINED_PROMPTS,
        "max_seq_length": 18480,
        "model_kwargs": {"torch_dtype": "bfloat16"},
    },
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    use_instructions=True,
    similarity_fn_name="cosine",
    # Size figures.
    n_parameters=4_021_774_336,
    n_embedding_parameters=None,
    memory_usage_mb=7671,
    embed_dim=2560,
    max_tokens=32768,
    # Training provenance.
    training_datasets=training_data,
    public_training_code=None,
    public_training_data=None,
)
166
226
 
167
227
  Octen_Embedding_8B = ModelMeta(
168
228
  loader=InstructSentenceTransformerModel,
@@ -176,16 +236,17 @@ Octen_Embedding_8B = ModelMeta(
176
236
  name="bflhc/Octen-Embedding-8B",
177
237
  languages=multilingual_langs,
178
238
  open_weights=True,
179
- revision="2030603c2926ab005fafd824fac5911e271be21f",
239
+ revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
180
240
  release_date="2025-12-23",
181
241
  n_parameters=7567295488,
242
+ n_embedding_parameters=None,
182
243
  memory_usage_mb=14433,
183
244
  embed_dim=4096,
184
245
  max_tokens=32768,
185
246
  license="apache-2.0",
186
247
  reference="https://huggingface.co/bflhc/Octen-Embedding-8B",
187
248
  similarity_fn_name="cosine",
188
- framework=["Sentence Transformers", "PyTorch"],
249
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
189
250
  use_instructions=True,
190
251
  public_training_code=None,
191
252
  public_training_data=None,
@@ -1,15 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any, ClassVar
4
+ from typing import TYPE_CHECKING, Any, ClassVar
3
5
 
4
6
  import numpy as np
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import requires_package
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
10
  from mteb.models.abs_encoder import AbsEncoder
11
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
13
18
 
14
19
  logger = logging.getLogger(__name__)
15
20
 
@@ -180,6 +185,7 @@ text_embedding_3_small = ModelMeta(
180
185
  embed_dim=1536,
181
186
  open_weights=False,
182
187
  n_parameters=None,
188
+ n_embedding_parameters=None,
183
189
  memory_usage_mb=None,
184
190
  license=None,
185
191
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -208,6 +214,7 @@ text_embedding_3_large = ModelMeta(
208
214
  framework=["API"],
209
215
  use_instructions=False,
210
216
  n_parameters=None,
217
+ n_embedding_parameters=None,
211
218
  memory_usage_mb=None,
212
219
  public_training_code=None,
213
220
  public_training_data=None, # assumed
@@ -233,6 +240,7 @@ text_embedding_ada_002 = ModelMeta(
233
240
  framework=["API"],
234
241
  use_instructions=False,
235
242
  n_parameters=None,
243
+ n_embedding_parameters=None,
236
244
  memory_usage_mb=None,
237
245
  public_training_code=None,
238
246
  public_training_data=None, # assumed
@@ -257,6 +265,7 @@ text_embedding_3_small_512 = ModelMeta(
257
265
  embed_dim=512,
258
266
  open_weights=False,
259
267
  n_parameters=None,
268
+ n_embedding_parameters=None,
260
269
  memory_usage_mb=None,
261
270
  license=None,
262
271
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -287,6 +296,7 @@ text_embedding_3_large_512 = ModelMeta(
287
296
  framework=["API"],
288
297
  use_instructions=False,
289
298
  n_parameters=None,
299
+ n_embedding_parameters=None,
290
300
  memory_usage_mb=None,
291
301
  public_training_code=None,
292
302
  public_training_data=None, # assumed
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies, requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  OPENCLIP_CITATION = """@inproceedings{cherti2023reproducible,
14
19
  title={Reproducible scaling laws for contrastive language-image learning},
@@ -120,7 +125,7 @@ def openclip_loader(model_name, **kwargs):
120
125
 
121
126
 
122
127
  CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
123
- loader=openclip_loader, # type: ignore
128
+ loader=openclip_loader,
124
129
  name="laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K",
125
130
  model_type=["dense"],
126
131
  languages=["eng-Latn"],
@@ -128,6 +133,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
128
133
  release_date="2023-04-26",
129
134
  modalities=["image", "text"],
130
135
  n_parameters=428_000_000,
136
+ n_embedding_parameters=None,
131
137
  memory_usage_mb=1633,
132
138
  max_tokens=77,
133
139
  embed_dim=768,
@@ -146,7 +152,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
146
152
  )
147
153
 
148
154
  CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
149
- loader=openclip_loader, # type: ignore
155
+ loader=openclip_loader,
150
156
  name="laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
151
157
  model_type=["dense"],
152
158
  languages=["eng-Latn"],
@@ -154,6 +160,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
154
160
  release_date="2023-04-26",
155
161
  modalities=["image", "text"],
156
162
  n_parameters=151_000_000,
163
+ n_embedding_parameters=None,
157
164
  memory_usage_mb=576,
158
165
  max_tokens=77,
159
166
  embed_dim=512,
@@ -161,7 +168,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
161
168
  open_weights=True,
162
169
  public_training_code="https://github.com/mlfoundations/open_clip",
163
170
  public_training_data="https://huggingface.co/datasets/mlfoundations/datacomp_1b",
164
- framework=["PyTorch"],
171
+ framework=["PyTorch", "safetensors"],
165
172
  reference="https://huggingface.co/laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
166
173
  similarity_fn_name=ScoringFunction.COSINE,
167
174
  use_instructions=False,
@@ -172,7 +179,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
172
179
  )
173
180
 
174
181
  CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
175
- loader=openclip_loader, # type: ignore
182
+ loader=openclip_loader,
176
183
  name="laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K",
177
184
  model_type=["dense"],
178
185
  languages=["eng-Latn"],
@@ -180,6 +187,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
180
187
  release_date="2023-04-26",
181
188
  modalities=["image", "text"],
182
189
  n_parameters=150_000_000,
190
+ n_embedding_parameters=None,
183
191
  memory_usage_mb=572,
184
192
  max_tokens=77,
185
193
  embed_dim=512,
@@ -198,7 +206,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
198
206
  )
199
207
 
200
208
  CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
201
- loader=openclip_loader, # type: ignore
209
+ loader=openclip_loader,
202
210
  name="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
203
211
  model_type=["dense"],
204
212
  languages=["eng-Latn"],
@@ -206,6 +214,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
206
214
  release_date="2023-01-23",
207
215
  modalities=["image", "text"],
208
216
  n_parameters=2_540_000_000,
217
+ n_embedding_parameters=None,
209
218
  memory_usage_mb=9689,
210
219
  max_tokens=77,
211
220
  embed_dim=1280,
@@ -213,7 +222,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
213
222
  open_weights=True,
214
223
  public_training_code="https://github.com/mlfoundations/open_clip",
215
224
  public_training_data="https://laion.ai/blog/laion-5b/",
216
- framework=["PyTorch"],
225
+ framework=["PyTorch", "safetensors"],
217
226
  reference="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
218
227
  similarity_fn_name=ScoringFunction.COSINE,
219
228
  use_instructions=False,
@@ -224,7 +233,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
224
233
  )
225
234
 
226
235
  CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
227
- loader=openclip_loader, # type: ignore
236
+ loader=openclip_loader,
228
237
  name="laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
229
238
  model_type=["dense"],
230
239
  languages=["eng-Latn"],
@@ -232,6 +241,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
232
241
  release_date="2023-03-06",
233
242
  modalities=["image", "text"],
234
243
  n_parameters=1_367_000_000,
244
+ n_embedding_parameters=None,
235
245
  memory_usage_mb=5215,
236
246
  max_tokens=77,
237
247
  embed_dim=1024,
@@ -239,7 +249,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
239
249
  open_weights=True,
240
250
  public_training_code="https://github.com/mlfoundations/open_clip",
241
251
  public_training_data="https://laion.ai/blog/laion-5b/",
242
- framework=["PyTorch"],
252
+ framework=["PyTorch", "safetensors"],
243
253
  reference="https://huggingface.co/laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
244
254
  similarity_fn_name=ScoringFunction.COSINE,
245
255
  use_instructions=False,
@@ -250,7 +260,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
250
260
  )
251
261
 
252
262
  CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
253
- loader=openclip_loader, # type: ignore
263
+ loader=openclip_loader,
254
264
  name="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
255
265
  model_type=["dense"],
256
266
  languages=["eng-Latn"],
@@ -258,6 +268,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
258
268
  release_date="2022-09-15",
259
269
  modalities=["image", "text"],
260
270
  n_parameters=986_000_000,
271
+ n_embedding_parameters=None,
261
272
  memory_usage_mb=3762,
262
273
  max_tokens=77,
263
274
  embed_dim=1024,
@@ -265,7 +276,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
265
276
  open_weights=True,
266
277
  public_training_code="https://github.com/mlfoundations/open_clip",
267
278
  public_training_data="https://laion.ai/blog/laion-5b/",
268
- framework=["PyTorch"],
279
+ framework=["PyTorch", "safetensors"],
269
280
  reference="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
270
281
  similarity_fn_name=ScoringFunction.COSINE,
271
282
  use_instructions=False,
@@ -276,7 +287,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
276
287
  )
277
288
 
278
289
  CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
279
- loader=openclip_loader, # type: ignore
290
+ loader=openclip_loader,
280
291
  name="laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
281
292
  model_type=["dense"],
282
293
  languages=["eng-Latn"],
@@ -284,6 +295,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
284
295
  release_date="2022-09-15",
285
296
  modalities=["image", "text"],
286
297
  n_parameters=428_000_000,
298
+ n_embedding_parameters=None,
287
299
  memory_usage_mb=1631,
288
300
  max_tokens=77,
289
301
  embed_dim=768,
@@ -291,7 +303,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
291
303
  open_weights=True,
292
304
  public_training_code="https://github.com/mlfoundations/open_clip",
293
305
  public_training_data="https://laion.ai/blog/laion-5b/",
294
- framework=["PyTorch"],
306
+ framework=["PyTorch", "safetensors"],
295
307
  reference="https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
296
308
  similarity_fn_name=ScoringFunction.COSINE,
297
309
  use_instructions=False,
@@ -310,6 +322,7 @@ CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
310
322
  release_date="2022-09-15",
311
323
  modalities=["image", "text"],
312
324
  n_parameters=151_000_000,
325
+ n_embedding_parameters=None,
313
326
  memory_usage_mb=577,
314
327
  max_tokens=77,
315
328
  embed_dim=512,
@@ -317,7 +330,7 @@ CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
317
330
  open_weights=True,
318
331
  public_training_code="https://github.com/mlfoundations/open_clip",
319
332
  public_training_data="https://laion.ai/blog/laion-5b/",
320
- framework=["PyTorch"],
333
+ framework=["PyTorch", "safetensors"],
321
334
  reference="https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
322
335
  similarity_fn_name=ScoringFunction.COSINE,
323
336
  use_instructions=False,