mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -12,12 +12,13 @@ spartan8806_atles_champion_embedding = ModelMeta(
12
12
  revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
13
13
  release_date="2025-11-15",
14
14
  n_parameters=110_000_000,
15
+ n_embedding_parameters=23_444_736,
15
16
  memory_usage_mb=420,
16
17
  max_tokens=512,
17
18
  embed_dim=768,
18
19
  license="apache-2.0",
19
20
  similarity_fn_name="cosine",
20
- framework=["Sentence Transformers"],
21
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
21
22
  reference="https://huggingface.co/spartan8806/atles-champion-embedding",
22
23
  use_instructions=False,
23
24
  training_datasets={"STSBenchmark"},
@@ -66,12 +66,19 @@ stella_en_400m = ModelMeta(
66
66
  revision="1bb50bc7bb726810eac2140e62155b88b0df198f",
67
67
  release_date="2024-07-12",
68
68
  n_parameters=435_000_000,
69
+ n_embedding_parameters=None,
69
70
  memory_usage_mb=1660,
70
71
  max_tokens=8192,
71
72
  embed_dim=4096,
72
73
  license="mit",
73
74
  similarity_fn_name=ScoringFunction.COSINE,
74
- framework=["Sentence Transformers", "PyTorch", "GritLM"],
75
+ framework=[
76
+ "Sentence Transformers",
77
+ "PyTorch",
78
+ "GritLM",
79
+ "safetensors",
80
+ "Transformers",
81
+ ],
75
82
  reference="https://huggingface.co/NovaSearch/stella_en_400M_v5",
76
83
  training_datasets=nvidia_training_datasets, # also distilled from gte-qwen (but training data is unknown) #2164
77
84
  public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
@@ -95,12 +102,20 @@ stella_en_1_5b = ModelMeta(
95
102
  revision="d03be74b361d4eb24f42a2fe5bd2e29917df4604",
96
103
  release_date="2024-07-12",
97
104
  n_parameters=1_540_000_000,
105
+ n_embedding_parameters=232_928_256,
98
106
  memory_usage_mb=5887,
99
107
  max_tokens=131072,
100
108
  embed_dim=8960,
101
109
  license="mit",
102
110
  similarity_fn_name=ScoringFunction.COSINE,
103
- framework=["Sentence Transformers", "PyTorch", "GritLM"],
111
+ framework=[
112
+ "Sentence Transformers",
113
+ "PyTorch",
114
+ "GritLM",
115
+ "ONNX",
116
+ "safetensors",
117
+ "Transformers",
118
+ ],
104
119
  reference="https://huggingface.co/NovaSearch/stella_en_1.5B_v5",
105
120
  training_datasets=nvidia_training_datasets, # also distilled from gte-qwen (but training data is unknown) #2164
106
121
  public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
@@ -117,6 +132,7 @@ stella_large_zh_v3_1792d = ModelMeta(
117
132
  revision="d5d39eb8cd11c80a63df53314e59997074469f09",
118
133
  release_date="2024-02-17",
119
134
  n_parameters=None,
135
+ n_embedding_parameters=21_635_072,
120
136
  memory_usage_mb=None, # can't see on model card
121
137
  embed_dim=1792,
122
138
  license="not specified",
@@ -144,6 +160,7 @@ stella_base_zh_v3_1792d = ModelMeta(
144
160
  revision="82254892a0fba125aa2abf3a4800d2dd12821343",
145
161
  release_date="2024-02-17",
146
162
  n_parameters=None,
163
+ n_embedding_parameters=16_226_304,
147
164
  memory_usage_mb=None, # can't see on model card
148
165
  embed_dim=1792,
149
166
  license="mit",
@@ -172,13 +189,14 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
172
189
  revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
173
190
  release_date="2024-02-27",
174
191
  n_parameters=int(326 * 1e6),
192
+ n_embedding_parameters=21_635_072,
175
193
  memory_usage_mb=1242,
176
194
  embed_dim=1792,
177
195
  license="mit",
178
196
  max_tokens=512,
179
197
  reference="https://huggingface.co/dunzhang/stella-large-zh-v3-1792d",
180
198
  similarity_fn_name=ScoringFunction.COSINE,
181
- framework=["Sentence Transformers", "PyTorch"],
199
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
182
200
  use_instructions=False,
183
201
  superseded_by=None,
184
202
  adapted_from="dunzhang/stella-large-zh-v3-1792d",
@@ -196,13 +214,14 @@ zpoint_large_embedding_zh = ModelMeta(
196
214
  revision="b1075144f440ab4409c05622c1179130ebd57d03",
197
215
  release_date="2024-06-04",
198
216
  n_parameters=int(326 * 1e6),
217
+ n_embedding_parameters=21_635_072,
199
218
  memory_usage_mb=1242,
200
219
  embed_dim=1792,
201
220
  license="mit",
202
221
  max_tokens=512,
203
222
  reference="https://huggingface.co/iampanda/zpoint_large_embedding_zh",
204
223
  similarity_fn_name=ScoringFunction.COSINE,
205
- framework=["Sentence Transformers", "PyTorch"],
224
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
206
225
  use_instructions=False,
207
226
  superseded_by=None,
208
227
  adapted_from="dunzhang/stella-mrl-large-zh-v3.5-1792d",
@@ -327,11 +327,12 @@ tarka_embedding_150m_v1 = ModelMeta(
327
327
  revision="b0ffecc4ef0d873e517507ed080e43b88b2704b9",
328
328
  release_date="2025-11-04",
329
329
  n_parameters=155_714_304,
330
+ n_embedding_parameters=None,
330
331
  embed_dim=768,
331
332
  max_tokens=2048,
332
333
  license="gemma",
333
334
  reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1",
334
- framework=["Sentence Transformers", "PyTorch"],
335
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
335
336
  use_instructions=True,
336
337
  public_training_code=None,
337
338
  public_training_data=None,
@@ -361,13 +362,14 @@ tarka_embedding_350m_v1 = ModelMeta(
361
362
  revision="a850d6a329145474727424fed6b12b62096b8ba3",
362
363
  release_date="2025-11-11",
363
364
  n_parameters=354_483_968,
365
+ n_embedding_parameters=None,
364
366
  memory_usage_mb=676,
365
367
  embed_dim=1024,
366
368
  max_tokens=128000,
367
369
  license=None,
368
370
  reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-350M-V1",
369
371
  similarity_fn_name="cosine",
370
- framework=["Sentence Transformers", "PyTorch"],
372
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
371
373
  use_instructions=True,
372
374
  public_training_code=None,
373
375
  public_training_data=None,
@@ -22,12 +22,13 @@ text2vec_base_chinese = ModelMeta(
22
22
  revision="183bb99aa7af74355fb58d16edf8c13ae7c5433e",
23
23
  release_date="2022-01-23",
24
24
  n_parameters=int(102 * 1e6),
25
+ n_embedding_parameters=16_226_304,
25
26
  embed_dim=768,
26
27
  license="apache-2.0",
27
28
  max_tokens=512,
28
29
  reference="https://huggingface.co/shibing624/text2vec-base-chinese",
29
30
  similarity_fn_name=ScoringFunction.COSINE,
30
- framework=["Sentence Transformers", "PyTorch"],
31
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
31
32
  use_instructions=False,
32
33
  superseded_by=None,
33
34
  adapted_from=None,
@@ -51,13 +52,14 @@ text2vec_base_chinese_paraphrase = ModelMeta(
51
52
  revision="e90c150a9c7fb55a67712a766d6820c55fb83cdd",
52
53
  release_date="2023-06-19",
53
54
  n_parameters=118 * 1e6,
55
+ n_embedding_parameters=30_720_000,
54
56
  memory_usage_mb=450,
55
57
  embed_dim=768,
56
58
  license="apache-2.0",
57
59
  max_tokens=512,
58
60
  reference="https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase",
59
61
  similarity_fn_name=ScoringFunction.COSINE,
60
- framework=["Sentence Transformers", "PyTorch"],
62
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
61
63
  use_instructions=False,
62
64
  superseded_by=None,
63
65
  adapted_from=None,
@@ -95,13 +97,20 @@ text2vec_base_multilingual = ModelMeta(
95
97
  # So probably best not to.
96
98
  loader=sentence_transformers_loader,
97
99
  n_parameters=117654272,
100
+ n_embedding_parameters=96_014_208,
98
101
  memory_usage_mb=449,
99
102
  embed_dim=384,
100
103
  license="apache-2.0",
101
104
  max_tokens=256,
102
105
  reference="https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase",
103
106
  similarity_fn_name=ScoringFunction.COSINE,
104
- framework=["Sentence Transformers", "PyTorch"],
107
+ framework=[
108
+ "Sentence Transformers",
109
+ "PyTorch",
110
+ "ONNX",
111
+ "safetensors",
112
+ "Transformers",
113
+ ],
105
114
  use_instructions=False,
106
115
  superseded_by=None,
107
116
  adapted_from="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
@@ -8,6 +8,7 @@ xlm_roberta_ua_distilled = ModelMeta(
8
8
  model_type=["dense"],
9
9
  loader=sentence_transformers_loader,
10
10
  n_parameters=278_000_000,
11
+ n_embedding_parameters=192_001_536,
11
12
  memory_usage_mb=1061,
12
13
  max_tokens=512,
13
14
  embed_dim=768,
@@ -17,7 +18,7 @@ xlm_roberta_ua_distilled = ModelMeta(
17
18
  open_weights=True,
18
19
  public_training_code="https://github.com/panalexeu/xlm-roberta-ua-distilled/blob/main/researches/research_final.ipynb",
19
20
  similarity_fn_name="cosine",
20
- framework=["Sentence Transformers"],
21
+ framework=["Sentence Transformers", "safetensors"],
21
22
  reference="https://github.com/panalexeu/xlm-roberta-ua-distilled/tree/main",
22
23
  languages=["eng-Latn", "ukr-Cyrl"],
23
24
  training_datasets=set(
@@ -1,13 +1,18 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.model_meta import ModelMeta, ScoringFunction
9
9
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
10
- from mteb.types import Array, BatchedInput, PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import Array, BatchedInput, PromptType
11
16
 
12
17
  logger = logging.getLogger(__name__)
13
18
 
@@ -67,12 +72,19 @@ uae_large_v1 = ModelMeta(
67
72
  revision="369c368f70f16a613f19f5598d4f12d9f44235d4",
68
73
  release_date="2023-12-04", # initial commit of hf model.
69
74
  n_parameters=int(335 * 1e6),
75
+ n_embedding_parameters=31_254_528,
70
76
  memory_usage_mb=1278,
71
77
  max_tokens=512,
72
78
  embed_dim=1024,
73
79
  license="mit",
74
80
  similarity_fn_name=ScoringFunction.COSINE,
75
- framework=["Sentence Transformers", "PyTorch"],
81
+ framework=[
82
+ "Sentence Transformers",
83
+ "PyTorch",
84
+ "ONNX",
85
+ "safetensors",
86
+ "Transformers",
87
+ ],
76
88
  reference="https://huggingface.co/WhereIsAI/UAE-Large-V1",
77
89
  use_instructions=True,
78
90
  citation="""
@@ -1,6 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
6
  from mteb.models.model_meta import ModelMeta, ScoringFunction
3
- from mteb.types import PromptType
7
+
8
+ if TYPE_CHECKING:
9
+ from mteb.types import PromptType
4
10
 
5
11
 
6
12
  def instruction_template(
@@ -32,13 +38,14 @@ vdr_2b_multi_v1 = ModelMeta(
32
38
  release_date="2024-01-08",
33
39
  modalities=["text"], # TODO: integrate with image
34
40
  n_parameters=2_000_000_000,
41
+ n_embedding_parameters=233_373_696,
35
42
  memory_usage_mb=4213,
36
43
  max_tokens=32768,
37
44
  embed_dim=1536,
38
45
  license="apache-2.0",
39
46
  reference="https://huggingface.co/llamaindex/vdr-2b-multi-v1",
40
47
  similarity_fn_name=ScoringFunction.COSINE,
41
- framework=["PyTorch", "Sentence Transformers"],
48
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
42
49
  use_instructions=True,
43
50
  public_training_code=None,
44
51
  public_training_data="https://huggingface.co/datasets/llamaindex/vdr-multilingual-train",
@@ -16,13 +16,14 @@ greennode_embedding_large_vn_v1 = ModelMeta(
16
16
  loader=sentence_transformers_loader,
17
17
  open_weights=True,
18
18
  n_parameters=568_000_000,
19
+ n_embedding_parameters=256_002_048,
19
20
  memory_usage_mb=2167,
20
21
  embed_dim=1024,
21
22
  license="cc-by-4.0",
22
23
  max_tokens=8194,
23
24
  reference="https://huggingface.co/GreenNode/GreenNode-Embedding-Large-VN-V1",
24
25
  similarity_fn_name="cosine",
25
- framework=["Sentence Transformers", "PyTorch"],
26
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
26
27
  use_instructions=False,
27
28
  public_training_code=None,
28
29
  public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
@@ -41,13 +42,14 @@ greennode_embedding_large_vn_mixed_v1 = ModelMeta(
41
42
  loader=sentence_transformers_loader,
42
43
  open_weights=True,
43
44
  n_parameters=568_000_000,
45
+ n_embedding_parameters=256_002_048,
44
46
  memory_usage_mb=2167,
45
47
  embed_dim=1024,
46
48
  license="cc-by-4.0",
47
49
  max_tokens=8194,
48
50
  reference="https://huggingface.co/GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1",
49
51
  similarity_fn_name="cosine",
50
- framework=["Sentence Transformers", "PyTorch"],
52
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
51
53
  use_instructions=False,
52
54
  public_training_code=None,
53
55
  public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
@@ -66,13 +68,14 @@ aiteamvn_vietnamese_embeddings = ModelMeta(
66
68
  loader=sentence_transformers_loader,
67
69
  open_weights=True,
68
70
  n_parameters=568_000_000,
71
+ n_embedding_parameters=256_002_048,
69
72
  memory_usage_mb=2166,
70
73
  embed_dim=1024,
71
74
  license="cc-by-4.0",
72
75
  max_tokens=8194,
73
76
  reference="https://huggingface.co/AITeamVN/Vietnamese_Embedding",
74
77
  similarity_fn_name="cosine",
75
- framework=["Sentence Transformers", "PyTorch"],
78
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
76
79
  use_instructions=False,
77
80
  public_training_code=None,
78
81
  public_training_data=None,
@@ -98,13 +101,14 @@ hiieu_halong_embedding = ModelMeta(
98
101
  use_instructions=False,
99
102
  open_weights=True,
100
103
  n_parameters=278_000_000,
104
+ n_embedding_parameters=192_001_536,
101
105
  memory_usage_mb=1061,
102
106
  embed_dim=768,
103
107
  license="apache-2.0",
104
108
  max_tokens=514,
105
109
  reference="https://huggingface.co/hiieu/halong_embedding",
106
110
  similarity_fn_name="cosine",
107
- framework=["Sentence Transformers", "PyTorch"],
111
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
108
112
  public_training_code=None,
109
113
  public_training_data=None,
110
114
  training_datasets=None,
@@ -129,13 +133,14 @@ sup_simcse_vietnamese_phobert_base_ = ModelMeta(
129
133
  use_instructions=False,
130
134
  open_weights=True,
131
135
  n_parameters=135_000_000,
136
+ n_embedding_parameters=49_152_768,
132
137
  memory_usage_mb=517,
133
138
  max_tokens=256,
134
139
  embed_dim=768,
135
140
  license="apache-2.0",
136
141
  public_training_code=None,
137
142
  public_training_data=None,
138
- framework=["PyTorch", "Sentence Transformers"],
143
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
139
144
  reference="https://huggingface.co/VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
140
145
  similarity_fn_name="cosine",
141
146
  training_datasets=None,
@@ -167,13 +172,14 @@ bkai_foundation_models_vietnamese_bi_encoder = ModelMeta(
167
172
  use_instructions=False,
168
173
  open_weights=True,
169
174
  n_parameters=135_000_000,
175
+ n_embedding_parameters=49_152_768,
170
176
  memory_usage_mb=515,
171
177
  max_tokens=256,
172
178
  embed_dim=768,
173
179
  license="apache-2.0",
174
180
  public_training_code=None,
175
181
  public_training_data=None,
176
- framework=["PyTorch", "Sentence Transformers"],
182
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
177
183
  reference="https://huggingface.co/bkai-foundation-models/vietnamese-bi-encoder",
178
184
  similarity_fn_name="cosine",
179
185
  training_datasets=None,
@@ -1,14 +1,19 @@
1
- from typing import Any, Literal
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Literal
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  VISTA_CITATION = """@article{zhou2024vista,
14
19
  title={VISTA: Visualized Text Embedding For Universal Multi-Modal Retrieval},
@@ -253,6 +258,7 @@ visualized_bge_base = ModelMeta(
253
258
  release_date="2024-06-06",
254
259
  modalities=["image", "text"],
255
260
  n_parameters=196_000_000,
261
+ n_embedding_parameters=None,
256
262
  memory_usage_mb=1631,
257
263
  max_tokens=512,
258
264
  embed_dim=768,
@@ -281,6 +287,7 @@ visualized_bge_m3 = ModelMeta(
281
287
  release_date="2024-06-06",
282
288
  modalities=["image", "text"],
283
289
  n_parameters=872_909_505,
290
+ n_embedding_parameters=None,
284
291
  memory_usage_mb=4263,
285
292
  max_tokens=8192,
286
293
  embed_dim=1024,
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import (
@@ -10,10 +11,14 @@ from mteb._requires_package import (
10
11
  requires_package,
11
12
  suggest_package,
12
13
  )
13
- from mteb.abstasks.task_metadata import TaskMetadata
14
14
  from mteb.models.abs_encoder import AbsEncoder
15
15
  from mteb.models.model_meta import ModelMeta, ScoringFunction
16
- from mteb.types import Array, BatchedInput, PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import Array, BatchedInput, PromptType
17
22
 
18
23
  logger = logging.getLogger(__name__)
19
24
 
@@ -41,7 +46,7 @@ class VLM2VecWrapper(AbsEncoder):
41
46
  model_name,
42
47
  "pip install flash-attn --no-build-isolation",
43
48
  ):
44
- import flash_attn # noqa
49
+ pass
45
50
 
46
51
  requires_package(self, "peft", model_name, "pip install 'mteb[peft]'")
47
52
  from peft import LoraConfig, PeftModel
@@ -275,6 +280,7 @@ vlm2vec_lora = ModelMeta(
275
280
  release_date="2024-10-08",
276
281
  modalities=["image", "text"],
277
282
  n_parameters=None,
283
+ n_embedding_parameters=None,
278
284
  memory_usage_mb=None,
279
285
  max_tokens=131072,
280
286
  embed_dim=3072,
@@ -282,7 +288,7 @@ vlm2vec_lora = ModelMeta(
282
288
  open_weights=True,
283
289
  public_training_code="https://github.com/TIGER-AI-Lab/VLM2Vec",
284
290
  public_training_data="https://huggingface.co/datasets/TIGER-Lab/MMEB-train",
285
- framework=["PyTorch"],
291
+ framework=["PyTorch", "Transformers"],
286
292
  reference="https://huggingface.co/TIGER-Lab/VLM2Vec-LoRA",
287
293
  similarity_fn_name=ScoringFunction.COSINE,
288
294
  use_instructions=True,
@@ -299,6 +305,7 @@ vlm2vec_full = ModelMeta(
299
305
  release_date="2024-10-08",
300
306
  modalities=["image", "text"],
301
307
  n_parameters=4_150_000_000,
308
+ n_embedding_parameters=None,
302
309
  memory_usage_mb=7909,
303
310
  max_tokens=131072,
304
311
  embed_dim=3072,
@@ -306,7 +313,7 @@ vlm2vec_full = ModelMeta(
306
313
  open_weights=True,
307
314
  public_training_code="https://github.com/TIGER-AI-Lab/VLM2Vec",
308
315
  public_training_data="https://huggingface.co/TIGER-Lab/VLM2Vec-Full",
309
- framework=["PyTorch"],
316
+ framework=["PyTorch", "Transformers", "safetensors"],
310
317
  reference="https://huggingface.co/TIGER-Lab/VLM2Vec-Full",
311
318
  similarity_fn_name=ScoringFunction.COSINE,
312
319
  use_instructions=True,