mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -64,13 +64,14 @@ QZhou_Embedding = ModelMeta(
64
64
  revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
65
65
  release_date="2025-08-24",
66
66
  n_parameters=7_070_619_136,
67
+ n_embedding_parameters=None,
67
68
  memory_usage_mb=14436,
68
69
  embed_dim=3584,
69
70
  license="apache-2.0",
70
71
  max_tokens=8192,
71
72
  reference="https://huggingface.co/Kingsoft-LLM/QZhou-Embedding",
72
73
  similarity_fn_name="cosine",
73
- framework=["Sentence Transformers", "PyTorch"],
74
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
74
75
  use_instructions=True,
75
76
  public_training_code=None,
76
77
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
@@ -98,13 +99,14 @@ QZhou_Embedding_Zh = ModelMeta(
98
99
  revision="0321ccb126413d1e49c5ce908e802b63d35f18e2",
99
100
  release_date="2025-09-28",
100
101
  n_parameters=7_575_747_328,
102
+ n_embedding_parameters=None,
101
103
  memory_usage_mb=29431,
102
104
  embed_dim=1792,
103
105
  license="apache-2.0",
104
106
  max_tokens=8192,
105
107
  reference="http://huggingface.co/Kingsoft-LLM/QZhou-Embedding-Zh",
106
108
  similarity_fn_name="cosine",
107
- framework=["Sentence Transformers", "PyTorch"],
109
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
108
110
  use_instructions=True,
109
111
  public_training_code=None,
110
112
  public_training_data=None,
@@ -5,18 +5,19 @@ from typing import TYPE_CHECKING, Any, Literal
5
5
 
6
6
  import numpy as np
7
7
  import torch
8
- from torch.utils.data import DataLoader
9
8
 
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
9
  from mteb.models.model_meta import ModelMeta
12
10
  from mteb.similarity_functions import (
13
11
  select_pairwise_similarity,
14
12
  select_similarity,
15
13
  )
16
- from mteb.types._encoder_io import Array, BatchedInput, PromptType
17
14
 
18
15
  if TYPE_CHECKING:
19
16
  from PIL import Image
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types._encoder_io import Array, BatchedInput, PromptType
20
21
 
21
22
 
22
23
  def _string_to_vector(text: str | None, size: int) -> np.ndarray:
@@ -68,7 +69,7 @@ _common_mock_metadata = dict(
68
69
  license="mit",
69
70
  max_tokens=np.inf,
70
71
  reference=None,
71
- similarity_fn_name="cosine", # type: ignore
72
+ similarity_fn_name="cosine",
72
73
  framework=[],
73
74
  use_instructions=False,
74
75
  public_training_code=None, # No training code, as this is a random baseline
@@ -187,7 +188,7 @@ class RandomEncoderBaseline:
187
188
 
188
189
 
189
190
  random_encoder_baseline = ModelMeta(
190
- loader=RandomEncoderBaseline, # type: ignore
191
+ loader=RandomEncoderBaseline,
191
192
  name="baseline/random-encoder-baseline",
192
193
  model_type=["dense"],
193
194
  modalities=["text", "image"],
@@ -232,7 +233,7 @@ class RandomCrossEncoderBaseline:
232
233
 
233
234
 
234
235
  random_cross_encoder_baseline = ModelMeta(
235
- loader=RandomCrossEncoderBaseline, # type: ignore
236
+ loader=RandomCrossEncoderBaseline,
236
237
  name="baseline/random-cross-encoder-baseline",
237
238
  model_type=["cross-encoder"],
238
239
  modalities=["text", "image"],
@@ -4,7 +4,7 @@ from mteb.models.model_implementations.model2vec_models import Model2VecModel
4
4
  from mteb.models.model_meta import ModelMeta, ScoringFunction
5
5
 
6
6
  potion_base_8m = ModelMeta(
7
- loader=Model2VecModel, # type: ignore
7
+ loader=Model2VecModel,
8
8
  name="rasgaard/m2v-dfm-large",
9
9
  model_type=["dense"],
10
10
  languages=["dan-Latn"],
@@ -12,12 +12,13 @@ potion_base_8m = ModelMeta(
12
12
  revision="387897cfb09992e6d45ea9cd7b28b9fcf119e23a",
13
13
  release_date="2025-10-08",
14
14
  n_parameters=22893312,
15
+ n_embedding_parameters=22893312,
15
16
  memory_usage_mb=87,
16
17
  max_tokens=np.inf,
17
18
  embed_dim=256,
18
19
  license="mit",
19
20
  similarity_fn_name=ScoringFunction.COSINE,
20
- framework=["NumPy", "Sentence Transformers"],
21
+ framework=["NumPy", "Sentence Transformers", "safetensors"],
21
22
  reference="https://huggingface.co/rasgaard/m2v-dfm-large",
22
23
  use_instructions=False,
23
24
  adapted_from="KennethEnevoldsen/dfm-sentence-encoder-large",
@@ -36,12 +36,76 @@ REASONIR_TRAINING_DATA = {
36
36
  "DuRetrieval",
37
37
  "QuoraRetrieval",
38
38
  }
39
+ _prompts_dict = {
40
+ "BrightBiologyRetrieval": {
41
+ "query": "Given a Biology post, retrieve relevant passages that help answer the post"
42
+ },
43
+ "BrightEarthScienceRetrieval": {
44
+ "query": "Given a Earth Science post, retrieve relevant passages that help answer the post"
45
+ },
46
+ "BrightEconomicsRetrieval": {
47
+ "query": "Given a Economics post, retrieve relevant passages that help answer the post"
48
+ },
49
+ "BrightPsychologyRetrieval": {
50
+ "query": "Given a Psychology post, retrieve relevant passages that help answer the post"
51
+ },
52
+ "BrightRoboticsRetrieval": {
53
+ "query": "Given a Robotics post, retrieve relevant passages that help answer the post"
54
+ },
55
+ "BrightStackoverflowRetrieval": {
56
+ "query": "Given a Stackoverflow post, retrieve relevant passages that help answer the post"
57
+ },
58
+ "BrightSustainableLivingRetrieval": {
59
+ "query": "Given a Sustainable Living post, retrieve relevant passages that help answer the post"
60
+ },
61
+ "BrightPonyRetrieval": {
62
+ "query": "Given a Pony question, retrieve relevant passages that help answer the question"
63
+ },
64
+ "BrightLeetcodeRetrieval": {
65
+ "query": "Given a coding problem, retrieve relevant examples that help answer the problem",
66
+ },
67
+ "BrightAopsRetrieval": {
68
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem"
69
+ },
70
+ "BrightTheoremQATheoremsRetrieval": {
71
+ "query": "Given a Math problem, retrieve relevant theorems that help answer the problem",
72
+ },
73
+ "BrightTheoremQAQuestionsRetrieval": {
74
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem",
75
+ },
76
+ "BrightBiologyLongRetrieval": {
77
+ "query": "Given a Biology post, retrieve relevant documents that help answer the post"
78
+ },
79
+ "BrightEarthScienceLongRetrieval": {
80
+ "query": "Given a Earth Science post, retrieve relevant documents that help answer the post"
81
+ },
82
+ "BrightEconomicsLongRetrieval": {
83
+ "query": "Given a Economics post, retrieve relevant documents that help answer the post"
84
+ },
85
+ "BrightPsychologyLongRetrieval": {
86
+ "query": "Given a Psychology post, retrieve relevant documents that help answer the post"
87
+ },
88
+ "BrightRoboticsLongRetrieval": {
89
+ "query": "Given a Robotics post, retrieve relevant documents that help answer the post"
90
+ },
91
+ "BrightStackoverflowLongRetrieval": {
92
+ "query": "Given a Stackoverflow post, retrieve relevant documents that help answer the post"
93
+ },
94
+ "BrightSustainableLivingLongRetrieval": {
95
+ "query": "Given a Sustainable Living post, retrieve relevant documents that help answer the post"
96
+ },
97
+ "BrightPonyLongRetrieval": {
98
+ "query": "Given a Pony question, retrieve relevant documents that help answer the question"
99
+ },
100
+ }
101
+
39
102
 
40
103
  ReasonIR_8B = ModelMeta(
41
104
  loader=InstructSentenceTransformerModel,
42
105
  loader_kwargs=dict(
43
106
  instruction_template=instruction_template,
44
107
  trust_remote_code=True,
108
+ prompts_dict=_prompts_dict,
45
109
  ),
46
110
  name="ReasonIR/ReasonIR-8B",
47
111
  model_type=["dense"],
@@ -50,13 +114,14 @@ ReasonIR_8B = ModelMeta(
50
114
  revision="c3d0690370ff4a8c3d3882d8dfa85c43650034fa",
51
115
  release_date="2025-04-29",
52
116
  n_parameters=7_500_000_000,
117
+ n_embedding_parameters=None,
53
118
  memory_usage_mb=None,
54
119
  embed_dim=4096,
55
120
  license="cc-by-nc-4.0",
56
121
  max_tokens=131072,
57
122
  reference="https://huggingface.co/ReasonIR/ReasonIR-8B",
58
123
  similarity_fn_name="cosine",
59
- framework=["Sentence Transformers", "PyTorch"],
124
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
60
125
  use_instructions=True,
61
126
  training_datasets=REASONIR_TRAINING_DATA,
62
127
  public_training_code="https://github.com/facebookresearch/ReasonIR/tree/main/training",
@@ -1,22 +1,29 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import numpy as np
6
7
  import torch
7
8
  import torch.nn.functional as F
8
- from torch.utils.data import DataLoader
9
9
  from tqdm.auto import tqdm
10
10
 
11
11
  from mteb._requires_package import requires_package
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
12
  from mteb.models.abs_encoder import AbsEncoder
14
13
  from mteb.models.model_meta import (
15
14
  ModelMeta,
16
15
  ScoringFunction,
17
16
  )
18
- from mteb.models.models_protocols import EncoderProtocol
19
- from mteb.types import Array, BatchedInput, PromptType
17
+ from mteb.types import PromptType
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import Callable
21
+
22
+ from torch.utils.data import DataLoader
23
+
24
+ from mteb.abstasks.task_metadata import TaskMetadata
25
+ from mteb.models.models_protocols import EncoderProtocol
26
+ from mteb.types import Array, BatchedInput
20
27
 
21
28
  logger = logging.getLogger(__name__)
22
29
 
@@ -154,7 +161,7 @@ REPLLAMA_CITATION = """
154
161
  """
155
162
 
156
163
  repllama_llama2_original = ModelMeta(
157
- loader=RepLLaMAModel, # type: ignore
164
+ loader=RepLLaMAModel,
158
165
  loader_kwargs=dict(
159
166
  base_model_name_or_path="meta-llama/Llama-2-7b-hf",
160
167
  device_map="auto",
@@ -172,6 +179,7 @@ repllama_llama2_original = ModelMeta(
172
179
  "mMARCO-NL", # translation not trained on
173
180
  },
174
181
  n_parameters=7_000_000,
182
+ n_embedding_parameters=131_072_000,
175
183
  memory_usage_mb=27,
176
184
  max_tokens=4096,
177
185
  embed_dim=4096,
@@ -187,7 +195,7 @@ repllama_llama2_original = ModelMeta(
187
195
 
188
196
 
189
197
  repllama_llama2_reproduced = ModelMeta(
190
- loader=RepLLaMAModel, # type: ignore
198
+ loader=RepLLaMAModel,
191
199
  loader_kwargs=dict(
192
200
  base_model_name_or_path="meta-llama/Llama-2-7b-hf",
193
201
  device_map="auto",
@@ -201,13 +209,14 @@ repllama_llama2_reproduced = ModelMeta(
201
209
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision
202
210
  release_date="2024-09-15",
203
211
  n_parameters=7_000_000,
212
+ n_embedding_parameters=None,
204
213
  memory_usage_mb=27,
205
214
  max_tokens=4096,
206
215
  embed_dim=4096,
207
216
  license="apache-2.0",
208
217
  reference="https://huggingface.co/samaya-ai/RepLLaMA-reproduced",
209
218
  similarity_fn_name=ScoringFunction.COSINE,
210
- framework=["PyTorch", "Tevatron"],
219
+ framework=["PyTorch", "Tevatron", "safetensors"],
211
220
  use_instructions=True,
212
221
  citation=REPLLAMA_CITATION,
213
222
  public_training_code=None,
@@ -1,16 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
 
7
8
  from mteb._requires_package import requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.model_meta import ModelMeta
10
- from mteb.types import Array, BatchedInput, PromptType
11
10
 
12
11
  from .bge_models import bge_m3_training_data
13
12
 
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
18
+
19
+
14
20
  logger = logging.getLogger(__name__)
15
21
 
16
22
 
@@ -214,7 +220,7 @@ class JinaReranker(RerankerWrapper):
214
220
 
215
221
 
216
222
  monobert_large = ModelMeta(
217
- loader=MonoBERTReranker, # type: ignore
223
+ loader=MonoBERTReranker,
218
224
  loader_kwargs=dict(
219
225
  fp_options="float16",
220
226
  ),
@@ -225,6 +231,7 @@ monobert_large = ModelMeta(
225
231
  revision="0a97706f3827389da43b83348d5d18c9d53876fa",
226
232
  release_date="2020-05-28",
227
233
  n_parameters=None,
234
+ n_embedding_parameters=31_254_528,
228
235
  memory_usage_mb=None,
229
236
  max_tokens=None,
230
237
  embed_dim=None,
@@ -234,12 +241,12 @@ monobert_large = ModelMeta(
234
241
  similarity_fn_name=None,
235
242
  use_instructions=None,
236
243
  training_datasets=None,
237
- framework=["Sentence Transformers", "PyTorch"],
244
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
238
245
  )
239
246
 
240
247
  # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
241
248
  jina_reranker_multilingual = ModelMeta(
242
- loader=JinaReranker, # type: ignore
249
+ loader=JinaReranker,
243
250
  loader_kwargs=dict(
244
251
  fp_options="float16",
245
252
  ),
@@ -250,6 +257,7 @@ jina_reranker_multilingual = ModelMeta(
250
257
  revision="126747772a932960028d9f4dc93bd5d9c4869be4",
251
258
  release_date="2024-09-26",
252
259
  n_parameters=None,
260
+ n_embedding_parameters=None,
253
261
  memory_usage_mb=531,
254
262
  max_tokens=None,
255
263
  embed_dim=None,
@@ -259,11 +267,17 @@ jina_reranker_multilingual = ModelMeta(
259
267
  similarity_fn_name=None,
260
268
  use_instructions=None,
261
269
  training_datasets=None,
262
- framework=["Sentence Transformers", "PyTorch"],
270
+ framework=[
271
+ "Sentence Transformers",
272
+ "PyTorch",
273
+ "Transformers",
274
+ "ONNX",
275
+ "safetensors",
276
+ ],
263
277
  )
264
278
 
265
279
  bge_reranker_v2_m3 = ModelMeta(
266
- loader=BGEReranker, # type: ignore
280
+ loader=BGEReranker,
267
281
  loader_kwargs=dict(
268
282
  fp_options="float16",
269
283
  ),
@@ -307,6 +321,7 @@ bge_reranker_v2_m3 = ModelMeta(
307
321
  revision="953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e",
308
322
  release_date="2024-06-24",
309
323
  n_parameters=None,
324
+ n_embedding_parameters=256_002_048,
310
325
  memory_usage_mb=2166,
311
326
  max_tokens=None,
312
327
  embed_dim=None,
@@ -316,7 +331,7 @@ bge_reranker_v2_m3 = ModelMeta(
316
331
  similarity_fn_name=None,
317
332
  use_instructions=None,
318
333
  training_datasets=bge_m3_training_data,
319
- framework=["Sentence Transformers", "PyTorch"],
334
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
320
335
  citation="""
321
336
  @misc{li2023making,
322
337
  title={Making Large Language Models A Better Foundation For Dense Retrieval},
@@ -1,15 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.model_meta import ModelMeta
9
- from mteb.types import Array, BatchedInput, PromptType
10
9
 
11
10
  from .rerankers_custom import RerankerWrapper
12
11
 
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
17
+
18
+
13
19
  logger = logging.getLogger(__name__)
14
20
 
15
21
 
@@ -321,6 +327,7 @@ monot5_small = ModelMeta(
321
327
  revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e",
322
328
  release_date="2022-03-28",
323
329
  n_parameters=None,
330
+ n_embedding_parameters=16_449_536,
324
331
  memory_usage_mb=None,
325
332
  max_tokens=None,
326
333
  embed_dim=None,
@@ -330,7 +337,7 @@ monot5_small = ModelMeta(
330
337
  similarity_fn_name=None,
331
338
  use_instructions=None,
332
339
  training_datasets=None,
333
- framework=["PyTorch"],
340
+ framework=["PyTorch", "Transformers"],
334
341
  citation="""@misc{rosa2022parameterleftbehinddistillation,
335
342
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
336
343
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -343,7 +350,7 @@ monot5_small = ModelMeta(
343
350
  )
344
351
 
345
352
  monot5_base = ModelMeta(
346
- loader=MonoT5Reranker, # type: ignore
353
+ loader=MonoT5Reranker,
347
354
  loader_kwargs=dict(
348
355
  fp_options="float16",
349
356
  ),
@@ -363,6 +370,7 @@ monot5_base = ModelMeta(
363
370
  url={https://arxiv.org/abs/2206.02873},
364
371
  }""",
365
372
  n_parameters=None,
373
+ n_embedding_parameters=24_674_304,
366
374
  memory_usage_mb=None,
367
375
  max_tokens=None,
368
376
  embed_dim=None,
@@ -372,7 +380,7 @@ monot5_base = ModelMeta(
372
380
  similarity_fn_name=None,
373
381
  use_instructions=None,
374
382
  training_datasets=None,
375
- framework=["PyTorch"],
383
+ framework=["PyTorch", "Transformers"],
376
384
  )
377
385
 
378
386
  monot5_large = ModelMeta(
@@ -387,6 +395,7 @@ monot5_large = ModelMeta(
387
395
  revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98",
388
396
  release_date="2022-03-28",
389
397
  n_parameters=None,
398
+ n_embedding_parameters=32_899_072,
390
399
  memory_usage_mb=None,
391
400
  max_tokens=None,
392
401
  embed_dim=None,
@@ -396,7 +405,7 @@ monot5_large = ModelMeta(
396
405
  similarity_fn_name=None,
397
406
  use_instructions=None,
398
407
  training_datasets=None,
399
- framework=["PyTorch"],
408
+ framework=["PyTorch", "Transformers"],
400
409
  citation="""@misc{rosa2022parameterleftbehinddistillation,
401
410
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
402
411
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -420,6 +429,7 @@ monot5_3b = ModelMeta(
420
429
  revision="bc0c419a438c81f592f878ce32430a1823f5db6c",
421
430
  release_date="2022-03-28",
422
431
  n_parameters=None,
432
+ n_embedding_parameters=32_899_072,
423
433
  memory_usage_mb=None,
424
434
  max_tokens=None,
425
435
  embed_dim=None,
@@ -429,7 +439,7 @@ monot5_3b = ModelMeta(
429
439
  similarity_fn_name=None,
430
440
  use_instructions=None,
431
441
  training_datasets=None,
432
- framework=["PyTorch"],
442
+ framework=["PyTorch", "Transformers"],
433
443
  citation="""@misc{rosa2022parameterleftbehinddistillation,
434
444
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
435
445
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -442,7 +452,7 @@ monot5_3b = ModelMeta(
442
452
  )
443
453
 
444
454
  flant5_base = ModelMeta(
445
- loader=FLANT5Reranker, # type: ignore
455
+ loader=FLANT5Reranker,
446
456
  loader_kwargs=dict(
447
457
  fp_options="float16",
448
458
  ),
@@ -476,6 +486,7 @@ flant5_base = ModelMeta(
476
486
  # "qed": ["train"],
477
487
  ),
478
488
  n_parameters=None,
489
+ n_embedding_parameters=24_674_304,
479
490
  memory_usage_mb=944,
480
491
  max_tokens=None,
481
492
  embed_dim=None,
@@ -484,7 +495,7 @@ flant5_base = ModelMeta(
484
495
  public_training_data=None,
485
496
  similarity_fn_name=None,
486
497
  use_instructions=None,
487
- framework=["PyTorch"],
498
+ framework=["PyTorch", "Transformers", "safetensors"],
488
499
  )
489
500
 
490
501
  flant5_large = ModelMeta(
@@ -522,6 +533,7 @@ flant5_large = ModelMeta(
522
533
  # "qed": ["train"],
523
534
  ),
524
535
  n_parameters=None,
536
+ n_embedding_parameters=32_899_072,
525
537
  memory_usage_mb=2987,
526
538
  max_tokens=None,
527
539
  embed_dim=None,
@@ -530,7 +542,7 @@ flant5_large = ModelMeta(
530
542
  public_training_data=None,
531
543
  similarity_fn_name=None,
532
544
  use_instructions=None,
533
- framework=["PyTorch"],
545
+ framework=["PyTorch", "Transformers", "safetensors"],
534
546
  )
535
547
 
536
548
  flant5_xl = ModelMeta(
@@ -568,6 +580,7 @@ flant5_xl = ModelMeta(
568
580
  # "qed": ["train"],
569
581
  ),
570
582
  n_parameters=None,
583
+ n_embedding_parameters=65_798_144,
571
584
  memory_usage_mb=10871,
572
585
  max_tokens=None,
573
586
  embed_dim=None,
@@ -576,7 +589,7 @@ flant5_xl = ModelMeta(
576
589
  public_training_data=None,
577
590
  similarity_fn_name=None,
578
591
  use_instructions=None,
579
- framework=["PyTorch"],
592
+ framework=["PyTorch", "Transformers", "safetensors"],
580
593
  )
581
594
 
582
595
  flant5_xxl = ModelMeta(
@@ -614,6 +627,7 @@ flant5_xxl = ModelMeta(
614
627
  # "qed": ["train"],
615
628
  ),
616
629
  n_parameters=None,
630
+ n_embedding_parameters=131_596_288,
617
631
  memory_usage_mb=42980,
618
632
  max_tokens=None,
619
633
  embed_dim=None,
@@ -622,7 +636,7 @@ flant5_xxl = ModelMeta(
622
636
  public_training_data=None,
623
637
  similarity_fn_name=None,
624
638
  use_instructions=None,
625
- framework=["PyTorch"],
639
+ framework=["PyTorch", "Transformers", "safetensors"],
626
640
  )
627
641
 
628
642
 
@@ -638,6 +652,7 @@ llama2_7b = ModelMeta(
638
652
  revision="01c7f73d771dfac7d292323805ebc428287df4f9",
639
653
  release_date="2023-07-18",
640
654
  n_parameters=None,
655
+ n_embedding_parameters=131_072_000,
641
656
  memory_usage_mb=None,
642
657
  max_tokens=None,
643
658
  embed_dim=None,
@@ -647,7 +662,7 @@ llama2_7b = ModelMeta(
647
662
  similarity_fn_name=None,
648
663
  use_instructions=None,
649
664
  training_datasets=None,
650
- framework=["PyTorch"],
665
+ framework=["PyTorch", "Transformers", "safetensors"],
651
666
  citation="""@misc{touvron2023llama2openfoundation,
652
667
  title={Llama 2: Open Foundation and Fine-Tuned Chat Models},
653
668
  author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
@@ -680,6 +695,7 @@ llama2_7b_chat = ModelMeta(
680
695
  url={https://arxiv.org/abs/2307.09288},
681
696
  }""",
682
697
  n_parameters=None,
698
+ n_embedding_parameters=131_072_000,
683
699
  memory_usage_mb=None,
684
700
  max_tokens=None,
685
701
  embed_dim=None,
@@ -689,7 +705,7 @@ llama2_7b_chat = ModelMeta(
689
705
  similarity_fn_name=None,
690
706
  use_instructions=None,
691
707
  training_datasets=None,
692
- framework=["PyTorch"],
708
+ framework=["PyTorch", "Transformers", "safetensors"],
693
709
  )
694
710
 
695
711
  mistral_7b = ModelMeta(
@@ -704,6 +720,7 @@ mistral_7b = ModelMeta(
704
720
  revision="3ad372fc79158a2148299e3318516c786aeded6c",
705
721
  release_date="2023-12-11",
706
722
  n_parameters=None,
723
+ n_embedding_parameters=None,
707
724
  memory_usage_mb=None,
708
725
  max_tokens=None,
709
726
  embed_dim=None,
@@ -713,7 +730,7 @@ mistral_7b = ModelMeta(
713
730
  similarity_fn_name=None,
714
731
  use_instructions=None,
715
732
  training_datasets=None,
716
- framework=["PyTorch"],
733
+ framework=["PyTorch", "Transformers", "safetensors"],
717
734
  citation="""@misc{jiang2023mistral7b,
718
735
  title={Mistral 7B},
719
736
  author={Albert Q. Jiang and Alexandre Sablayrolles and Arthur Mensch and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Florian Bressand and Gianna Lengyel and Guillaume Lample and Lucile Saulnier and Lélio Renard Lavaud and Marie-Anne Lachaux and Pierre Stock and Teven Le Scao and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed},
@@ -740,6 +757,7 @@ followir_7b = ModelMeta(
740
757
  # "jhu-clsp/FollowIR-train"
741
758
  ),
742
759
  n_parameters=None,
760
+ n_embedding_parameters=None,
743
761
  memory_usage_mb=13813,
744
762
  max_tokens=None,
745
763
  embed_dim=None,
@@ -748,7 +766,7 @@ followir_7b = ModelMeta(
748
766
  public_training_data=None,
749
767
  similarity_fn_name=None,
750
768
  use_instructions=None,
751
- framework=["PyTorch"],
769
+ framework=["PyTorch", "Transformers", "safetensors"],
752
770
  citation="""
753
771
  @misc{weller2024followir,
754
772
  title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions},
@@ -890,6 +908,7 @@ mt5_base_mmarco_v2 = ModelMeta(
890
908
  """,
891
909
  training_datasets={"MSMARCO"},
892
910
  n_parameters=None,
911
+ n_embedding_parameters=192_086_016,
893
912
  memory_usage_mb=None,
894
913
  max_tokens=None,
895
914
  embed_dim=None,
@@ -898,11 +917,11 @@ mt5_base_mmarco_v2 = ModelMeta(
898
917
  public_training_data=None,
899
918
  similarity_fn_name=None,
900
919
  use_instructions=None,
901
- framework=["PyTorch"],
920
+ framework=["PyTorch", "Transformers"],
902
921
  )
903
922
 
904
923
  mt5_13b_mmarco_100k = ModelMeta(
905
- loader=MonoT5Reranker, # type: ignore
924
+ loader=MonoT5Reranker,
906
925
  loader_kwargs=dict(
907
926
  fp_options="float16",
908
927
  ),
@@ -913,6 +932,7 @@ mt5_13b_mmarco_100k = ModelMeta(
913
932
  revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
914
933
  release_date="2022-11-04",
915
934
  n_parameters=None,
935
+ n_embedding_parameters=1_024_458_752,
916
936
  memory_usage_mb=None,
917
937
  max_tokens=None,
918
938
  embed_dim=None,
@@ -922,5 +942,5 @@ mt5_13b_mmarco_100k = ModelMeta(
922
942
  similarity_fn_name=None,
923
943
  use_instructions=None,
924
944
  training_datasets=None,
925
- framework=["PyTorch"],
945
+ framework=["PyTorch", "Transformers"],
926
946
  )
@@ -15,13 +15,14 @@ ritrieve_zh_v1 = ModelMeta(
15
15
  revision="f8d5a707656c55705027678e311f9202c8ced12c",
16
16
  release_date="2025-03-25",
17
17
  n_parameters=int(326 * 1e6),
18
+ n_embedding_parameters=21_635_072,
18
19
  memory_usage_mb=1242,
19
20
  embed_dim=1792,
20
21
  license="mit",
21
22
  max_tokens=512,
22
23
  reference="https://huggingface.co/richinfoai/ritrieve_zh_v1",
23
24
  similarity_fn_name="cosine",
24
- framework=["Sentence Transformers", "PyTorch"],
25
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
25
26
  use_instructions=False,
26
27
  superseded_by=None,
27
28
  adapted_from=None,