mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -2,19 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import math
5
+ import warnings
5
6
  from typing import TYPE_CHECKING, Any
6
7
 
7
8
  import torch
8
- from torch.utils.data import DataLoader
9
9
  from tqdm.autonotebook import tqdm
10
10
 
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
11
  from mteb.models.abs_encoder import AbsEncoder
13
12
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
13
+ from mteb.types import PromptType
15
14
 
16
15
  if TYPE_CHECKING:
17
16
  from PIL import Image
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
18
21
 
19
22
  logger = logging.getLogger(__name__)
20
23
 
@@ -261,9 +264,9 @@ def smart_resize(
261
264
  w_bar = ceil_by_factor(width * beta, factor)
262
265
 
263
266
  if max(h_bar, w_bar) / min(h_bar, w_bar) > MAX_RATIO:
264
- logger.warning(
265
- f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
266
- )
267
+ msg = f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
268
+ logger.warning(msg)
269
+ warnings.warn(msg)
267
270
  if h_bar > w_bar:
268
271
  h_bar = w_bar * MAX_RATIO
269
272
  else:
@@ -353,13 +356,14 @@ gme_qwen2vl_2b = ModelMeta(
353
356
  release_date="2024-12-24",
354
357
  modalities=["image", "text"],
355
358
  n_parameters=2_210_000_000,
359
+ n_embedding_parameters=233_373_696,
356
360
  memory_usage_mb=8427,
357
361
  embed_dim=1536,
358
362
  license="apache-2.0",
359
363
  max_tokens=32768,
360
364
  reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
361
365
  similarity_fn_name=ScoringFunction.COSINE,
362
- framework=["PyTorch"],
366
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
363
367
  use_instructions=True,
364
368
  public_training_code=None,
365
369
  public_training_data=None,
@@ -377,13 +381,14 @@ gme_qwen2vl_7b = ModelMeta(
377
381
  release_date="2024-12-24",
378
382
  modalities=["image", "text"],
379
383
  n_parameters=8_290_000_000,
384
+ n_embedding_parameters=544_997_376,
380
385
  memory_usage_mb=31629,
381
386
  embed_dim=3584,
382
387
  license="apache-2.0",
383
388
  max_tokens=32768,
384
389
  reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
385
390
  similarity_fn_name=ScoringFunction.COSINE,
386
- framework=["PyTorch"],
391
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
387
392
  use_instructions=True,
388
393
  public_training_code=None,
389
394
  public_training_data=None,
@@ -1,17 +1,23 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import numpy as np
4
6
  from packaging.version import Version
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
  from transformers import __version__ as transformers_version
8
9
 
9
10
  from mteb._requires_package import requires_package
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
11
  from mteb.models import sentence_transformers_loader
12
12
  from mteb.models.abs_encoder import AbsEncoder
13
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
14
+ from mteb.types import PromptType
15
+
16
+ if TYPE_CHECKING:
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
15
21
 
16
22
  MULTILINGUAL_EVALUATED_LANGUAGES = [
17
23
  "arb-Arab",
@@ -156,6 +162,7 @@ google_text_emb_004 = ModelMeta(
156
162
  revision="1", # revision is intended for implementation
157
163
  release_date="2024-05-14",
158
164
  n_parameters=None,
165
+ n_embedding_parameters=None,
159
166
  memory_usage_mb=None,
160
167
  max_tokens=2048,
161
168
  embed_dim=768,
@@ -181,6 +188,7 @@ google_text_emb_005 = ModelMeta(
181
188
  revision="1", # revision is intended for implementation
182
189
  release_date="2024-11-18",
183
190
  n_parameters=None,
191
+ n_embedding_parameters=None,
184
192
  memory_usage_mb=None,
185
193
  max_tokens=2048,
186
194
  embed_dim=768,
@@ -206,6 +214,7 @@ google_text_multilingual_emb_002 = ModelMeta(
206
214
  revision="1",
207
215
  release_date="2024-05-14",
208
216
  n_parameters=None,
217
+ n_embedding_parameters=None,
209
218
  memory_usage_mb=None,
210
219
  max_tokens=2048,
211
220
  embed_dim=768,
@@ -231,6 +240,7 @@ google_gemini_embedding_001 = ModelMeta(
231
240
  revision="1",
232
241
  release_date="2025-03-07",
233
242
  n_parameters=None,
243
+ n_embedding_parameters=None,
234
244
  memory_usage_mb=None,
235
245
  max_tokens=2048,
236
246
  embed_dim=3072,
@@ -266,11 +276,12 @@ embedding_gemma_300m = ModelMeta(
266
276
  revision="64614b0b8b64f0c6c1e52b07e4e9a4e8fe4d2da2",
267
277
  release_date="2025-09-04",
268
278
  n_parameters=307_581_696,
279
+ n_embedding_parameters=201_326_592,
269
280
  embed_dim=768,
270
281
  max_tokens=2048,
271
282
  license="gemma",
272
283
  reference="https://ai.google.dev/gemma/docs/embeddinggemma/model_card",
273
- framework=["Sentence Transformers", "PyTorch"],
284
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
274
285
  use_instructions=True,
275
286
  public_training_code=None,
276
287
  public_training_data=None,
@@ -4,20 +4,21 @@ import logging
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  import torch
7
- from torch.utils.data import DataLoader
8
7
  from tqdm.auto import tqdm
9
8
 
10
9
  from mteb._requires_package import (
11
10
  requires_image_dependencies,
12
11
  )
13
- from mteb.abstasks.task_metadata import TaskMetadata
14
12
  from mteb.models.model_meta import ModelMeta
15
- from mteb.types import Array, BatchedInput, PromptType
16
-
17
- logger = logging.getLogger(__name__)
18
13
 
19
14
  if TYPE_CHECKING:
20
15
  from PIL import Image
16
+ from torch.utils.data import DataLoader
17
+
18
+ from mteb.abstasks.task_metadata import TaskMetadata
19
+ from mteb.types import Array, BatchedInput, PromptType
20
+
21
+ logger = logging.getLogger(__name__)
21
22
 
22
23
 
23
24
  class GraniteVisionEmbeddingWrapper:
@@ -172,6 +173,7 @@ granite_vision_embedding = ModelMeta(
172
173
  release_date="2025-06-11",
173
174
  modalities=["image", "text"],
174
175
  n_parameters=2_980_000_000,
176
+ n_embedding_parameters=None,
175
177
  memory_usage_mb=11351,
176
178
  max_tokens=128000,
177
179
  embed_dim=128,
@@ -179,7 +181,7 @@ granite_vision_embedding = ModelMeta(
179
181
  open_weights=True,
180
182
  public_training_code=None,
181
183
  public_training_data=None,
182
- framework=["PyTorch"],
184
+ framework=["PyTorch", "Transformers", "safetensors"],
183
185
  reference="https://huggingface.co/ibm-granite/granite-vision-3.3-2b-embedding",
184
186
  similarity_fn_name="MaxSim",
185
187
  use_instructions=True,
@@ -44,13 +44,14 @@ gritlm7b = ModelMeta(
44
44
  revision="13f00a0e36500c80ce12870ea513846a066004af",
45
45
  release_date="2024-02-15",
46
46
  n_parameters=7_240_000_000,
47
+ n_embedding_parameters=131_072_000,
47
48
  memory_usage_mb=13813,
48
49
  embed_dim=4096,
49
50
  license="apache-2.0",
50
51
  max_tokens=32768,
51
52
  reference="https://huggingface.co/GritLM/GritLM-7B",
52
53
  similarity_fn_name=ScoringFunction.COSINE,
53
- framework=["GritLM", "PyTorch"],
54
+ framework=["GritLM", "PyTorch", "Transformers", "safetensors"],
54
55
  use_instructions=True,
55
56
  training_datasets=GRIT_LM_TRAINING_DATA,
56
57
  # section 3.1 "We finetune our final models from Mistral 7B [68] and Mixtral 8x7B [69] using adaptations of E5 [160] and the Tülu 2 data
@@ -73,13 +74,15 @@ gritlm8x7b = ModelMeta(
73
74
  revision="7f089b13e3345510281733ca1e6ff871b5b4bc76",
74
75
  release_date="2024-02-15",
75
76
  n_parameters=57_920_000_000,
77
+ n_embedding_parameters=None,
78
+ n_active_parameters_override=13_000_000_000,
76
79
  memory_usage_mb=89079,
77
80
  embed_dim=32768,
78
81
  license="apache-2.0",
79
82
  max_tokens=32768,
80
83
  reference="https://huggingface.co/GritLM/GritLM-8x7B",
81
84
  similarity_fn_name=ScoringFunction.COSINE,
82
- framework=["GritLM", "PyTorch"],
85
+ framework=["GritLM", "PyTorch", "Transformers", "safetensors"],
83
86
  use_instructions=True,
84
87
  training_datasets=GRIT_LM_TRAINING_DATA,
85
88
  citation=GRITLM_CITATION,
@@ -48,12 +48,13 @@ gte_qwen2_7b_instruct = ModelMeta(
48
48
  revision="e26182b2122f4435e8b3ebecbf363990f409b45b",
49
49
  release_date="2024-06-15", # initial commit of hf model.
50
50
  n_parameters=7_613_000_000,
51
+ n_embedding_parameters=543_499_264,
51
52
  memory_usage_mb=29040,
52
53
  embed_dim=3584,
53
54
  license="apache-2.0",
54
55
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct",
55
56
  similarity_fn_name=ScoringFunction.COSINE,
56
- framework=["Sentence Transformers", "PyTorch"],
57
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
57
58
  use_instructions=True,
58
59
  citation=GTE_CITATION,
59
60
  public_training_code=None,
@@ -80,13 +81,14 @@ gte_qwen1_5_7b_instruct = ModelMeta(
80
81
  revision="07d27e5226328010336563bc1b564a5e3436a298",
81
82
  release_date="2024-04-20", # initial commit of hf model.
82
83
  n_parameters=7_720_000_000,
84
+ n_embedding_parameters=None,
83
85
  memory_usage_mb=29449,
84
86
  embed_dim=4096,
85
87
  license="apache-2.0",
86
88
  max_tokens=32_768,
87
89
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct",
88
90
  similarity_fn_name=ScoringFunction.COSINE,
89
- framework=["Sentence Transformers", "PyTorch"],
91
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
90
92
  use_instructions=True,
91
93
  public_training_code=None,
92
94
  public_training_data=None,
@@ -117,13 +119,14 @@ gte_qwen2_1_5b_instruct = ModelMeta(
117
119
  revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
118
120
  release_date="2024-07-29", # initial commit of hf model.
119
121
  n_parameters=1_780_000_000,
122
+ n_embedding_parameters=232_928_256,
120
123
  memory_usage_mb=6776,
121
124
  embed_dim=8960,
122
125
  license="apache-2.0",
123
126
  max_tokens=32_768,
124
127
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
125
128
  similarity_fn_name=ScoringFunction.COSINE,
126
- framework=["Sentence Transformers", "PyTorch"],
129
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
127
130
  use_instructions=True,
128
131
  public_training_code=None,
129
132
  public_training_data=None,
@@ -145,13 +148,14 @@ gte_small_zh = ModelMeta(
145
148
  revision="af7bd46fbb00b3a6963c8dd7f1786ddfbfbe973a",
146
149
  release_date="2023-11-08", # initial commit of hf model.
147
150
  n_parameters=int(30.3 * 1e6),
151
+ n_embedding_parameters=10_817_536,
148
152
  memory_usage_mb=58,
149
153
  embed_dim=1024,
150
154
  license="mit",
151
155
  max_tokens=512,
152
156
  reference="https://huggingface.co/thenlper/gte-small-zh",
153
157
  similarity_fn_name=ScoringFunction.COSINE,
154
- framework=["Sentence Transformers", "PyTorch"],
158
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
155
159
  use_instructions=False,
156
160
  public_training_code=None,
157
161
  public_training_data=None,
@@ -173,13 +177,14 @@ gte_base_zh = ModelMeta(
173
177
  revision="71ab7947d6fac5b64aa299e6e40e6c2b2e85976c",
174
178
  release_date="2023-11-08", # initial commit of hf model.
175
179
  n_parameters=int(102 * 1e6),
180
+ n_embedding_parameters=16_226_304,
176
181
  memory_usage_mb=195,
177
182
  embed_dim=1024,
178
183
  license="mit",
179
184
  max_tokens=512,
180
185
  reference="https://huggingface.co/thenlper/gte-base-zh",
181
186
  similarity_fn_name=ScoringFunction.COSINE,
182
- framework=["Sentence Transformers", "PyTorch"],
187
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
183
188
  use_instructions=False,
184
189
  public_training_code=None,
185
190
  public_training_data=None,
@@ -201,13 +206,14 @@ gte_large_zh = ModelMeta(
201
206
  revision="64c364e579de308104a9b2c170ca009502f4f545",
202
207
  release_date="2023-11-08", # initial commit of hf model.
203
208
  n_parameters=int(326 * 1e6),
209
+ n_embedding_parameters=21_635_072,
204
210
  memory_usage_mb=621,
205
211
  embed_dim=1024,
206
212
  license="mit",
207
213
  max_tokens=512,
208
214
  reference="https://huggingface.co/thenlper/gte-large-zh",
209
215
  similarity_fn_name=ScoringFunction.COSINE,
210
- framework=["Sentence Transformers", "PyTorch"],
216
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
211
217
  use_instructions=False,
212
218
  public_training_code=None,
213
219
  public_training_data=None,
@@ -330,13 +336,14 @@ gte_multilingual_base = ModelMeta(
330
336
  revision="ca1791e0bcc104f6db161f27de1340241b13c5a4",
331
337
  release_date="2024-07-20", # initial commit of hf model.
332
338
  n_parameters=int(305 * 1e6),
339
+ n_embedding_parameters=192_036_864,
333
340
  memory_usage_mb=582,
334
341
  embed_dim=768,
335
342
  license="apache-2.0",
336
343
  max_tokens=8192,
337
344
  reference="https://huggingface.co/Alibaba-NLP/gte-multilingual-base",
338
345
  similarity_fn_name=ScoringFunction.COSINE,
339
- framework=["Sentence Transformers", "PyTorch"],
346
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
340
347
  use_instructions=False,
341
348
  public_training_code=None,
342
349
  public_training_data=None, # couldn't find
@@ -359,13 +366,20 @@ gte_modernbert_base = ModelMeta(
359
366
  revision="7ca8b4ca700621b67618669f5378fe5f5820b8e4",
360
367
  release_date="2025-01-21", # initial commit of hf model.
361
368
  n_parameters=int(149 * 1e6),
369
+ n_embedding_parameters=None,
362
370
  memory_usage_mb=284,
363
371
  embed_dim=768,
364
372
  license="apache-2.0",
365
373
  max_tokens=8192,
366
374
  reference="https://huggingface.co/Alibaba-NLP/gte-modernbert-base",
367
375
  similarity_fn_name=ScoringFunction.COSINE,
368
- framework=["Sentence Transformers", "PyTorch"],
376
+ framework=[
377
+ "Sentence Transformers",
378
+ "PyTorch",
379
+ "Transformers",
380
+ "ONNX",
381
+ "safetensors",
382
+ ],
369
383
  use_instructions=False,
370
384
  public_training_code=None, # couldn't find
371
385
  public_training_data=None,
@@ -396,13 +410,20 @@ gte_base_en_v15 = ModelMeta(
396
410
  revision="a829fd0e060bb84554da0dfd354d0de0f7712b7f", # can be any
397
411
  release_date="2024-06-20", # initial commit of hf model
398
412
  n_parameters=137_000_000,
413
+ n_embedding_parameters=23_445_504,
399
414
  memory_usage_mb=None,
400
415
  embed_dim=768,
401
416
  license="apache-2.0",
402
417
  max_tokens=8192,
403
418
  reference="https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5",
404
419
  similarity_fn_name=ScoringFunction.COSINE,
405
- framework=["Sentence Transformers", "PyTorch"],
420
+ framework=[
421
+ "Sentence Transformers",
422
+ "PyTorch",
423
+ "Transformers",
424
+ "ONNX",
425
+ "safetensors",
426
+ ],
406
427
  use_instructions=False,
407
428
  superseded_by=None,
408
429
  adapted_from=None,
@@ -410,21 +431,21 @@ gte_base_en_v15 = ModelMeta(
410
431
  public_training_data=None,
411
432
  training_datasets=None,
412
433
  citation="""@misc{zhang2024mgte,
413
- title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
434
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
414
435
  author={Xin Zhang and Yanzhao Zhang and Dingkun Long and Wen Xie and Ziqi Dai and Jialong Tang and Huan Lin and Baosong Yang and Pengjun Xie and Fei Huang and Meishan Zhang and Wenjie Li and Min Zhang},
415
436
  year={2024},
416
437
  eprint={2407.19669},
417
438
  archivePrefix={arXiv},
418
439
  primaryClass={cs.CL},
419
- url={https://arxiv.org/abs/2407.19669},
440
+ url={https://arxiv.org/abs/2407.19669},
420
441
  }
421
442
  @misc{li2023gte,
422
- title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
443
+ title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
423
444
  author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
424
445
  year={2023},
425
446
  eprint={2308.03281},
426
447
  archivePrefix={arXiv},
427
448
  primaryClass={cs.CL},
428
- url={https://arxiv.org/abs/2308.03281},
449
+ url={https://arxiv.org/abs/2308.03281},
429
450
  }""",
430
451
  )
@@ -1,9 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from mteb.models.model_meta import ModelMeta
4
7
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
5
- from mteb.types import PromptType
6
8
 
9
+ if TYPE_CHECKING:
10
+ from mteb.types import PromptType
7
11
  logger = logging.getLogger(__name__)
8
12
 
9
13
 
@@ -43,13 +47,14 @@ Hinvec_bidir = ModelMeta(
43
47
  revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b",
44
48
  release_date="2025-06-19",
45
49
  n_parameters=939_591_680,
50
+ n_embedding_parameters=None,
46
51
  memory_usage_mb=3715,
47
52
  embed_dim=2048,
48
53
  license="cc-by-nc-4.0",
49
54
  max_tokens=2048,
50
55
  reference="https://huggingface.co/Sailesh97/Hinvec",
51
56
  similarity_fn_name="cosine",
52
- framework=["Sentence Transformers", "PyTorch"],
57
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
53
58
  use_instructions=True,
54
59
  training_datasets=hinvec_training_datasets,
55
60
  public_training_code=None,
@@ -9,6 +9,7 @@ human = ModelMeta(
9
9
  revision="2025_09_25",
10
10
  release_date=None,
11
11
  n_parameters=None,
12
+ n_embedding_parameters=None,
12
13
  memory_usage_mb=None,
13
14
  embed_dim=None,
14
15
  license=None,
@@ -100,13 +100,20 @@ granite_107m_multilingual = ModelMeta(
100
100
  revision="47db56afe692f731540413c67dd818ff492277e7",
101
101
  release_date="2024-12-18",
102
102
  n_parameters=107_000_000,
103
+ n_embedding_parameters=96_000_768,
103
104
  memory_usage_mb=204,
104
105
  embed_dim=384,
105
106
  license="apache-2.0",
106
107
  max_tokens=512,
107
108
  reference="https://huggingface.co/ibm-granite/granite-embedding-107m-multilingual",
108
109
  similarity_fn_name=ScoringFunction.COSINE,
109
- framework=["Sentence Transformers", "PyTorch"],
110
+ framework=[
111
+ "Sentence Transformers",
112
+ "PyTorch",
113
+ "Transformers",
114
+ "ONNX",
115
+ "safetensors",
116
+ ],
110
117
  adapted_from=None,
111
118
  superseded_by=None,
112
119
  public_training_code=None,
@@ -125,13 +132,20 @@ granite_278m_multilingual = ModelMeta(
125
132
  revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
126
133
  release_date="2024-12-18",
127
134
  n_parameters=278_000_000,
135
+ n_embedding_parameters=192_001_536,
128
136
  memory_usage_mb=530,
129
137
  embed_dim=768,
130
138
  license="apache-2.0",
131
139
  max_tokens=512,
132
140
  reference="https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual",
133
141
  similarity_fn_name=ScoringFunction.COSINE,
134
- framework=["Sentence Transformers", "PyTorch"],
142
+ framework=[
143
+ "Sentence Transformers",
144
+ "PyTorch",
145
+ "Transformers",
146
+ "ONNX",
147
+ "safetensors",
148
+ ],
135
149
  adapted_from=None,
136
150
  superseded_by=None,
137
151
  public_training_code=None,
@@ -150,13 +164,20 @@ granite_30m_english = ModelMeta(
150
164
  revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
151
165
  release_date="2024-12-18",
152
166
  n_parameters=30_000_000,
167
+ n_embedding_parameters=19_301_760,
153
168
  memory_usage_mb=58,
154
169
  embed_dim=384,
155
170
  license="apache-2.0",
156
171
  max_tokens=512,
157
172
  reference="https://huggingface.co/ibm-granite/granite-embedding-30m-english",
158
173
  similarity_fn_name=ScoringFunction.COSINE,
159
- framework=["Sentence Transformers", "PyTorch"],
174
+ framework=[
175
+ "Sentence Transformers",
176
+ "PyTorch",
177
+ "ONNX",
178
+ "safetensors",
179
+ "Transformers",
180
+ ],
160
181
  adapted_from=None,
161
182
  superseded_by=None,
162
183
  public_training_code=None,
@@ -175,13 +196,20 @@ granite_125m_english = ModelMeta(
175
196
  revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
176
197
  release_date="2024-12-18",
177
198
  n_parameters=125_000_000,
199
+ n_embedding_parameters=38_603_520,
178
200
  memory_usage_mb=238,
179
201
  embed_dim=768,
180
202
  license="apache-2.0",
181
203
  max_tokens=512,
182
204
  reference="https://huggingface.co/ibm-granite/granite-embedding-125m-english",
183
205
  similarity_fn_name=ScoringFunction.COSINE,
184
- framework=["Sentence Transformers", "PyTorch"],
206
+ framework=[
207
+ "Sentence Transformers",
208
+ "PyTorch",
209
+ "ONNX",
210
+ "safetensors",
211
+ "Transformers",
212
+ ],
185
213
  adapted_from=None,
186
214
  superseded_by=None,
187
215
  public_training_code=None,
@@ -201,13 +229,14 @@ granite_english_r2 = ModelMeta(
201
229
  revision="6e7b8ce0e76270394ac4669ba4bbd7133b60b7f9",
202
230
  release_date="2025-08-15",
203
231
  n_parameters=149_000_000,
232
+ n_embedding_parameters=None,
204
233
  memory_usage_mb=284,
205
234
  embed_dim=768,
206
235
  license="apache-2.0",
207
236
  max_tokens=8192,
208
237
  reference="https://huggingface.co/ibm-granite/granite-embedding-english-r2",
209
238
  similarity_fn_name="cosine",
210
- framework=["Sentence Transformers", "PyTorch"],
239
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
211
240
  adapted_from=None,
212
241
  superseded_by=None,
213
242
  public_training_code=None,
@@ -226,13 +255,14 @@ granite_small_english_r2 = ModelMeta(
226
255
  revision="54a8d2616a0844355a5164432d3f6dafb37b17a3",
227
256
  release_date="2025-08-15",
228
257
  n_parameters=47_000_000,
258
+ n_embedding_parameters=None,
229
259
  memory_usage_mb=91,
230
260
  embed_dim=384,
231
261
  license="apache-2.0",
232
262
  max_tokens=8192,
233
263
  reference="https://huggingface.co/ibm-granite/granite-embedding-small-english-r2",
234
264
  similarity_fn_name="cosine",
235
- framework=["Sentence Transformers", "PyTorch"],
265
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
236
266
  adapted_from=None,
237
267
  superseded_by=None,
238
268
  public_training_code=None,
@@ -56,13 +56,14 @@ inf_retriever_v1 = ModelMeta(
56
56
  revision="cb70ca7c31dfa866b2eff2dad229c144d8ddfd91",
57
57
  release_date="2024-12-24", # initial commit of hf model.
58
58
  n_parameters=7_069_121_024,
59
+ n_embedding_parameters=None,
59
60
  memory_usage_mb=13483,
60
61
  embed_dim=3584,
61
62
  license="apache-2.0",
62
63
  max_tokens=32768,
63
64
  reference="https://huggingface.co/infly/inf-retriever-v1",
64
65
  similarity_fn_name=ScoringFunction.COSINE,
65
- framework=["Sentence Transformers", "PyTorch"],
66
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
66
67
  use_instructions=True,
67
68
  adapted_from="Alibaba-NLP/gte-Qwen2-7B-instruct",
68
69
  public_training_code=None,
@@ -83,13 +84,14 @@ inf_retriever_v1_1_5b = ModelMeta(
83
84
  revision="c9c05c2dd50707a486966ba81703021ae2094a06",
84
85
  release_date="2025-02-08", # initial commit of hf model.
85
86
  n_parameters=1_543_268_864,
87
+ n_embedding_parameters=232_928_256,
86
88
  memory_usage_mb=2944,
87
89
  embed_dim=1536,
88
90
  license="apache-2.0",
89
91
  max_tokens=32768,
90
92
  reference="https://huggingface.co/infly/inf-retriever-v1-1.5b",
91
93
  similarity_fn_name=ScoringFunction.COSINE,
92
- framework=["Sentence Transformers", "PyTorch"],
94
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
93
95
  use_instructions=True,
94
96
  adapted_from="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
95
97
  public_training_code=None,
@@ -1,11 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import torch
6
- from torch.utils.data import DataLoader
7
7
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
8
  from mteb.models.abs_encoder import AbsEncoder
10
9
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
11
10
  from mteb.models.model_implementations.bge_models import (
@@ -17,7 +16,15 @@ from mteb.models.model_implementations.e5_instruct import E5_MISTRAL_TRAINING_DA
17
16
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
18
17
  from mteb.models.model_implementations.qzhou_models import qzhou_training_data
19
18
  from mteb.models.model_meta import ModelMeta, ScoringFunction
20
- from mteb.types import Array, BatchedInput, PromptType
19
+ from mteb.types import PromptType
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+
24
+ from torch.utils.data import DataLoader
25
+
26
+ from mteb.abstasks.task_metadata import TaskMetadata
27
+ from mteb.types import Array, BatchedInput
21
28
 
22
29
  logger = logging.getLogger(__name__)
23
30
 
@@ -292,13 +299,14 @@ jasper_en_v1 = ModelMeta(
292
299
  revision="d6330ce98f8a0d741e781df845904c9484f00efa",
293
300
  release_date="2024-12-11", # first commit
294
301
  n_parameters=1_999_000_000,
302
+ n_embedding_parameters=232_932_864,
295
303
  memory_usage_mb=3802,
296
304
  max_tokens=131072,
297
305
  embed_dim=8960,
298
306
  license="apache-2.0",
299
307
  reference="https://huggingface.co/infgrad/jasper_en_vision_language_v1",
300
308
  similarity_fn_name=ScoringFunction.COSINE,
301
- framework=["Sentence Transformers", "PyTorch"],
309
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
302
310
  use_instructions=True,
303
311
  adapted_from=None,
304
312
  superseded_by=None,
@@ -339,13 +347,14 @@ Jasper_Token_Compression_600M = ModelMeta(
339
347
  revision="06a100f753a5a96d9e583b3af79c6fcdfacc4719",
340
348
  release_date="2025-11-14",
341
349
  n_parameters=595776512,
350
+ n_embedding_parameters=None,
342
351
  memory_usage_mb=2272,
343
352
  embed_dim=2048,
344
353
  license="mit",
345
354
  max_tokens=32768,
346
355
  reference="https://huggingface.co/infgrad/Jasper-Token-Compression-600M",
347
356
  similarity_fn_name="cosine",
348
- framework=["Sentence Transformers", "PyTorch"],
357
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
349
358
  use_instructions=True,
350
359
  public_training_code="https://github.com/DunZhang/Jasper-Token-Compression-Training",
351
360
  # public_training_data: unsupervised data for distillation