mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -48,6 +48,7 @@ gte_qwen2_7b_instruct = ModelMeta(
48
48
  revision="e26182b2122f4435e8b3ebecbf363990f409b45b",
49
49
  release_date="2024-06-15", # initial commit of hf model.
50
50
  n_parameters=7_613_000_000,
51
+ n_embedding_parameters=543_499_264,
51
52
  memory_usage_mb=29040,
52
53
  embed_dim=3584,
53
54
  license="apache-2.0",
@@ -80,6 +81,7 @@ gte_qwen1_5_7b_instruct = ModelMeta(
80
81
  revision="07d27e5226328010336563bc1b564a5e3436a298",
81
82
  release_date="2024-04-20", # initial commit of hf model.
82
83
  n_parameters=7_720_000_000,
84
+ n_embedding_parameters=None,
83
85
  memory_usage_mb=29449,
84
86
  embed_dim=4096,
85
87
  license="apache-2.0",
@@ -117,6 +119,7 @@ gte_qwen2_1_5b_instruct = ModelMeta(
117
119
  revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
118
120
  release_date="2024-07-29", # initial commit of hf model.
119
121
  n_parameters=1_780_000_000,
122
+ n_embedding_parameters=232_928_256,
120
123
  memory_usage_mb=6776,
121
124
  embed_dim=8960,
122
125
  license="apache-2.0",
@@ -145,6 +148,7 @@ gte_small_zh = ModelMeta(
145
148
  revision="af7bd46fbb00b3a6963c8dd7f1786ddfbfbe973a",
146
149
  release_date="2023-11-08", # initial commit of hf model.
147
150
  n_parameters=int(30.3 * 1e6),
151
+ n_embedding_parameters=10_817_536,
148
152
  memory_usage_mb=58,
149
153
  embed_dim=1024,
150
154
  license="mit",
@@ -173,6 +177,7 @@ gte_base_zh = ModelMeta(
173
177
  revision="71ab7947d6fac5b64aa299e6e40e6c2b2e85976c",
174
178
  release_date="2023-11-08", # initial commit of hf model.
175
179
  n_parameters=int(102 * 1e6),
180
+ n_embedding_parameters=16_226_304,
176
181
  memory_usage_mb=195,
177
182
  embed_dim=1024,
178
183
  license="mit",
@@ -201,6 +206,7 @@ gte_large_zh = ModelMeta(
201
206
  revision="64c364e579de308104a9b2c170ca009502f4f545",
202
207
  release_date="2023-11-08", # initial commit of hf model.
203
208
  n_parameters=int(326 * 1e6),
209
+ n_embedding_parameters=21_635_072,
204
210
  memory_usage_mb=621,
205
211
  embed_dim=1024,
206
212
  license="mit",
@@ -330,6 +336,7 @@ gte_multilingual_base = ModelMeta(
330
336
  revision="ca1791e0bcc104f6db161f27de1340241b13c5a4",
331
337
  release_date="2024-07-20", # initial commit of hf model.
332
338
  n_parameters=int(305 * 1e6),
339
+ n_embedding_parameters=192_036_864,
333
340
  memory_usage_mb=582,
334
341
  embed_dim=768,
335
342
  license="apache-2.0",
@@ -359,6 +366,7 @@ gte_modernbert_base = ModelMeta(
359
366
  revision="7ca8b4ca700621b67618669f5378fe5f5820b8e4",
360
367
  release_date="2025-01-21", # initial commit of hf model.
361
368
  n_parameters=int(149 * 1e6),
369
+ n_embedding_parameters=None,
362
370
  memory_usage_mb=284,
363
371
  embed_dim=768,
364
372
  license="apache-2.0",
@@ -402,6 +410,7 @@ gte_base_en_v15 = ModelMeta(
402
410
  revision="a829fd0e060bb84554da0dfd354d0de0f7712b7f", # can be any
403
411
  release_date="2024-06-20", # initial commit of hf model
404
412
  n_parameters=137_000_000,
413
+ n_embedding_parameters=23_445_504,
405
414
  memory_usage_mb=None,
406
415
  embed_dim=768,
407
416
  license="apache-2.0",
@@ -1,9 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from mteb.models.model_meta import ModelMeta
4
7
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
5
- from mteb.types import PromptType
6
8
 
9
+ if TYPE_CHECKING:
10
+ from mteb.types import PromptType
7
11
  logger = logging.getLogger(__name__)
8
12
 
9
13
 
@@ -43,6 +47,7 @@ Hinvec_bidir = ModelMeta(
43
47
  revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b",
44
48
  release_date="2025-06-19",
45
49
  n_parameters=939_591_680,
50
+ n_embedding_parameters=None,
46
51
  memory_usage_mb=3715,
47
52
  embed_dim=2048,
48
53
  license="cc-by-nc-4.0",
@@ -9,6 +9,7 @@ human = ModelMeta(
9
9
  revision="2025_09_25",
10
10
  release_date=None,
11
11
  n_parameters=None,
12
+ n_embedding_parameters=None,
12
13
  memory_usage_mb=None,
13
14
  embed_dim=None,
14
15
  license=None,
@@ -100,6 +100,7 @@ granite_107m_multilingual = ModelMeta(
100
100
  revision="47db56afe692f731540413c67dd818ff492277e7",
101
101
  release_date="2024-12-18",
102
102
  n_parameters=107_000_000,
103
+ n_embedding_parameters=96_000_768,
103
104
  memory_usage_mb=204,
104
105
  embed_dim=384,
105
106
  license="apache-2.0",
@@ -131,6 +132,7 @@ granite_278m_multilingual = ModelMeta(
131
132
  revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
132
133
  release_date="2024-12-18",
133
134
  n_parameters=278_000_000,
135
+ n_embedding_parameters=192_001_536,
134
136
  memory_usage_mb=530,
135
137
  embed_dim=768,
136
138
  license="apache-2.0",
@@ -162,6 +164,7 @@ granite_30m_english = ModelMeta(
162
164
  revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
163
165
  release_date="2024-12-18",
164
166
  n_parameters=30_000_000,
167
+ n_embedding_parameters=19_301_760,
165
168
  memory_usage_mb=58,
166
169
  embed_dim=384,
167
170
  license="apache-2.0",
@@ -193,6 +196,7 @@ granite_125m_english = ModelMeta(
193
196
  revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
194
197
  release_date="2024-12-18",
195
198
  n_parameters=125_000_000,
199
+ n_embedding_parameters=38_603_520,
196
200
  memory_usage_mb=238,
197
201
  embed_dim=768,
198
202
  license="apache-2.0",
@@ -225,6 +229,7 @@ granite_english_r2 = ModelMeta(
225
229
  revision="6e7b8ce0e76270394ac4669ba4bbd7133b60b7f9",
226
230
  release_date="2025-08-15",
227
231
  n_parameters=149_000_000,
232
+ n_embedding_parameters=None,
228
233
  memory_usage_mb=284,
229
234
  embed_dim=768,
230
235
  license="apache-2.0",
@@ -250,6 +255,7 @@ granite_small_english_r2 = ModelMeta(
250
255
  revision="54a8d2616a0844355a5164432d3f6dafb37b17a3",
251
256
  release_date="2025-08-15",
252
257
  n_parameters=47_000_000,
258
+ n_embedding_parameters=None,
253
259
  memory_usage_mb=91,
254
260
  embed_dim=384,
255
261
  license="apache-2.0",
@@ -56,6 +56,7 @@ inf_retriever_v1 = ModelMeta(
56
56
  revision="cb70ca7c31dfa866b2eff2dad229c144d8ddfd91",
57
57
  release_date="2024-12-24", # initial commit of hf model.
58
58
  n_parameters=7_069_121_024,
59
+ n_embedding_parameters=None,
59
60
  memory_usage_mb=13483,
60
61
  embed_dim=3584,
61
62
  license="apache-2.0",
@@ -83,6 +84,7 @@ inf_retriever_v1_1_5b = ModelMeta(
83
84
  revision="c9c05c2dd50707a486966ba81703021ae2094a06",
84
85
  release_date="2025-02-08", # initial commit of hf model.
85
86
  n_parameters=1_543_268_864,
87
+ n_embedding_parameters=232_928_256,
86
88
  memory_usage_mb=2944,
87
89
  embed_dim=1536,
88
90
  license="apache-2.0",
@@ -1,11 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import torch
6
- from torch.utils.data import DataLoader
7
7
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
8
  from mteb.models.abs_encoder import AbsEncoder
10
9
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
11
10
  from mteb.models.model_implementations.bge_models import (
@@ -17,7 +16,15 @@ from mteb.models.model_implementations.e5_instruct import E5_MISTRAL_TRAINING_DA
17
16
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
18
17
  from mteb.models.model_implementations.qzhou_models import qzhou_training_data
19
18
  from mteb.models.model_meta import ModelMeta, ScoringFunction
20
- from mteb.types import Array, BatchedInput, PromptType
19
+ from mteb.types import PromptType
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+
24
+ from torch.utils.data import DataLoader
25
+
26
+ from mteb.abstasks.task_metadata import TaskMetadata
27
+ from mteb.types import Array, BatchedInput
21
28
 
22
29
  logger = logging.getLogger(__name__)
23
30
 
@@ -292,6 +299,7 @@ jasper_en_v1 = ModelMeta(
292
299
  revision="d6330ce98f8a0d741e781df845904c9484f00efa",
293
300
  release_date="2024-12-11", # first commit
294
301
  n_parameters=1_999_000_000,
302
+ n_embedding_parameters=232_932_864,
295
303
  memory_usage_mb=3802,
296
304
  max_tokens=131072,
297
305
  embed_dim=8960,
@@ -339,6 +347,7 @@ Jasper_Token_Compression_600M = ModelMeta(
339
347
  revision="06a100f753a5a96d9e583b3af79c6fcdfacc4719",
340
348
  release_date="2025-11-14",
341
349
  n_parameters=595776512,
350
+ n_embedding_parameters=None,
342
351
  memory_usage_mb=2272,
343
352
  embed_dim=2048,
344
353
  license="mit",
@@ -1,15 +1,20 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
11
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
13
18
 
14
19
  JINA_CLIP_CITATION = """@article{koukounas2024jinaclip,
15
20
  title={Jina CLIP: Your CLIP Model Is Also Your Text Retriever},
@@ -139,6 +144,7 @@ jina_clip_v1 = ModelMeta(
139
144
  release_date="2024-05-30",
140
145
  modalities=["image", "text"],
141
146
  n_parameters=223_000_000,
147
+ n_embedding_parameters=None,
142
148
  memory_usage_mb=849,
143
149
  max_tokens=8192,
144
150
  embed_dim=768,
@@ -1,14 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from typing import Any, ClassVar
5
+ from typing import TYPE_CHECKING, Any, ClassVar
4
6
 
5
7
  import numpy as np
6
8
  import torch
7
- from sentence_transformers import CrossEncoder
8
- from torch.utils.data import DataLoader
9
9
 
10
10
  from mteb._requires_package import requires_package
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
11
  from mteb.languages import PROGRAMMING_LANGS
13
12
  from mteb.models.abs_encoder import AbsEncoder
14
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -16,7 +15,13 @@ from mteb.models.sentence_transformer_wrapper import (
16
15
  CrossEncoderWrapper,
17
16
  SentenceTransformerEncoderWrapper,
18
17
  )
19
- from mteb.types import Array, BatchedInput, PromptType
18
+
19
+ if TYPE_CHECKING:
20
+ from sentence_transformers import CrossEncoder
21
+ from torch.utils.data import DataLoader
22
+
23
+ from mteb.abstasks.task_metadata import TaskMetadata
24
+ from mteb.types import Array, BatchedInput, PromptType
20
25
 
21
26
  logger = logging.getLogger(__name__)
22
27
 
@@ -728,6 +733,7 @@ jina_reranker_v3 = ModelMeta(
728
733
  release_date="2025-09-18", # official release date
729
734
  modalities=["text"],
730
735
  n_parameters=int(0.6 * 1e9),
736
+ n_embedding_parameters=None,
731
737
  memory_usage_mb=1138,
732
738
  max_tokens=131072,
733
739
  embed_dim=None,
@@ -771,6 +777,7 @@ jina_embeddings_v4 = ModelMeta(
771
777
  release_date="2025-06-24", # official release date
772
778
  modalities=["image", "text"],
773
779
  n_parameters=int(3.8 * 1e9),
780
+ n_embedding_parameters=None,
774
781
  memory_usage_mb=7500,
775
782
  max_tokens=32768,
776
783
  embed_dim=2048,
@@ -819,6 +826,7 @@ jina_embeddings_v3 = ModelMeta(
819
826
  revision="215a6e121fa0183376388ac6b1ae230326bfeaed",
820
827
  release_date="2024-09-18", # official release date
821
828
  n_parameters=int(572 * 1e6),
829
+ n_embedding_parameters=None,
822
830
  memory_usage_mb=1092,
823
831
  max_tokens=8194,
824
832
  embed_dim=1024,
@@ -879,6 +887,7 @@ jina_embeddings_v2_base_en = ModelMeta(
879
887
  revision="6e85f575bc273f1fd840a658067d0157933c83f0",
880
888
  release_date="2023-09-27",
881
889
  n_parameters=137_000_000,
890
+ n_embedding_parameters=23_445_504,
882
891
  memory_usage_mb=262,
883
892
  embed_dim=768,
884
893
  license="apache-2.0",
@@ -943,6 +952,7 @@ jina_embeddings_v2_small_en = ModelMeta(
943
952
  revision="44e7d1d6caec8c883c2d4b207588504d519788d0",
944
953
  release_date="2023-09-27",
945
954
  n_parameters=32_700_000,
955
+ n_embedding_parameters=15_630_336,
946
956
  memory_usage_mb=62,
947
957
  embed_dim=512,
948
958
  license="apache-2.0",
@@ -1004,6 +1014,7 @@ jina_embedding_b_en_v1 = ModelMeta(
1004
1014
  revision="32aa658e5ceb90793454d22a57d8e3a14e699516",
1005
1015
  release_date="2023-07-07",
1006
1016
  n_parameters=110_000_000,
1017
+ n_embedding_parameters=24_674_304,
1007
1018
  memory_usage_mb=420,
1008
1019
  embed_dim=768,
1009
1020
  license="apache-2.0",
@@ -1061,6 +1072,7 @@ jina_embedding_s_en_v1 = ModelMeta(
1061
1072
  revision="5ac6cd473e2324c6d5f9e558a6a9f65abb57143e",
1062
1073
  release_date="2023-07-07",
1063
1074
  n_parameters=35_000_000,
1075
+ n_embedding_parameters=16_449_536,
1064
1076
  memory_usage_mb=134,
1065
1077
  embed_dim=512,
1066
1078
  license="apache-2.0",
@@ -1,14 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
9
9
  from mteb.models.model_meta import ModelMeta
10
10
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
11
- from mteb.types import Array, BatchedInput, PromptType
11
+ from mteb.types import PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput
12
18
 
13
19
  logger = logging.getLogger(__name__)
14
20
 
@@ -774,6 +780,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1 = ModelMeta(
774
780
  release_date="2024-10-23",
775
781
  languages=["eng-Latn", "zho-Hans"],
776
782
  n_parameters=494032768,
783
+ n_embedding_parameters=136_134_656,
777
784
  memory_usage_mb=1885,
778
785
  max_tokens=512,
779
786
  embed_dim=896,
@@ -799,6 +806,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_v1 = ModelMeta(
799
806
  release_date="2024-08-27",
800
807
  languages=["eng-Latn", "zho-Hans"],
801
808
  n_parameters=494032768,
809
+ n_embedding_parameters=136_134_656,
802
810
  memory_usage_mb=1885,
803
811
  max_tokens=512,
804
812
  embed_dim=896,
@@ -830,6 +838,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1_5 = ModelMeta(
830
838
  release_date="2024-12-26",
831
839
  languages=["eng-Latn", "zho-Hans"],
832
840
  n_parameters=494032768,
841
+ n_embedding_parameters=136_134_656,
833
842
  memory_usage_mb=1885,
834
843
  max_tokens=512,
835
844
  embed_dim=896,
@@ -861,6 +870,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v2 = ModelMeta(
861
870
  release_date="2025-06-25",
862
871
  languages=["eng-Latn", "zho-Hans"],
863
872
  n_parameters=494032768,
873
+ n_embedding_parameters=136_134_656,
864
874
  memory_usage_mb=942,
865
875
  max_tokens=512,
866
876
  embed_dim=896,
@@ -892,6 +902,7 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
892
902
  release_date="2025-09-30",
893
903
  languages=["eng-Latn", "zho-Hans"],
894
904
  n_parameters=494032768,
905
+ n_embedding_parameters=136_134_656,
895
906
  memory_usage_mb=1885,
896
907
  max_tokens=512,
897
908
  embed_dim=896,
@@ -907,23 +918,23 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
907
918
  adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
908
919
  superseded_by=None,
909
920
  citation="""@misc{zhao2025kalmembeddingv2,
910
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
921
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
911
922
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
912
923
  year={2025},
913
924
  eprint={2506.20923},
914
925
  archivePrefix={arXiv},
915
926
  primaryClass={cs.CL},
916
- url={https://arxiv.org/abs/2506.20923},
927
+ url={https://arxiv.org/abs/2506.20923},
917
928
  }
918
929
 
919
930
  @misc{hu2025kalmembedding,
920
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
931
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
921
932
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
922
933
  year={2025},
923
934
  eprint={2501.01028},
924
935
  archivePrefix={arXiv},
925
936
  primaryClass={cs.CL},
926
- url={https://arxiv.org/abs/2501.01028},
937
+ url={https://arxiv.org/abs/2501.01028},
927
938
  }""",
928
939
  )
929
940
 
@@ -942,6 +953,7 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
942
953
  open_weights=True,
943
954
  release_date="2025-11-06",
944
955
  n_parameters=11.76 * 1e9,
956
+ n_embedding_parameters=None,
945
957
  memory_usage_mb=44884,
946
958
  max_tokens=32768,
947
959
  embed_dim=3840,
@@ -954,22 +966,22 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
954
966
  public_training_data=None,
955
967
  training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
956
968
  citation="""@misc{zhao2025kalmembeddingv2,
957
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
969
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
958
970
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
959
971
  year={2025},
960
972
  eprint={2506.20923},
961
973
  archivePrefix={arXiv},
962
974
  primaryClass={cs.CL},
963
- url={https://arxiv.org/abs/2506.20923},
975
+ url={https://arxiv.org/abs/2506.20923},
964
976
  }
965
977
 
966
978
  @misc{hu2025kalmembedding,
967
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
979
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
968
980
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
969
981
  year={2025},
970
982
  eprint={2501.01028},
971
983
  archivePrefix={arXiv},
972
984
  primaryClass={cs.CL},
973
- url={https://arxiv.org/abs/2501.01028},
985
+ url={https://arxiv.org/abs/2501.01028},
974
986
  }""",
975
987
  )
@@ -10,6 +10,7 @@ sbert_swedish = ModelMeta(
10
10
  revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
11
11
  release_date="2023-01-11",
12
12
  n_parameters=124690944,
13
+ n_embedding_parameters=38_649_600,
13
14
  memory_usage_mb=476,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
@@ -12,6 +12,7 @@ dfm_enc_large = ModelMeta(
12
12
  revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
13
13
  release_date="2023-07-12",
14
14
  n_parameters=355087360,
15
+ n_embedding_parameters=51_200_000,
15
16
  memory_usage_mb=1554,
16
17
  embed_dim=1024,
17
18
  license="mit",
@@ -47,6 +48,7 @@ dfm_enc_med = ModelMeta(
47
48
  revision="701bce95d499fa97610d57e8823c54fd1fb79930",
48
49
  release_date="2023-07-12",
49
50
  n_parameters=124445952,
51
+ n_embedding_parameters=38_403_840,
50
52
  memory_usage_mb=475,
51
53
  embed_dim=768,
52
54
  license="mit",
@@ -10,6 +10,7 @@ xlmr_scandi = ModelMeta(
10
10
  revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
11
11
  release_date="2022-02-22",
12
12
  n_parameters=278043648,
13
+ n_embedding_parameters=192_001_536,
13
14
  memory_usage_mb=1061,
14
15
  embed_dim=768,
15
16
  license="not specified",
@@ -9,6 +9,7 @@ kowshik24_bangla_embedding_model = ModelMeta(
9
9
  revision="6689c21e69be5950596bad084457cbaa138728d8",
10
10
  release_date="2025-11-10",
11
11
  n_parameters=278_000_000,
12
+ n_embedding_parameters=192_001_536,
12
13
  memory_usage_mb=1061,
13
14
  embed_dim=768,
14
15
  license="apache-2.0",
@@ -18,6 +18,7 @@ lens_d4000 = ModelMeta(
18
18
  revision="e473b33364e6c48a324796fd1411d3b93670c6fe",
19
19
  release_date="2025-01-17",
20
20
  n_parameters=int(7.11 * 1e9),
21
+ n_embedding_parameters=131_084_288,
21
22
  memory_usage_mb=27125,
22
23
  embed_dim=4000,
23
24
  license="apache-2.0",
@@ -41,6 +42,7 @@ lens_d8000 = ModelMeta(
41
42
  revision="a0b87bd91cb27b6f2f0b0fe22c28026da1d464ef",
42
43
  release_date="2025-01-17",
43
44
  n_parameters=int(7.11 * 1e9),
45
+ n_embedding_parameters=131_084_288,
44
46
  memory_usage_mb=27125,
45
47
  embed_dim=8000,
46
48
  license="apache-2.0",
@@ -52,6 +52,7 @@ lgai_embedding_en = ModelMeta(
52
52
  revision="5e0b2316acc8c2e2941ded6b9cb200b1cb313e65",
53
53
  release_date="2025-06-11",
54
54
  n_parameters=7_110_000_000,
55
+ n_embedding_parameters=131_084_288,
55
56
  memory_usage_mb=27125,
56
57
  embed_dim=4096,
57
58
  license="apache-2.0",
@@ -1,11 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  import torch
2
6
 
3
7
  from mteb.models.instruct_wrapper import instruct_wrapper
4
8
  from mteb.models.model_meta import ModelMeta, ScoringFunction
5
- from mteb.types import PromptType
6
9
 
7
10
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
8
11
 
12
+ if TYPE_CHECKING:
13
+ from mteb.types import PromptType
9
14
  LINQ_EMBED_MISTRAL_CITATION = """@misc{LinqAIResearch2024,
10
15
  title={Linq-Embed-Mistral:Elevating Text Retrieval with Improved GPT Data Through Task-Specific Control and Quality Refinement},
11
16
  author={Junseong Kim and Seolhwa Lee and Jihoon Kwon and Sangmo Gu and Yejin Kim and Minkyung Cho and Jy-yong Sohn and Chanyeol Choi},
@@ -38,6 +43,7 @@ Linq_Embed_Mistral = ModelMeta(
38
43
  revision="0c1a0b0589177079acc552433cad51d7c9132379",
39
44
  release_date="2024-05-29", # initial commit of hf model.
40
45
  n_parameters=7_110_000_000,
46
+ n_embedding_parameters=None,
41
47
  memory_usage_mb=13563,
42
48
  embed_dim=4096,
43
49
  license="cc-by-nc-4.0",
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
 
6
- from mteb.abstasks.task_metadata import TaskMetadata
7
7
  from mteb.models.model_meta import ModelMeta
8
- from mteb.types import BatchedInput, PromptType
9
8
 
10
9
  from .rerankers_custom import RerankerWrapper
11
10
 
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import BatchedInput, PromptType
16
+
12
17
  LISTCONRANKER_CITATION = """@article{liu2025listconranker,
13
18
  title={ListConRanker: A Contrastive Text Reranker with Listwise Encoding},
14
19
  author={Liu, Junlong and Ma, Yue and Zhao, Ruihui and Zheng, Junhao and Ma, Qianli and Kang, Yangyang},
@@ -118,6 +123,7 @@ listconranker = ModelMeta(
118
123
  revision="95ae6a5f422a916bc36520f0f3e198e7d91520a0",
119
124
  release_date="2024-12-11",
120
125
  n_parameters=401_000_000,
126
+ n_embedding_parameters=None,
121
127
  memory_usage_mb=1242,
122
128
  similarity_fn_name="cosine",
123
129
  training_datasets=listconranker_training_datasets,
@@ -1,15 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import requires_image_dependencies, requires_package
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
10
  from mteb.models.abs_encoder import AbsEncoder
11
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
13
18
 
14
19
  LLM2CLIP_CITATION = """@misc{huang2024llm2clippowerfullanguagemodel,
15
20
  title={LLM2CLIP: Powerful Language Model Unlock Richer Visual Representation},
@@ -189,6 +194,7 @@ llm2clip_openai_l_14_336 = ModelMeta(
189
194
  release_date="2024-11-07",
190
195
  modalities=["image", "text"],
191
196
  n_parameters=579_000_000,
197
+ n_embedding_parameters=None,
192
198
  memory_usage_mb=None,
193
199
  max_tokens=None,
194
200
  embed_dim=1280,
@@ -214,6 +220,7 @@ llm2clip_openai_l_14_224 = ModelMeta(
214
220
  release_date="2024-11-07",
215
221
  modalities=["image", "text"],
216
222
  n_parameters=578_000_000,
223
+ n_embedding_parameters=None,
217
224
  memory_usage_mb=None,
218
225
  max_tokens=None,
219
226
  embed_dim=1280,
@@ -238,6 +245,7 @@ llm2clip_openai_b_16 = ModelMeta(
238
245
  release_date="2024-11-07",
239
246
  modalities=["image", "text"],
240
247
  n_parameters=361_000_000,
248
+ n_embedding_parameters=None,
241
249
  memory_usage_mb=None,
242
250
  max_tokens=None,
243
251
  embed_dim=1280,