mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,18 @@
1
- from typing import Any, Literal
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Literal
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.abs_encoder import AbsEncoder
9
9
  from mteb.models.model_meta import ModelMeta, ScoringFunction
10
- from mteb.types import Array, BatchedInput, PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import Array, BatchedInput, PromptType
11
16
 
12
17
 
13
18
  class DINOModel(AbsEncoder):
@@ -112,6 +117,7 @@ dinov2_small = ModelMeta(
112
117
  release_date="2023-07-18",
113
118
  modalities=["image"],
114
119
  n_parameters=22_100_000,
120
+ n_embedding_parameters=None,
115
121
  memory_usage_mb=84,
116
122
  max_tokens=None,
117
123
  embed_dim=384,
@@ -143,6 +149,7 @@ dinov2_base = ModelMeta(
143
149
  release_date="2023-07-18",
144
150
  modalities=["image"],
145
151
  n_parameters=86_600_000,
152
+ n_embedding_parameters=None,
146
153
  memory_usage_mb=330,
147
154
  max_tokens=None,
148
155
  embed_dim=768,
@@ -174,6 +181,7 @@ dinov2_large = ModelMeta(
174
181
  release_date="2023-07-18",
175
182
  modalities=["image"],
176
183
  n_parameters=304_000_000,
184
+ n_embedding_parameters=None,
177
185
  memory_usage_mb=1161,
178
186
  max_tokens=None,
179
187
  embed_dim=1024,
@@ -205,6 +213,7 @@ dinov2_giant = ModelMeta(
205
213
  release_date="2023-07-18",
206
214
  modalities=["image"],
207
215
  n_parameters=1_140_000_000,
216
+ n_embedding_parameters=None,
208
217
  memory_usage_mb=4335,
209
218
  max_tokens=None,
210
219
  embed_dim=1536,
@@ -240,6 +249,7 @@ webssl_dino300m_full2b = ModelMeta(
240
249
  release_date="2025-04-24",
241
250
  modalities=["image"],
242
251
  n_parameters=304_000_000,
252
+ n_embedding_parameters=None,
243
253
  memory_usage_mb=1158,
244
254
  max_tokens=None,
245
255
  embed_dim=1024,
@@ -271,6 +281,7 @@ webssl_dino1b_full2b = ModelMeta(
271
281
  release_date="2025-04-24",
272
282
  modalities=["image"],
273
283
  n_parameters=1_130_000_000,
284
+ n_embedding_parameters=None,
274
285
  memory_usage_mb=4329,
275
286
  max_tokens=None,
276
287
  embed_dim=1536,
@@ -302,6 +313,7 @@ webssl_dino2b_full2b = ModelMeta(
302
313
  release_date="2025-04-24",
303
314
  modalities=["image"],
304
315
  n_parameters=2_080_000_000,
316
+ n_embedding_parameters=None,
305
317
  memory_usage_mb=7951,
306
318
  max_tokens=None,
307
319
  embed_dim=2688,
@@ -333,6 +345,7 @@ webssl_dino3b_full2b = ModelMeta(
333
345
  release_date="2025-04-24",
334
346
  modalities=["image"],
335
347
  n_parameters=3_000_000_000,
348
+ n_embedding_parameters=None,
336
349
  memory_usage_mb=11247,
337
350
  max_tokens=None,
338
351
  embed_dim=3072,
@@ -364,6 +377,7 @@ webssl_dino5b_full2b = ModelMeta(
364
377
  release_date="2025-04-24",
365
378
  modalities=["image"],
366
379
  n_parameters=5_000_000_000,
380
+ n_embedding_parameters=None,
367
381
  memory_usage_mb=18838,
368
382
  max_tokens=None,
369
383
  embed_dim=3584,
@@ -395,6 +409,7 @@ webssl_dino7b_full8b_224 = ModelMeta(
395
409
  release_date="2025-04-24",
396
410
  modalities=["image"],
397
411
  n_parameters=7_000_000_000,
412
+ n_embedding_parameters=None,
398
413
  memory_usage_mb=24605,
399
414
  max_tokens=None,
400
415
  embed_dim=4096,
@@ -426,6 +441,7 @@ webssl_dino7b_full8b_378 = ModelMeta(
426
441
  release_date="2025-04-24",
427
442
  modalities=["image"],
428
443
  n_parameters=7_000_000_000,
444
+ n_embedding_parameters=None,
429
445
  memory_usage_mb=24613,
430
446
  max_tokens=None,
431
447
  embed_dim=4096,
@@ -457,6 +473,7 @@ webssl_dino7b_full8b_518 = ModelMeta(
457
473
  release_date="2025-04-24",
458
474
  modalities=["image"],
459
475
  n_parameters=7_000_000_000,
476
+ n_embedding_parameters=None,
460
477
  memory_usage_mb=24623,
461
478
  max_tokens=None,
462
479
  embed_dim=4096,
@@ -489,6 +506,7 @@ webssl_dino2b_light2b = ModelMeta(
489
506
  release_date="2025-04-24",
490
507
  modalities=["image"],
491
508
  n_parameters=2_000_000_000,
509
+ n_embedding_parameters=None,
492
510
  memory_usage_mb=7951,
493
511
  max_tokens=None,
494
512
  embed_dim=2688,
@@ -520,6 +538,7 @@ webssl_dino2b_heavy2b = ModelMeta(
520
538
  release_date="2025-04-24",
521
539
  modalities=["image"],
522
540
  n_parameters=2_000_000_000,
541
+ n_embedding_parameters=None,
523
542
  memory_usage_mb=7951,
524
543
  max_tokens=None,
525
544
  embed_dim=2688,
@@ -551,6 +570,7 @@ webssl_dino3b_light2b = ModelMeta(
551
570
  release_date="2025-04-24",
552
571
  modalities=["image"],
553
572
  n_parameters=3_000_000_000,
573
+ n_embedding_parameters=None,
554
574
  memory_usage_mb=11247,
555
575
  max_tokens=None,
556
576
  embed_dim=3072,
@@ -582,6 +602,7 @@ webssl_dino3b_heavy2b = ModelMeta(
582
602
  release_date="2025-04-24",
583
603
  modalities=["image"],
584
604
  n_parameters=3_000_000_000,
605
+ n_embedding_parameters=None,
585
606
  memory_usage_mb=11247,
586
607
  max_tokens=None,
587
608
  embed_dim=3072,
@@ -613,6 +634,7 @@ webssl_mae300m_full2b = ModelMeta(
613
634
  release_date="2025-04-24",
614
635
  modalities=["image"],
615
636
  n_parameters=304_000_000,
637
+ n_embedding_parameters=None,
616
638
  memory_usage_mb=1161,
617
639
  max_tokens=None,
618
640
  embed_dim=1024,
@@ -644,6 +666,7 @@ webssl_mae700m_full2b = ModelMeta(
644
666
  release_date="2025-04-24",
645
667
  modalities=["image"],
646
668
  n_parameters=700_000_000,
669
+ n_embedding_parameters=None,
647
670
  memory_usage_mb=2412,
648
671
  max_tokens=None,
649
672
  embed_dim=1280,
@@ -675,6 +698,7 @@ webssl_mae1b_full2b = ModelMeta(
675
698
  release_date="2025-04-24",
676
699
  modalities=["image"],
677
700
  n_parameters=1_000_000_000,
701
+ n_embedding_parameters=None,
678
702
  memory_usage_mb=4337,
679
703
  max_tokens=None,
680
704
  embed_dim=1536,
@@ -57,6 +57,7 @@ e5_instruct = ModelMeta(
57
57
  use_instructions=True,
58
58
  reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct",
59
59
  n_parameters=560_000_000,
60
+ n_embedding_parameters=256_002_048,
60
61
  memory_usage_mb=1068,
61
62
  embed_dim=1024,
62
63
  license="mit",
@@ -102,6 +103,7 @@ e5_mistral = ModelMeta(
102
103
  use_instructions=True,
103
104
  reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct",
104
105
  n_parameters=7_111_000_000,
106
+ n_embedding_parameters=131_072_000,
105
107
  memory_usage_mb=13563,
106
108
  embed_dim=4096,
107
109
  license="mit",
@@ -145,6 +147,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
145
147
  release_date="2024-08-30",
146
148
  languages=["eng-Latn"],
147
149
  n_parameters=7110660096,
150
+ n_embedding_parameters=None,
148
151
  memory_usage_mb=13563,
149
152
  max_tokens=32768.0,
150
153
  embed_dim=4096,
@@ -228,6 +231,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
228
231
  release_date="2024-06-28",
229
232
  languages=["eng-Latn"],
230
233
  n_parameters=7241732096,
234
+ n_embedding_parameters=131_072_000,
231
235
  memory_usage_mb=27625,
232
236
  max_tokens=32768.0,
233
237
  embed_dim=4096,
@@ -76,6 +76,7 @@ e5_mult_small = ModelMeta(
76
76
  revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
77
77
  release_date=E5_PAPER_RELEASE_DATE,
78
78
  n_parameters=118_000_000,
79
+ n_embedding_parameters=96_014_208,
79
80
  memory_usage_mb=449,
80
81
  embed_dim=384,
81
82
  license="mit",
@@ -103,6 +104,7 @@ e5_mult_base = ModelMeta(
103
104
  revision="d13f1b27baf31030b7fd040960d60d909913633f",
104
105
  release_date=E5_PAPER_RELEASE_DATE,
105
106
  n_parameters=278_000_000,
107
+ n_embedding_parameters=192_001_536,
106
108
  memory_usage_mb=1061,
107
109
  embed_dim=768,
108
110
  license="mit",
@@ -130,6 +132,7 @@ e5_mult_large = ModelMeta(
130
132
  revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
131
133
  release_date=E5_PAPER_RELEASE_DATE,
132
134
  n_parameters=560_000_000,
135
+ n_embedding_parameters=256_002_048,
133
136
  memory_usage_mb=2136,
134
137
  embed_dim=1024,
135
138
  license="mit",
@@ -157,6 +160,7 @@ e5_eng_small_v2 = ModelMeta(
157
160
  revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
158
161
  release_date=E5_PAPER_RELEASE_DATE,
159
162
  n_parameters=33_000_000,
163
+ n_embedding_parameters=11_720_448,
160
164
  memory_usage_mb=127,
161
165
  embed_dim=384,
162
166
  license="mit",
@@ -184,6 +188,7 @@ e5_eng_small = ModelMeta(
184
188
  revision="e272f3049e853b47cb5ca3952268c6662abda68f",
185
189
  release_date=E5_PAPER_RELEASE_DATE,
186
190
  n_parameters=33_000_000,
191
+ n_embedding_parameters=11_720_448,
187
192
  memory_usage_mb=127,
188
193
  embed_dim=384,
189
194
  license="mit",
@@ -211,6 +216,7 @@ e5_eng_base_v2 = ModelMeta(
211
216
  revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
212
217
  release_date=E5_PAPER_RELEASE_DATE,
213
218
  n_parameters=109_000_000,
219
+ n_embedding_parameters=23_440_896,
214
220
  memory_usage_mb=418,
215
221
  embed_dim=768,
216
222
  license="mit",
@@ -239,6 +245,7 @@ e5_eng_large_v2 = ModelMeta(
239
245
  revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
240
246
  release_date=E5_PAPER_RELEASE_DATE,
241
247
  n_parameters=335_000_000,
248
+ n_embedding_parameters=31_254_528,
242
249
  memory_usage_mb=1278,
243
250
  embed_dim=1024,
244
251
  license="mit",
@@ -267,6 +274,7 @@ e5_large = ModelMeta(
267
274
  revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
268
275
  release_date="2022-12-26",
269
276
  n_parameters=335_000_000,
277
+ n_embedding_parameters=31_254_528,
270
278
  memory_usage_mb=1278,
271
279
  embed_dim=1024,
272
280
  license="apache-2.0",
@@ -295,6 +303,7 @@ e5_base = ModelMeta(
295
303
  revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
296
304
  release_date="2022-12-26",
297
305
  n_parameters=109_000_000,
306
+ n_embedding_parameters=23_440_896,
298
307
  memory_usage_mb=418,
299
308
  embed_dim=768,
300
309
  license="apache-2.0",
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
6
  from packaging import version
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  E5_V_TRANSFORMERS_VERSION = (
14
19
  "4.44.2" # Issue 1647: Only works with transformers==4.44.2.
@@ -166,6 +171,7 @@ e5_v = ModelMeta(
166
171
  release_date="2024-07-17",
167
172
  modalities=["image", "text"],
168
173
  n_parameters=8_360_000_000,
174
+ n_embedding_parameters=None,
169
175
  memory_usage_mb=15936,
170
176
  max_tokens=8192,
171
177
  embed_dim=4096,
@@ -1,17 +1,23 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import (
8
9
  requires_image_dependencies,
9
10
  requires_package,
10
11
  )
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
12
  from mteb.models.abs_encoder import AbsEncoder
13
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
14
+ from mteb.types import PromptType
15
+
16
+ if TYPE_CHECKING:
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
15
21
 
16
22
 
17
23
  class EagerEmbedV1Wrapper(AbsEncoder):
@@ -147,6 +153,7 @@ Eager_Embed_V1 = ModelMeta(
147
153
  release_date="2025-11-20",
148
154
  modalities=["image", "text"],
149
155
  n_parameters=4_000_000_000,
156
+ n_embedding_parameters=None,
150
157
  memory_usage_mb=16929,
151
158
  max_tokens=262144,
152
159
  embed_dim=2560,
@@ -10,6 +10,7 @@ embedding_gemma_300m_scandi = ModelMeta(
10
10
  revision="9f3307b9f601db564a9190cb475324d128dcfe86",
11
11
  release_date="2025-10-17",
12
12
  n_parameters=307_581_696,
13
+ n_embedding_parameters=None,
13
14
  embed_dim=768,
14
15
  max_tokens=2048,
15
16
  license="apache-2.0",
@@ -43,6 +44,7 @@ qwen_scandi = ModelMeta(
43
44
  revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
44
45
  release_date="2025-10-17",
45
46
  n_parameters=595776512,
47
+ n_embedding_parameters=None,
46
48
  memory_usage_mb=2272,
47
49
  embed_dim=1024,
48
50
  max_tokens=32768,
@@ -67,6 +69,7 @@ mmbert_scandi = ModelMeta(
67
69
  revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
68
70
  release_date="2025-10-17",
69
71
  n_parameters=306939648,
72
+ n_embedding_parameters=None,
70
73
  memory_usage_mb=1171,
71
74
  embed_dim=768,
72
75
  max_tokens=8192,
@@ -18,6 +18,7 @@ english_code_retriever = ModelMeta(
18
18
  revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c",
19
19
  release_date="2025-07-10",
20
20
  n_parameters=149_000_000,
21
+ n_embedding_parameters=None,
21
22
  memory_usage_mb=568,
22
23
  embed_dim=768,
23
24
  license="mit",
@@ -9,6 +9,7 @@ Euler_Legal_Embedding_V1 = ModelMeta(
9
9
  release_date="2025-11-06",
10
10
  languages=["eng-Latn"],
11
11
  n_parameters=8000000000,
12
+ n_embedding_parameters=None,
12
13
  memory_usage_mb=15618,
13
14
  max_tokens=1536,
14
15
  embed_dim=4096,
@@ -1,15 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import requires_image_dependencies
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
10
  from mteb.models.abs_encoder import AbsEncoder
11
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
13
18
 
14
19
  EVA_CLIP_CITATION = """@article{EVA-CLIP,
15
20
  title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
@@ -144,6 +149,7 @@ EVA02_CLIP_B_16 = ModelMeta(
144
149
  release_date="2023-04-26",
145
150
  modalities=["image", "text"],
146
151
  n_parameters=149_000_000,
152
+ n_embedding_parameters=None,
147
153
  memory_usage_mb=568,
148
154
  max_tokens=77,
149
155
  embed_dim=512,
@@ -168,6 +174,7 @@ EVA02_CLIP_L_14 = ModelMeta(
168
174
  release_date="2023-04-26",
169
175
  modalities=["image", "text"],
170
176
  n_parameters=428_000_000,
177
+ n_embedding_parameters=None,
171
178
  memory_usage_mb=1633,
172
179
  max_tokens=77,
173
180
  embed_dim=768,
@@ -192,6 +199,7 @@ EVA02_CLIP_bigE_14 = ModelMeta(
192
199
  release_date="2023-04-26",
193
200
  modalities=["image", "text"],
194
201
  n_parameters=4_700_000_000,
202
+ n_embedding_parameters=None,
195
203
  memory_usage_mb=17929,
196
204
  max_tokens=77,
197
205
  embed_dim=1024,
@@ -217,6 +225,7 @@ EVA02_CLIP_bigE_14_plus = ModelMeta(
217
225
  release_date="2023-04-26",
218
226
  modalities=["image", "text"],
219
227
  n_parameters=5_000_000_000,
228
+ n_embedding_parameters=None,
220
229
  memory_usage_mb=19073,
221
230
  max_tokens=77,
222
231
  embed_dim=1024,
@@ -12,6 +12,7 @@ parsbert = ModelMeta(
12
12
  revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
13
13
  release_date="2021-05-19",
14
14
  n_parameters=162_841_344,
15
+ n_embedding_parameters=76_800_000,
15
16
  memory_usage_mb=621,
16
17
  embed_dim=768,
17
18
  license="not specified",
@@ -48,6 +49,7 @@ bert_zwnj = ModelMeta(
48
49
  revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
49
50
  release_date="2021-06-28",
50
51
  n_parameters=118_297_344,
52
+ n_embedding_parameters=32_256_000,
51
53
  memory_usage_mb=451,
52
54
  embed_dim=768,
53
55
  license="not specified",
@@ -74,6 +76,7 @@ roberta_zwnj = ModelMeta(
74
76
  revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
75
77
  release_date="2021-06-28",
76
78
  n_parameters=118_298_112,
79
+ n_embedding_parameters=32_256_000,
77
80
  memory_usage_mb=451,
78
81
  embed_dim=768,
79
82
  license="not specified",
@@ -99,6 +102,7 @@ sentence_transformer_parsbert = ModelMeta(
99
102
  revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
100
103
  release_date="2024-12-10",
101
104
  n_parameters=162_841_344,
105
+ n_embedding_parameters=76_800_000,
102
106
  memory_usage_mb=621,
103
107
  embed_dim=768,
104
108
  license="apache-2.0",
@@ -123,6 +127,7 @@ tooka_bert_base = ModelMeta(
123
127
  revision="fa5ca89df5670700d9325b8872ac65c17cb24582",
124
128
  release_date="2024-12-08",
125
129
  n_parameters=122_905_344,
130
+ n_embedding_parameters=36_864_000,
126
131
  memory_usage_mb=469,
127
132
  embed_dim=768,
128
133
  license="apache-2.0",
@@ -150,6 +155,7 @@ tooka_sbert = ModelMeta(
150
155
  revision="5d07f0c543aca654373b931ae07cd197769110fd",
151
156
  release_date="2024-12-07",
152
157
  n_parameters=353_039_360,
158
+ n_embedding_parameters=49_152_000,
153
159
  memory_usage_mb=1347,
154
160
  embed_dim=1024,
155
161
  license="apache-2.0",
@@ -181,6 +187,7 @@ fa_bert = ModelMeta(
181
187
  revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
182
188
  release_date="2024-10-07",
183
189
  n_parameters=124_441_344,
190
+ n_embedding_parameters=38_400_000,
184
191
  memory_usage_mb=475,
185
192
  embed_dim=768,
186
193
  license="not specified",
@@ -229,6 +236,7 @@ tooka_sbert_v2_small = ModelMeta(
229
236
  revision="8bbed87e36669387f71437c061430ba56d1b496f",
230
237
  release_date="2025-05-01",
231
238
  n_parameters=122_905_344,
239
+ n_embedding_parameters=36_864_000,
232
240
  memory_usage_mb=496,
233
241
  embed_dim=768,
234
242
  license="not specified",
@@ -260,6 +268,7 @@ tooka_sbert_v2_large = ModelMeta(
260
268
  revision="b59682efa961122cc0e4408296d5852870c82eae",
261
269
  release_date="2025-05-01",
262
270
  n_parameters=353_039_360,
271
+ n_embedding_parameters=49_152_000,
263
272
  memory_usage_mb=1347,
264
273
  embed_dim=1024,
265
274
  license="not specified",
@@ -113,6 +113,7 @@ xlmr_base = ModelMeta(
113
113
  revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
114
114
  release_date="2019-11-05", # arxiv paper release
115
115
  n_parameters=278043648,
116
+ n_embedding_parameters=192_001_536,
116
117
  memory_usage_mb=1064,
117
118
  embed_dim=768,
118
119
  license="mit",
@@ -163,6 +164,7 @@ xlmr_large = ModelMeta(
163
164
  revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
164
165
  release_date="2019-11-05", # arxiv paper release
165
166
  n_parameters=559890432,
167
+ n_embedding_parameters=256_002_048,
166
168
  memory_usage_mb=2141,
167
169
  embed_dim=1024,
168
170
  license="mit",
@@ -20,6 +20,7 @@ geoembedding = ModelMeta(
20
20
  ),
21
21
  release_date="2025-04-22",
22
22
  n_parameters=7241732096,
23
+ n_embedding_parameters=131_072_000,
23
24
  memory_usage_mb=27625,
24
25
  embed_dim=4096,
25
26
  license="apache-2.0",
@@ -6,16 +6,18 @@ import warnings
6
6
  from typing import TYPE_CHECKING, Any
7
7
 
8
8
  import torch
9
- from torch.utils.data import DataLoader
10
9
  from tqdm.autonotebook import tqdm
11
10
 
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
11
  from mteb.models.abs_encoder import AbsEncoder
14
12
  from mteb.models.model_meta import ModelMeta, ScoringFunction
15
- from mteb.types import Array, BatchedInput, PromptType
13
+ from mteb.types import PromptType
16
14
 
17
15
  if TYPE_CHECKING:
18
16
  from PIL import Image
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
19
21
 
20
22
  logger = logging.getLogger(__name__)
21
23
 
@@ -354,6 +356,7 @@ gme_qwen2vl_2b = ModelMeta(
354
356
  release_date="2024-12-24",
355
357
  modalities=["image", "text"],
356
358
  n_parameters=2_210_000_000,
359
+ n_embedding_parameters=233_373_696,
357
360
  memory_usage_mb=8427,
358
361
  embed_dim=1536,
359
362
  license="apache-2.0",
@@ -378,6 +381,7 @@ gme_qwen2vl_7b = ModelMeta(
378
381
  release_date="2024-12-24",
379
382
  modalities=["image", "text"],
380
383
  n_parameters=8_290_000_000,
384
+ n_embedding_parameters=544_997_376,
381
385
  memory_usage_mb=31629,
382
386
  embed_dim=3584,
383
387
  license="apache-2.0",
@@ -1,17 +1,23 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import numpy as np
4
6
  from packaging.version import Version
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
  from transformers import __version__ as transformers_version
8
9
 
9
10
  from mteb._requires_package import requires_package
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
11
  from mteb.models import sentence_transformers_loader
12
12
  from mteb.models.abs_encoder import AbsEncoder
13
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
14
+ from mteb.types import PromptType
15
+
16
+ if TYPE_CHECKING:
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
15
21
 
16
22
  MULTILINGUAL_EVALUATED_LANGUAGES = [
17
23
  "arb-Arab",
@@ -156,6 +162,7 @@ google_text_emb_004 = ModelMeta(
156
162
  revision="1", # revision is intended for implementation
157
163
  release_date="2024-05-14",
158
164
  n_parameters=None,
165
+ n_embedding_parameters=None,
159
166
  memory_usage_mb=None,
160
167
  max_tokens=2048,
161
168
  embed_dim=768,
@@ -181,6 +188,7 @@ google_text_emb_005 = ModelMeta(
181
188
  revision="1", # revision is intended for implementation
182
189
  release_date="2024-11-18",
183
190
  n_parameters=None,
191
+ n_embedding_parameters=None,
184
192
  memory_usage_mb=None,
185
193
  max_tokens=2048,
186
194
  embed_dim=768,
@@ -206,6 +214,7 @@ google_text_multilingual_emb_002 = ModelMeta(
206
214
  revision="1",
207
215
  release_date="2024-05-14",
208
216
  n_parameters=None,
217
+ n_embedding_parameters=None,
209
218
  memory_usage_mb=None,
210
219
  max_tokens=2048,
211
220
  embed_dim=768,
@@ -231,6 +240,7 @@ google_gemini_embedding_001 = ModelMeta(
231
240
  revision="1",
232
241
  release_date="2025-03-07",
233
242
  n_parameters=None,
243
+ n_embedding_parameters=None,
234
244
  memory_usage_mb=None,
235
245
  max_tokens=2048,
236
246
  embed_dim=3072,
@@ -266,6 +276,7 @@ embedding_gemma_300m = ModelMeta(
266
276
  revision="64614b0b8b64f0c6c1e52b07e4e9a4e8fe4d2da2",
267
277
  release_date="2025-09-04",
268
278
  n_parameters=307_581_696,
279
+ n_embedding_parameters=201_326_592,
269
280
  embed_dim=768,
270
281
  max_tokens=2048,
271
282
  license="gemma",
@@ -4,20 +4,21 @@ import logging
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  import torch
7
- from torch.utils.data import DataLoader
8
7
  from tqdm.auto import tqdm
9
8
 
10
9
  from mteb._requires_package import (
11
10
  requires_image_dependencies,
12
11
  )
13
- from mteb.abstasks.task_metadata import TaskMetadata
14
12
  from mteb.models.model_meta import ModelMeta
15
- from mteb.types import Array, BatchedInput, PromptType
16
-
17
- logger = logging.getLogger(__name__)
18
13
 
19
14
  if TYPE_CHECKING:
20
15
  from PIL import Image
16
+ from torch.utils.data import DataLoader
17
+
18
+ from mteb.abstasks.task_metadata import TaskMetadata
19
+ from mteb.types import Array, BatchedInput, PromptType
20
+
21
+ logger = logging.getLogger(__name__)
21
22
 
22
23
 
23
24
  class GraniteVisionEmbeddingWrapper:
@@ -172,6 +173,7 @@ granite_vision_embedding = ModelMeta(
172
173
  release_date="2025-06-11",
173
174
  modalities=["image", "text"],
174
175
  n_parameters=2_980_000_000,
176
+ n_embedding_parameters=None,
175
177
  memory_usage_mb=11351,
176
178
  max_tokens=128000,
177
179
  embed_dim=128,
@@ -44,6 +44,7 @@ gritlm7b = ModelMeta(
44
44
  revision="13f00a0e36500c80ce12870ea513846a066004af",
45
45
  release_date="2024-02-15",
46
46
  n_parameters=7_240_000_000,
47
+ n_embedding_parameters=131_072_000,
47
48
  memory_usage_mb=13813,
48
49
  embed_dim=4096,
49
50
  license="apache-2.0",
@@ -73,6 +74,8 @@ gritlm8x7b = ModelMeta(
73
74
  revision="7f089b13e3345510281733ca1e6ff871b5b4bc76",
74
75
  release_date="2024-02-15",
75
76
  n_parameters=57_920_000_000,
77
+ n_embedding_parameters=None,
78
+ n_active_parameters_override=13_000_000_000,
76
79
  memory_usage_mb=89079,
77
80
  embed_dim=32768,
78
81
  license="apache-2.0",