mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486)
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,31 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Sequence
3
4
  from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import numpy as np
6
7
  import torch
7
- from torch.utils.data import DataLoader
8
8
 
9
9
  import mteb
10
10
  from mteb._create_dataloaders import _corpus_to_dict
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
13
- from mteb.models.models_protocols import PromptType
14
12
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
15
- from mteb.types import Array, BatchedInput
13
+ from mteb.types import PromptType
16
14
 
17
15
  from .bge_models import bge_full_data
18
16
 
19
17
  if TYPE_CHECKING:
18
+ from collections.abc import Sequence
19
+
20
+ from torch.utils.data import DataLoader
21
+
20
22
  from mteb.abstasks import (
21
23
  AbsTaskClassification,
22
24
  AbsTaskRetrieval,
23
25
  AbsTaskSummarization,
24
26
  )
27
+ from mteb.abstasks.task_metadata import TaskMetadata
28
+ from mteb.types import Array, BatchedInput
25
29
  logger = logging.getLogger(__name__)
26
30
 
27
31
  CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
@@ -222,6 +226,7 @@ cde_small_v1 = ModelMeta(
222
226
  revision="e151df18af0d7f1d1c37b074fee58406ececf19f",
223
227
  release_date="2024-09-24",
224
228
  n_parameters=int(281 * 1e6),
229
+ n_embedding_parameters=None,
225
230
  memory_usage_mb=1072, # Though the second-stage model is only 140M
226
231
  max_tokens=512,
227
232
  embed_dim=768,
@@ -251,6 +256,7 @@ cde_small_v2 = ModelMeta(
251
256
  revision="4e1d021a6c3fd7ce8aa0a7204057eee5ae61d390",
252
257
  release_date="2025-01-13",
253
258
  n_parameters=int(306 * 1e6),
259
+ n_embedding_parameters=None,
254
260
  memory_usage_mb=1166, # Though the second-stage model is only 140M
255
261
  max_tokens=512,
256
262
  embed_dim=768,
@@ -1,13 +1,18 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.abs_encoder import AbsEncoder
9
9
  from mteb.models.model_meta import ModelMeta, ScoringFunction
10
- from mteb.types import Array, BatchedInput, PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import Array, BatchedInput, PromptType
11
16
 
12
17
 
13
18
  class CLIPModel(AbsEncoder):
@@ -123,6 +128,7 @@ clip_vit_large_patch14 = ModelMeta(
123
128
  release_date="2021-02-26",
124
129
  modalities=["image", "text"],
125
130
  n_parameters=428_000_000,
131
+ n_embedding_parameters=None,
126
132
  memory_usage_mb=1631,
127
133
  max_tokens=77,
128
134
  embed_dim=768,
@@ -147,6 +153,7 @@ clip_vit_base_patch32 = ModelMeta(
147
153
  release_date="2021-02-26",
148
154
  modalities=["image", "text"],
149
155
  n_parameters=151_000_000,
156
+ n_embedding_parameters=None,
150
157
  memory_usage_mb=576,
151
158
  max_tokens=77,
152
159
  embed_dim=512,
@@ -171,6 +178,7 @@ clip_vit_base_patch16 = ModelMeta(
171
178
  release_date="2021-02-26",
172
179
  modalities=["image", "text"],
173
180
  n_parameters=151_000_000,
181
+ n_embedding_parameters=None,
174
182
  memory_usage_mb=576,
175
183
  max_tokens=77,
176
184
  embed_dim=512,
@@ -30,6 +30,7 @@ e5_nl_small = ModelMeta(
30
30
  revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
31
31
  release_date="2025-09-23",
32
32
  n_parameters=40_800_000,
33
+ n_embedding_parameters=19_200_768,
33
34
  memory_usage_mb=78,
34
35
  embed_dim=384,
35
36
  license="mit",
@@ -57,6 +58,7 @@ e5_nl_base = ModelMeta(
57
58
  revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
58
59
  release_date="2025-09-23",
59
60
  n_parameters=124_400_000,
61
+ n_embedding_parameters=38_401_536,
60
62
  memory_usage_mb=237,
61
63
  embed_dim=768,
62
64
  license="mit",
@@ -84,6 +86,7 @@ e5_nl_large = ModelMeta(
84
86
  revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
85
87
  release_date="2025-09-23",
86
88
  n_parameters=355_000_000,
89
+ n_embedding_parameters=51_202_048,
87
90
  memory_usage_mb=1355,
88
91
  embed_dim=1024,
89
92
  license="mit",
@@ -236,6 +236,7 @@ F2LLM_0B6 = ModelMeta(
236
236
  revision="36416618b83d4bd84a8ca30c2ee01ed518f9f2e7",
237
237
  release_date="2025-09-18",
238
238
  n_parameters=595_776_512,
239
+ n_embedding_parameters=None,
239
240
  memory_usage_mb=1137,
240
241
  embed_dim=1024,
241
242
  license="apache-2.0",
@@ -266,6 +267,7 @@ F2LLM_1B7 = ModelMeta(
266
267
  revision="fdce0e09655f42cea26f7f66f5a70cd4507ea45c",
267
268
  release_date="2025-09-18",
268
269
  n_parameters=1_720_574_976,
270
+ n_embedding_parameters=None,
269
271
  memory_usage_mb=3282,
270
272
  embed_dim=2560,
271
273
  license="apache-2.0",
@@ -296,6 +298,7 @@ F2LLM_4B = ModelMeta(
296
298
  revision="9fe95901ed2b6b59dd7673d6e93c9d76766a1e25",
297
299
  release_date="2025-09-18",
298
300
  n_parameters=4_021_774_336,
301
+ n_embedding_parameters=None,
299
302
  memory_usage_mb=7672,
300
303
  embed_dim=2560,
301
304
  license="apache-2.0",
@@ -318,6 +321,7 @@ C2LLM_0B5 = ModelMeta(
318
321
  release_date="2025-12-22",
319
322
  languages=c2llm_languages,
320
323
  n_parameters=497252096,
324
+ n_embedding_parameters=None,
321
325
  memory_usage_mb=948.0,
322
326
  max_tokens=32768,
323
327
  embed_dim=896,
@@ -346,6 +350,7 @@ C2LLM_7B = ModelMeta(
346
350
  release_date="2025-12-22",
347
351
  languages=c2llm_languages,
348
352
  n_parameters=7667028992,
353
+ n_embedding_parameters=None,
349
354
  memory_usage_mb=14624.0,
350
355
  max_tokens=32768,
351
356
  embed_dim=3584,
@@ -28,6 +28,7 @@ codesage_large = ModelMeta(
28
28
  release_date="2024-02-03",
29
29
  modalities=["text"],
30
30
  n_parameters=1_300_000_000,
31
+ n_embedding_parameters=100_667_392,
31
32
  memory_usage_mb=4959,
32
33
  max_tokens=2048,
33
34
  embed_dim=2048,
@@ -55,6 +56,7 @@ codesage_base = ModelMeta(
55
56
  release_date="2024-02-03",
56
57
  modalities=["text"],
57
58
  n_parameters=356_000_000,
59
+ n_embedding_parameters=50_333_696,
58
60
  memory_usage_mb=1358,
59
61
  max_tokens=2048,
60
62
  embed_dim=1024,
@@ -82,6 +84,7 @@ codesage_small = ModelMeta(
82
84
  release_date="2024-02-03",
83
85
  modalities=["text"],
84
86
  n_parameters=130_000_000,
87
+ n_embedding_parameters=50_333_696,
85
88
  memory_usage_mb=496,
86
89
  max_tokens=2048,
87
90
  embed_dim=1024,
@@ -1,18 +1,24 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import time
3
5
  from functools import wraps
4
- from typing import Any, Literal, get_args
6
+ from typing import TYPE_CHECKING, Any, Literal, get_args
5
7
 
6
8
  import numpy as np
7
9
  import torch
8
- from torch.utils.data import DataLoader
9
10
  from tqdm.auto import tqdm
10
11
 
11
12
  from mteb._requires_package import requires_package
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
13
  from mteb.models.abs_encoder import AbsEncoder
14
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
15
- from mteb.types import Array, BatchedInput, PromptType
15
+ from mteb.types import PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import Array, BatchedInput
16
22
 
17
23
  logger = logging.getLogger(__name__)
18
24
 
@@ -386,6 +392,7 @@ cohere_mult_3 = ModelMeta(
386
392
  revision="1",
387
393
  release_date="2023-11-02",
388
394
  n_parameters=None,
395
+ n_embedding_parameters=None,
389
396
  memory_usage_mb=None,
390
397
  max_tokens=None,
391
398
  embed_dim=512,
@@ -412,6 +419,7 @@ cohere_eng_3 = ModelMeta(
412
419
  revision="1",
413
420
  release_date="2023-11-02",
414
421
  n_parameters=None,
422
+ n_embedding_parameters=None,
415
423
  memory_usage_mb=None,
416
424
  max_tokens=512,
417
425
  embed_dim=1024,
@@ -437,6 +445,7 @@ cohere_mult_light_3 = ModelMeta(
437
445
  reference="https://cohere.com/blog/introducing-embed-v3",
438
446
  release_date="2023-11-02",
439
447
  n_parameters=None,
448
+ n_embedding_parameters=None,
440
449
  memory_usage_mb=None,
441
450
  max_tokens=512,
442
451
  embed_dim=384,
@@ -462,6 +471,7 @@ cohere_eng_light_3 = ModelMeta(
462
471
  revision="1",
463
472
  release_date="2023-11-02",
464
473
  n_parameters=None,
474
+ n_embedding_parameters=None,
465
475
  memory_usage_mb=None,
466
476
  max_tokens=512,
467
477
  embed_dim=384,
@@ -1,15 +1,15 @@
1
+ from __future__ import annotations
2
+
1
3
  import base64
2
4
  import io
3
5
  import os
4
6
  import time
5
- from typing import Any, Literal, get_args
7
+ from typing import TYPE_CHECKING, Any, Literal, get_args
6
8
 
7
9
  import torch
8
- from torch.utils.data import DataLoader
9
10
  from tqdm.auto import tqdm
10
11
 
11
12
  from mteb._requires_package import requires_image_dependencies, requires_package
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
13
  from mteb.models import ModelMeta
14
14
  from mteb.models.abs_encoder import AbsEncoder
15
15
  from mteb.models.model_implementations.cohere_models import (
@@ -18,7 +18,12 @@ from mteb.models.model_implementations.cohere_models import (
18
18
  retry_with_rate_limit,
19
19
  )
20
20
  from mteb.models.model_meta import ScoringFunction
21
- from mteb.types import Array, BatchedInput, PromptType
21
+
22
+ if TYPE_CHECKING:
23
+ from torch.utils.data import DataLoader
24
+
25
+ from mteb.abstasks.task_metadata import TaskMetadata
26
+ from mteb.types import Array, BatchedInput, PromptType
22
27
 
23
28
 
24
29
  def _post_process_embeddings(
@@ -386,6 +391,7 @@ cohere_mult_3 = ModelMeta(
386
391
  revision="1",
387
392
  release_date="2024-10-24",
388
393
  n_parameters=None,
394
+ n_embedding_parameters=None,
389
395
  memory_usage_mb=None,
390
396
  max_tokens=None,
391
397
  embed_dim=1024,
@@ -410,6 +416,7 @@ cohere_eng_3 = ModelMeta(
410
416
  revision="1",
411
417
  release_date="2024-10-24",
412
418
  n_parameters=None,
419
+ n_embedding_parameters=None,
413
420
  memory_usage_mb=None,
414
421
  max_tokens=None,
415
422
  embed_dim=1024,
@@ -434,6 +441,7 @@ cohere_embed_v4_multimodal = ModelMeta(
434
441
  revision="1",
435
442
  release_date="2024-12-01",
436
443
  n_parameters=None,
444
+ n_embedding_parameters=None,
437
445
  memory_usage_mb=None,
438
446
  max_tokens=128000,
439
447
  embed_dim=1536,
@@ -458,6 +466,7 @@ cohere_embed_v4_multimodal_binary = ModelMeta(
458
466
  revision="1",
459
467
  release_date="2024-12-01",
460
468
  n_parameters=None,
469
+ n_embedding_parameters=None,
461
470
  memory_usage_mb=None,
462
471
  max_tokens=128000,
463
472
  embed_dim=1536,
@@ -483,6 +492,7 @@ cohere_embed_v4_multimodal_int8 = ModelMeta(
483
492
  revision="1",
484
493
  release_date="2024-12-01",
485
494
  n_parameters=None,
495
+ n_embedding_parameters=None,
486
496
  memory_usage_mb=None,
487
497
  max_tokens=128000,
488
498
  embed_dim=1536,
@@ -4,20 +4,21 @@ import logging
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  import torch
7
- from torch.utils.data import DataLoader
8
7
  from tqdm.auto import tqdm
9
8
 
10
9
  from mteb._requires_package import (
11
10
  requires_image_dependencies,
12
11
  requires_package,
13
12
  )
14
- from mteb.abstasks.task_metadata import TaskMetadata
15
13
  from mteb.models.abs_encoder import AbsEncoder
16
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
17
- from mteb.types import Array, BatchedInput, PromptType
18
15
 
19
16
  if TYPE_CHECKING:
20
17
  from PIL import Image
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import Array, BatchedInput, PromptType
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
@@ -219,6 +220,7 @@ colpali_v1_1 = ModelMeta(
219
220
  release_date="2024-08-21",
220
221
  modalities=["image", "text"],
221
222
  n_parameters=2_920_000_000,
223
+ n_embedding_parameters=None,
222
224
  memory_usage_mb=4700,
223
225
  max_tokens=16384,
224
226
  embed_dim=128,
@@ -246,6 +248,7 @@ colpali_v1_2 = ModelMeta(
246
248
  release_date="2024-08-26",
247
249
  modalities=["image", "text"],
248
250
  n_parameters=2_920_000_000,
251
+ n_embedding_parameters=None,
249
252
  memory_usage_mb=4700,
250
253
  max_tokens=16384,
251
254
  embed_dim=128,
@@ -273,6 +276,7 @@ colpali_v1_3 = ModelMeta(
273
276
  release_date="2024-11-01",
274
277
  modalities=["image", "text"],
275
278
  n_parameters=2_920_000_000,
279
+ n_embedding_parameters=None,
276
280
  memory_usage_mb=4700,
277
281
  max_tokens=16384,
278
282
  embed_dim=128,
@@ -1,18 +1,23 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import (
9
10
  requires_image_dependencies,
10
11
  requires_package,
11
12
  )
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
13
  from mteb.models.abs_encoder import AbsEncoder
14
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
15
- from mteb.types import Array, BatchedInput, PromptType
15
+
16
+ if TYPE_CHECKING:
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput, PromptType
16
21
 
17
22
  from .colpali_models import (
18
23
  COLPALI_CITATION,
@@ -219,6 +224,7 @@ colqwen2 = ModelMeta(
219
224
  release_date="2025-11-03",
220
225
  modalities=["image", "text"],
221
226
  n_parameters=2_210_000_000,
227
+ n_embedding_parameters=None,
222
228
  memory_usage_mb=7200,
223
229
  max_tokens=32768,
224
230
  embed_dim=128,
@@ -246,6 +252,7 @@ colqwen2_5 = ModelMeta(
246
252
  release_date="2025-01-31",
247
253
  modalities=["image", "text"],
248
254
  n_parameters=3_000_000_000,
255
+ n_embedding_parameters=None,
249
256
  memory_usage_mb=7200,
250
257
  max_tokens=128000,
251
258
  embed_dim=128,
@@ -290,6 +297,7 @@ colqwen3_8b = ModelMeta(
290
297
  release_date="2025-11-26",
291
298
  modalities=["image", "text"],
292
299
  n_parameters=8_000_000_000,
300
+ n_embedding_parameters=None,
293
301
  memory_usage_mb=16724,
294
302
  max_tokens=262144,
295
303
  embed_dim=320,
@@ -314,6 +322,7 @@ colqwen3_4b = ModelMeta(
314
322
  release_date="2025-11-26",
315
323
  modalities=["image", "text"],
316
324
  n_parameters=4_000_000_000,
325
+ n_embedding_parameters=None,
317
326
  memory_usage_mb=8466,
318
327
  max_tokens=262144,
319
328
  embed_dim=320,
@@ -329,32 +338,6 @@ colqwen3_4b = ModelMeta(
329
338
  citation=TOMORO_CITATION,
330
339
  )
331
340
 
332
- colnomic_7b = ModelMeta(
333
- loader=ColQwen2_5Wrapper,
334
- loader_kwargs=dict(
335
- torch_dtype=torch.float16,
336
- ),
337
- name="nomic-ai/colnomic-embed-multimodal-7b",
338
- model_type=["late-interaction"],
339
- languages=["eng-Latn"],
340
- revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
341
- release_date="2025-03-31",
342
- modalities=["image", "text"],
343
- n_parameters=7_000_000_000,
344
- memory_usage_mb=14400,
345
- max_tokens=128000,
346
- embed_dim=128,
347
- license="apache-2.0",
348
- open_weights=True,
349
- public_training_code="https://github.com/nomic-ai/colpali",
350
- public_training_data="https://huggingface.co/datasets/vidore/colpali_train_set",
351
- framework=["ColPali", "safetensors"],
352
- reference="https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b",
353
- similarity_fn_name="MaxSim",
354
- use_instructions=True,
355
- training_datasets=COLPALI_TRAINING_DATA,
356
- citation=COLPALI_CITATION,
357
- )
358
341
 
359
342
  COLNOMIC_CITATION = """
360
343
  @misc{nomicembedmultimodal2025,
@@ -386,6 +369,7 @@ colnomic_3b = ModelMeta(
386
369
  release_date="2025-03-31",
387
370
  modalities=["image", "text"],
388
371
  n_parameters=3_000_000_000,
372
+ n_embedding_parameters=None,
389
373
  memory_usage_mb=7200,
390
374
  max_tokens=128000,
391
375
  embed_dim=128,
@@ -402,7 +386,7 @@ colnomic_3b = ModelMeta(
402
386
  )
403
387
 
404
388
  colnomic_7b = ModelMeta(
405
- loader=ColQwen2Wrapper,
389
+ loader=ColQwen2_5Wrapper,
406
390
  loader_kwargs=dict(
407
391
  torch_dtype=torch.float16,
408
392
  ),
@@ -451,6 +435,7 @@ evoqwen25_vl_retriever_3b_v1 = ModelMeta(
451
435
  release_date="2025-11-04",
452
436
  modalities=["image", "text"],
453
437
  n_parameters=3_000_000_000,
438
+ n_embedding_parameters=None,
454
439
  memory_usage_mb=7200,
455
440
  max_tokens=128000,
456
441
  embed_dim=128,
@@ -477,6 +462,7 @@ evoqwen25_vl_retriever_7b_v1 = ModelMeta(
477
462
  release_date="2025-11-04",
478
463
  modalities=["image", "text"],
479
464
  n_parameters=7_000_000_000,
465
+ n_embedding_parameters=None,
480
466
  memory_usage_mb=14400,
481
467
  max_tokens=128000,
482
468
  embed_dim=128,
@@ -56,10 +56,11 @@ colsmol_256m = ModelMeta(
56
56
  name="vidore/colSmol-256M",
57
57
  model_type=["late-interaction"],
58
58
  languages=["eng-Latn"],
59
- revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
59
+ revision="a59110fdf114638b8018e6c9a018907e12f14855",
60
60
  release_date="2025-01-22",
61
61
  modalities=["image", "text"],
62
62
  n_parameters=256_000_000,
63
+ n_embedding_parameters=None,
63
64
  memory_usage_mb=800,
64
65
  max_tokens=8192,
65
66
  embed_dim=128,
@@ -87,6 +88,7 @@ colsmol_500m = ModelMeta(
87
88
  release_date="2025-01-22",
88
89
  modalities=["image", "text"],
89
90
  n_parameters=500_000_000,
91
+ n_embedding_parameters=None,
90
92
  memory_usage_mb=1200,
91
93
  max_tokens=8192,
92
94
  embed_dim=128,
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import hashlib
2
4
  import json
3
5
  import logging
@@ -5,20 +7,24 @@ import os
5
7
  import random
6
8
  import string
7
9
  import time
8
- from typing import Any
10
+ from typing import TYPE_CHECKING, Any
9
11
 
10
12
  import numpy as np
11
13
  import requests
12
- from torch.utils.data import DataLoader
13
14
 
14
- from mteb.abstasks.task_metadata import TaskMetadata
15
15
  from mteb.models.abs_encoder import AbsEncoder
16
16
  from mteb.models.model_meta import ModelMeta
17
- from mteb.types import Array, BatchedInput, PromptType
18
17
 
19
18
  from .bge_models import bge_full_data
20
19
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
21
20
 
21
+ if TYPE_CHECKING:
22
+ from torch.utils.data import DataLoader
23
+
24
+ from mteb.abstasks.task_metadata import TaskMetadata
25
+ from mteb.types import Array, BatchedInput, PromptType
26
+
27
+
22
28
  conan_zh_datasets = {
23
29
  "BQ",
24
30
  "LCQMC",
@@ -205,6 +211,7 @@ Conan_embedding_v2 = ModelMeta(
205
211
  embed_dim=3584,
206
212
  open_weights=False,
207
213
  n_parameters=None,
214
+ n_embedding_parameters=None,
208
215
  memory_usage_mb=None,
209
216
  license="apache-2.0",
210
217
  reference="https://huggingface.co/TencentBAC/Conan-embedding-v2",