mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import torch
6
- from torch.utils.data import DataLoader
7
7
 
8
8
  from mteb._requires_package import requires_package, suggest_package
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
9
  from mteb.models.abs_encoder import AbsEncoder
11
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.models.models_protocols import EncoderProtocol
13
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Callable
14
+
15
+ from torch.utils.data import DataLoader
16
+
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.models.models_protocols import EncoderProtocol
19
+ from mteb.types import Array, BatchedInput, PromptType
14
20
 
15
21
  logger = logging.getLogger(__name__)
16
22
 
@@ -139,6 +145,7 @@ llm2vec_llama3_8b_supervised = ModelMeta(
139
145
  # TODO: Not sure what to put here as a model is made of two peft repos, each with a different revision
140
146
  release_date="2024-04-09",
141
147
  n_parameters=7_505_000_000,
148
+ n_embedding_parameters=None,
142
149
  memory_usage_mb=28629,
143
150
  max_tokens=8192,
144
151
  embed_dim=4096,
@@ -168,6 +175,7 @@ llm2vec_llama3_8b_unsupervised = ModelMeta(
168
175
  revision="1cb7b735326d13a8541db8f57f35da5373f5e9c6",
169
176
  release_date="2024-04-09",
170
177
  n_parameters=7_505_000_000,
178
+ n_embedding_parameters=None,
171
179
  memory_usage_mb=28629,
172
180
  max_tokens=8192,
173
181
  embed_dim=4096,
@@ -197,6 +205,7 @@ llm2vec_mistral7b_supervised = ModelMeta(
197
205
  revision="0ae69bdd5816105778b971c3138e8f8a18eaa3ae",
198
206
  release_date="2024-04-09",
199
207
  n_parameters=7_111_000_000,
208
+ n_embedding_parameters=131_072_000,
200
209
  memory_usage_mb=27126,
201
210
  max_tokens=32768,
202
211
  embed_dim=4096,
@@ -226,6 +235,7 @@ llm2vec_mistral7b_unsupervised = ModelMeta(
226
235
  revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8",
227
236
  release_date="2024-04-09",
228
237
  n_parameters=7_111_000_000,
238
+ n_embedding_parameters=131_072_000,
229
239
  memory_usage_mb=27126,
230
240
  max_tokens=32768,
231
241
  embed_dim=4096,
@@ -255,6 +265,7 @@ llm2vec_llama2_7b_supervised = ModelMeta(
255
265
  revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8",
256
266
  release_date="2024-04-09",
257
267
  n_parameters=7_111_000_000,
268
+ n_embedding_parameters=None,
258
269
  memory_usage_mb=27126,
259
270
  max_tokens=32768,
260
271
  embed_dim=4096,
@@ -284,6 +295,7 @@ llm2vec_llama2_7b_unsupervised = ModelMeta(
284
295
  revision="a76944871d169ebe7c97eb921764cd063afed785",
285
296
  release_date="2024-04-09",
286
297
  n_parameters=7_111_000_000,
298
+ n_embedding_parameters=None,
287
299
  memory_usage_mb=27126,
288
300
  max_tokens=32768,
289
301
  embed_dim=4096,
@@ -313,6 +325,7 @@ llm2vec_sheared_llama_supervised = ModelMeta(
313
325
  revision="a5943d406c6b016fef3f07906aac183cf1a0b47d",
314
326
  release_date="2024-04-09",
315
327
  n_parameters=7_111_000_000,
328
+ n_embedding_parameters=65_536_000,
316
329
  memory_usage_mb=27126,
317
330
  max_tokens=32768,
318
331
  embed_dim=4096,
@@ -342,6 +355,7 @@ llm2vec_sheared_llama_unsupervised = ModelMeta(
342
355
  revision="a5943d406c6b016fef3f07906aac183cf1a0b47d",
343
356
  release_date="2024-04-09",
344
357
  n_parameters=7_111_000_000,
358
+ n_embedding_parameters=65_536_000,
345
359
  memory_usage_mb=27126,
346
360
  max_tokens=32768,
347
361
  embed_dim=4096,
@@ -1,16 +1,19 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import os
3
5
  import time
4
6
  import warnings
5
- from typing import Any
7
+ from typing import TYPE_CHECKING, Any
6
8
 
7
9
  import numpy as np
8
10
  import requests
9
11
 
10
12
  from mteb.models.abs_encoder import AbsEncoder
11
13
  from mteb.models.model_meta import ModelMeta
12
- from mteb.types import PromptType
13
14
 
15
+ if TYPE_CHECKING:
16
+ from mteb.types import PromptType
14
17
  logger = logging.getLogger(__name__)
15
18
 
16
19
  HAKIM_CITATION = """@article{sarmadi2025hakim,
@@ -355,6 +358,7 @@ hakim = ModelMeta(
355
358
  revision="1",
356
359
  release_date="2025-05-10",
357
360
  n_parameters=124_441_344,
361
+ n_embedding_parameters=None,
358
362
  memory_usage_mb=475,
359
363
  embed_dim=768,
360
364
  license="not specified",
@@ -423,6 +427,7 @@ hakim_small = ModelMeta(
423
427
  revision="1",
424
428
  release_date="2025-05-10",
425
429
  n_parameters=38_736_384,
430
+ n_embedding_parameters=None,
426
431
  memory_usage_mb=148,
427
432
  embed_dim=512,
428
433
  license="not specified",
@@ -490,6 +495,7 @@ hakim_unsup = ModelMeta(
490
495
  revision="1",
491
496
  release_date="2025-05-10",
492
497
  n_parameters=124_441_344,
498
+ n_embedding_parameters=None,
493
499
  memory_usage_mb=475,
494
500
  embed_dim=768,
495
501
  license="not specified",
@@ -45,6 +45,7 @@ mdbr_leaf_ir = ModelMeta(
45
45
  "Transformers",
46
46
  ],
47
47
  n_parameters=22_861_056,
48
+ n_embedding_parameters=11_720_448,
48
49
  memory_usage_mb=86,
49
50
  max_tokens=512,
50
51
  embed_dim=768,
@@ -79,6 +80,7 @@ mdbr_leaf_mt = ModelMeta(
79
80
  "Transformers",
80
81
  ],
81
82
  n_parameters=22_958_592,
83
+ n_embedding_parameters=11_720_448,
82
84
  memory_usage_mb=86,
83
85
  max_tokens=512,
84
86
  embed_dim=1024,
@@ -18,6 +18,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
18
18
  release_date="2024-10-31",
19
19
  languages=["eng-Latn"],
20
20
  n_parameters=7110660096,
21
+ n_embedding_parameters=None,
21
22
  memory_usage_mb=13563,
22
23
  max_tokens=32768.0,
23
24
  embed_dim=None,
@@ -47,6 +48,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
47
48
  languages=[],
48
49
  loader=sentence_transformers_loader,
49
50
  n_parameters=278043648,
51
+ n_embedding_parameters=192_001_536,
50
52
  memory_usage_mb=1061,
51
53
  max_tokens=514.0,
52
54
  embed_dim=768,
@@ -148,6 +150,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
148
150
  languages=["eng-Latn"],
149
151
  loader=sentence_transformers_loader,
150
152
  n_parameters=None,
153
+ n_embedding_parameters=None,
151
154
  memory_usage_mb=None,
152
155
  max_tokens=None,
153
156
  embed_dim=768,
@@ -215,6 +218,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
215
218
  languages=[],
216
219
  loader=sentence_transformers_loader,
217
220
  n_parameters=2506172416,
221
+ n_embedding_parameters=None,
218
222
  memory_usage_mb=9560,
219
223
  max_tokens=8192.0,
220
224
  embed_dim=2048,
@@ -250,6 +254,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
250
254
  trust_remote_code=True,
251
255
  ),
252
256
  n_parameters=278043648,
257
+ n_embedding_parameters=192_001_536,
253
258
  memory_usage_mb=1061,
254
259
  max_tokens=514.0,
255
260
  embed_dim=768,
@@ -299,6 +304,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
299
304
  trust_remote_code=True,
300
305
  ),
301
306
  n_parameters=559890432,
307
+ n_embedding_parameters=256_002_048,
302
308
  memory_usage_mb=2136,
303
309
  max_tokens=514.0,
304
310
  embed_dim=1024,
@@ -348,6 +354,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
348
354
  trust_remote_code=True,
349
355
  ),
350
356
  n_parameters=117653760,
357
+ n_embedding_parameters=96_014_208,
351
358
  memory_usage_mb=449,
352
359
  max_tokens=512.0,
353
360
  embed_dim=384,
@@ -394,6 +401,7 @@ Mihaiii__Bulbasaur = ModelMeta(
394
401
  languages=None,
395
402
  loader=sentence_transformers_loader,
396
403
  n_parameters=17389824,
404
+ n_embedding_parameters=11_720_448,
397
405
  memory_usage_mb=66,
398
406
  max_tokens=512.0,
399
407
  embed_dim=384,
@@ -418,6 +426,7 @@ Mihaiii__Ivysaur = ModelMeta(
418
426
  languages=None,
419
427
  loader=sentence_transformers_loader,
420
428
  n_parameters=22713216,
429
+ n_embedding_parameters=11_720_448,
421
430
  memory_usage_mb=87,
422
431
  max_tokens=512.0,
423
432
  embed_dim=384,
@@ -442,6 +451,7 @@ Mihaiii__Squirtle = ModelMeta(
442
451
  languages=None,
443
452
  loader=sentence_transformers_loader,
444
453
  n_parameters=15615360,
454
+ n_embedding_parameters=11_720_448,
445
455
  memory_usage_mb=60,
446
456
  max_tokens=512.0,
447
457
  embed_dim=384,
@@ -466,6 +476,7 @@ Mihaiii__Venusaur = ModelMeta(
466
476
  languages=None,
467
477
  loader=sentence_transformers_loader,
468
478
  n_parameters=15615360,
479
+ n_embedding_parameters=11_720_448,
469
480
  memory_usage_mb=60,
470
481
  max_tokens=512.0,
471
482
  embed_dim=384,
@@ -490,6 +501,7 @@ Mihaiii__Wartortle = ModelMeta(
490
501
  languages=None,
491
502
  loader=sentence_transformers_loader,
492
503
  n_parameters=17389824,
504
+ n_embedding_parameters=11_720_448,
493
505
  memory_usage_mb=66,
494
506
  max_tokens=512.0,
495
507
  embed_dim=384,
@@ -514,6 +526,7 @@ Mihaiii__gte_micro = ModelMeta(
514
526
  languages=None,
515
527
  loader=sentence_transformers_loader,
516
528
  n_parameters=17389824,
529
+ n_embedding_parameters=11_720_448,
517
530
  memory_usage_mb=66,
518
531
  max_tokens=512.0,
519
532
  embed_dim=384,
@@ -537,6 +550,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
537
550
  languages=None,
538
551
  loader=sentence_transformers_loader,
539
552
  n_parameters=19164288,
553
+ n_embedding_parameters=11_720_448,
540
554
  memory_usage_mb=73,
541
555
  max_tokens=512.0,
542
556
  embed_dim=384,
@@ -560,6 +574,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
560
574
  languages=["fra-Latn"],
561
575
  loader=sentence_transformers_loader,
562
576
  n_parameters=559890432,
577
+ n_embedding_parameters=256_002_048,
563
578
  memory_usage_mb=2136,
564
579
  max_tokens=514.0,
565
580
  embed_dim=1024,
@@ -583,6 +598,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
583
598
  languages=["ara-Arab"],
584
599
  loader=sentence_transformers_loader,
585
600
  n_parameters=135193344,
601
+ n_embedding_parameters=49_152_000,
586
602
  memory_usage_mb=516,
587
603
  max_tokens=512.0,
588
604
  embed_dim=768,
@@ -615,6 +631,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
615
631
  languages=["ara-Arab"],
616
632
  loader=sentence_transformers_loader,
617
633
  n_parameters=117653760,
634
+ n_embedding_parameters=96_014_208,
618
635
  memory_usage_mb=449,
619
636
  max_tokens=512.0,
620
637
  embed_dim=384,
@@ -640,6 +657,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
640
657
  languages=["ara-Arab"],
641
658
  loader=sentence_transformers_loader,
642
659
  n_parameters=278043648,
660
+ n_embedding_parameters=192_001_536,
643
661
  memory_usage_mb=1061,
644
662
  max_tokens=514.0,
645
663
  embed_dim=768,
@@ -674,6 +692,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
674
692
  languages=["ara-Arab"],
675
693
  loader=sentence_transformers_loader,
676
694
  n_parameters=470926848,
695
+ n_embedding_parameters=384_885_504,
677
696
  memory_usage_mb=1796,
678
697
  max_tokens=512.0,
679
698
  embed_dim=768,
@@ -708,6 +727,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
708
727
  languages=["ara-Arab"],
709
728
  loader=sentence_transformers_loader,
710
729
  n_parameters=109486464,
730
+ n_embedding_parameters=23_444_736,
711
731
  memory_usage_mb=418,
712
732
  max_tokens=514.0,
713
733
  embed_dim=768,
@@ -742,6 +762,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
742
762
  languages=["ara-Arab"],
743
763
  loader=sentence_transformers_loader,
744
764
  n_parameters=162841344,
765
+ n_embedding_parameters=76_800_000,
745
766
  memory_usage_mb=621,
746
767
  max_tokens=512.0,
747
768
  embed_dim=768,
@@ -774,6 +795,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
774
795
  languages=None,
775
796
  loader=sentence_transformers_loader,
776
797
  n_parameters=None,
798
+ n_embedding_parameters=31_254_528,
777
799
  memory_usage_mb=None,
778
800
  max_tokens=512.0,
779
801
  embed_dim=1024,
@@ -797,6 +819,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
797
819
  languages=None,
798
820
  loader=sentence_transformers_loader,
799
821
  n_parameters=None,
822
+ n_embedding_parameters=None,
800
823
  memory_usage_mb=None,
801
824
  max_tokens=514.0,
802
825
  embed_dim=768,
@@ -829,6 +852,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
829
852
  languages=None,
830
853
  loader=sentence_transformers_loader,
831
854
  n_parameters=1279887360,
855
+ n_embedding_parameters=65_536_000,
832
856
  memory_usage_mb=2441,
833
857
  max_tokens=2048.0,
834
858
  embed_dim=2048,
@@ -852,6 +876,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
852
876
  languages=None,
853
877
  loader=sentence_transformers_loader,
854
878
  n_parameters=1279887360,
879
+ n_embedding_parameters=65_536_000,
855
880
  memory_usage_mb=2441,
856
881
  max_tokens=2048.0,
857
882
  embed_dim=2048,
@@ -875,6 +900,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
875
900
  languages=["fra-Latn", "eng-Latn"],
876
901
  loader=sentence_transformers_loader,
877
902
  n_parameters=1279887360,
903
+ n_embedding_parameters=65_536_000,
878
904
  memory_usage_mb=2441,
879
905
  max_tokens=2048.0,
880
906
  embed_dim=2048,
@@ -899,6 +925,7 @@ thenlper__gte_base = ModelMeta(
899
925
  languages=["eng-Latn"],
900
926
  loader=sentence_transformers_loader,
901
927
  n_parameters=109482752,
928
+ n_embedding_parameters=23_440_896,
902
929
  memory_usage_mb=209,
903
930
  max_tokens=512.0,
904
931
  embed_dim=768,
@@ -928,6 +955,7 @@ thenlper__gte_large = ModelMeta(
928
955
  languages=["eng-Latn"],
929
956
  loader=sentence_transformers_loader,
930
957
  n_parameters=335142400,
958
+ n_embedding_parameters=31_254_528,
931
959
  memory_usage_mb=639,
932
960
  max_tokens=512.0,
933
961
  embed_dim=1024,
@@ -957,6 +985,7 @@ thenlper__gte_small = ModelMeta(
957
985
  languages=["eng-Latn"],
958
986
  loader=sentence_transformers_loader,
959
987
  n_parameters=33360512,
988
+ n_embedding_parameters=11_720_448,
960
989
  memory_usage_mb=64,
961
990
  max_tokens=512.0,
962
991
  embed_dim=384,
@@ -986,6 +1015,7 @@ OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
986
1015
  languages=["pol-Latn"],
987
1016
  loader=sentence_transformers_loader,
988
1017
  n_parameters=103705344,
1018
+ n_embedding_parameters=None,
989
1019
  memory_usage_mb=396,
990
1020
  max_tokens=512.0,
991
1021
  embed_dim=768,
@@ -1009,6 +1039,7 @@ OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
1009
1039
  languages=["pol-Latn"],
1010
1040
  loader=sentence_transformers_loader,
1011
1041
  n_parameters=None,
1042
+ n_embedding_parameters=None,
1012
1043
  memory_usage_mb=None,
1013
1044
  max_tokens=514.0,
1014
1045
  embed_dim=768,
@@ -1032,6 +1063,7 @@ sdadas__mmlw_e5_base = ModelMeta(
1032
1063
  languages=["pol-Latn"],
1033
1064
  loader=sentence_transformers_loader,
1034
1065
  n_parameters=278043648,
1066
+ n_embedding_parameters=192_001_536,
1035
1067
  memory_usage_mb=1061,
1036
1068
  max_tokens=514.0,
1037
1069
  embed_dim=768,
@@ -1063,6 +1095,7 @@ dwzhu__e5_base_4k = ModelMeta(
1063
1095
  languages=["eng-Latn"],
1064
1096
  loader=sentence_transformers_loader,
1065
1097
  n_parameters=None,
1098
+ n_embedding_parameters=23_440_896,
1066
1099
  memory_usage_mb=None,
1067
1100
  max_tokens=4096.0,
1068
1101
  embed_dim=None,
@@ -1092,6 +1125,7 @@ sdadas__mmlw_e5_large = ModelMeta(
1092
1125
  languages=["pol-Latn"],
1093
1126
  loader=sentence_transformers_loader,
1094
1127
  n_parameters=559890432,
1128
+ n_embedding_parameters=256_002_048,
1095
1129
  memory_usage_mb=2136,
1096
1130
  max_tokens=514.0,
1097
1131
  embed_dim=1024,
@@ -1123,6 +1157,7 @@ sdadas__mmlw_e5_small = ModelMeta(
1123
1157
  languages=["pol-Latn"],
1124
1158
  loader=sentence_transformers_loader,
1125
1159
  n_parameters=117653760,
1160
+ n_embedding_parameters=96_014_208,
1126
1161
  memory_usage_mb=449,
1127
1162
  max_tokens=512.0,
1128
1163
  embed_dim=384,
@@ -1154,6 +1189,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
1154
1189
  languages=["pol-Latn"],
1155
1190
  loader=sentence_transformers_loader,
1156
1191
  n_parameters=124442880,
1192
+ n_embedding_parameters=38_400_768,
1157
1193
  memory_usage_mb=475,
1158
1194
  max_tokens=514.0,
1159
1195
  embed_dim=768,
@@ -1185,6 +1221,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
1185
1221
  languages=["pol-Latn"],
1186
1222
  loader=sentence_transformers_loader,
1187
1223
  n_parameters=434961408,
1224
+ n_embedding_parameters=131_073_024,
1188
1225
  memory_usage_mb=1659,
1189
1226
  max_tokens=514.0,
1190
1227
  embed_dim=1024,
@@ -1271,6 +1308,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
1271
1308
  languages=udever_languages,
1272
1309
  loader=sentence_transformers_loader,
1273
1310
  n_parameters=None,
1311
+ n_embedding_parameters=385_351_680,
1274
1312
  memory_usage_mb=None,
1275
1313
  max_tokens=None,
1276
1314
  embed_dim=None,
@@ -1300,6 +1338,7 @@ izhx__udever_bloom_3b = ModelMeta(
1300
1338
  languages=udever_languages,
1301
1339
  loader=sentence_transformers_loader,
1302
1340
  n_parameters=None,
1341
+ n_embedding_parameters=642_252_800,
1303
1342
  memory_usage_mb=None,
1304
1343
  max_tokens=None,
1305
1344
  embed_dim=None,
@@ -1329,6 +1368,7 @@ izhx__udever_bloom_560m = ModelMeta(
1329
1368
  languages=udever_languages,
1330
1369
  loader=sentence_transformers_loader,
1331
1370
  n_parameters=None,
1371
+ n_embedding_parameters=256_901_120,
1332
1372
  memory_usage_mb=None,
1333
1373
  max_tokens=None,
1334
1374
  embed_dim=None,
@@ -1358,6 +1398,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
1358
1398
  languages=udever_languages,
1359
1399
  loader=sentence_transformers_loader,
1360
1400
  n_parameters=None,
1401
+ n_embedding_parameters=1_027_604_480,
1361
1402
  memory_usage_mb=None,
1362
1403
  max_tokens=None,
1363
1404
  embed_dim=None,
@@ -1387,6 +1428,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1387
1428
  languages=["eng-Latn"],
1388
1429
  loader=sentence_transformers_loader,
1389
1430
  n_parameters=109482240,
1431
+ n_embedding_parameters=23_440_896,
1390
1432
  memory_usage_mb=418,
1391
1433
  max_tokens=512.0,
1392
1434
  embed_dim=768,
@@ -1437,6 +1479,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1437
1479
  languages=["eng-Latn"],
1438
1480
  loader=sentence_transformers_loader,
1439
1481
  n_parameters=22713216,
1482
+ n_embedding_parameters=11_720_448,
1440
1483
  memory_usage_mb=87,
1441
1484
  max_tokens=512.0,
1442
1485
  embed_dim=384,
@@ -1487,6 +1530,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1487
1530
  languages=["eng-Latn"],
1488
1531
  loader=sentence_transformers_loader,
1489
1532
  n_parameters=335141888,
1533
+ n_embedding_parameters=31_254_528,
1490
1534
  memory_usage_mb=1278,
1491
1535
  max_tokens=512.0,
1492
1536
  embed_dim=1024,
@@ -1537,6 +1581,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1537
1581
  languages=["eng-Latn"],
1538
1582
  loader=sentence_transformers_loader,
1539
1583
  n_parameters=33360000,
1584
+ n_embedding_parameters=11_720_448,
1540
1585
  memory_usage_mb=127,
1541
1586
  max_tokens=512.0,
1542
1587
  embed_dim=384,
@@ -1587,6 +1632,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1587
1632
  languages=None,
1588
1633
  loader=sentence_transformers_loader,
1589
1634
  n_parameters=None,
1635
+ n_embedding_parameters=1_026_793_472,
1590
1636
  memory_usage_mb=None,
1591
1637
  max_tokens=None,
1592
1638
  embed_dim=4096,
@@ -1616,6 +1662,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1616
1662
  languages=["deu-Latn"],
1617
1663
  loader=sentence_transformers_loader,
1618
1664
  n_parameters=335736320,
1665
+ n_embedding_parameters=31_848_448,
1619
1666
  memory_usage_mb=1281,
1620
1667
  max_tokens=512.0,
1621
1668
  embed_dim=1024,
@@ -1640,6 +1687,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1640
1687
  languages=["eng-Latn"],
1641
1688
  loader=sentence_transformers_loader,
1642
1689
  n_parameters=33360000,
1690
+ n_embedding_parameters=11_720_448,
1643
1691
  memory_usage_mb=127,
1644
1692
  max_tokens=512.0,
1645
1693
  embed_dim=384,
@@ -1678,6 +1726,7 @@ avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
1678
1726
  languages=["eng-Latn"],
1679
1727
  loader=sentence_transformers_loader,
1680
1728
  n_parameters=33360000,
1729
+ n_embedding_parameters=11720448,
1681
1730
  memory_usage_mb=127,
1682
1731
  max_tokens=512.0,
1683
1732
  embed_dim=384,
@@ -1701,6 +1750,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1701
1750
  languages=["eng-Latn"],
1702
1751
  loader=sentence_transformers_loader,
1703
1752
  n_parameters=22713216,
1753
+ n_embedding_parameters=11_720_448,
1704
1754
  memory_usage_mb=87,
1705
1755
  max_tokens=512.0,
1706
1756
  embed_dim=384,
@@ -1724,6 +1774,7 @@ deepfile__embedder_100p = ModelMeta(
1724
1774
  languages=None,
1725
1775
  loader=sentence_transformers_loader,
1726
1776
  n_parameters=None,
1777
+ n_embedding_parameters=192_001_536,
1727
1778
  memory_usage_mb=1061,
1728
1779
  max_tokens=514.0,
1729
1780
  embed_dim=768,
@@ -1747,6 +1798,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1747
1798
  languages=["eng-Latn"],
1748
1799
  loader=sentence_transformers_loader,
1749
1800
  n_parameters=None,
1801
+ n_embedding_parameters=23_440_896,
1750
1802
  memory_usage_mb=None,
1751
1803
  max_tokens=512.0,
1752
1804
  embed_dim=None,
@@ -1770,6 +1822,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1770
1822
  languages=None,
1771
1823
  loader=sentence_transformers_loader,
1772
1824
  n_parameters=98688000,
1825
+ n_embedding_parameters=None,
1773
1826
  memory_usage_mb=158,
1774
1827
  max_tokens=512.0,
1775
1828
  embed_dim=1024,
@@ -1793,6 +1846,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1793
1846
  languages=["ara-Arab", "eng-Latn"],
1794
1847
  loader=sentence_transformers_loader,
1795
1848
  n_parameters=559890432,
1849
+ n_embedding_parameters=256_002_048,
1796
1850
  memory_usage_mb=2136,
1797
1851
  max_tokens=514.0,
1798
1852
  embed_dim=1024,
@@ -1833,6 +1887,7 @@ openbmb__minicpm_embedding = ModelMeta(
1833
1887
  release_date="2024-09-04",
1834
1888
  languages=["zho-Hans", "eng-Latn"],
1835
1889
  n_parameters=2724880896,
1890
+ n_embedding_parameters=282_822_912,
1836
1891
  memory_usage_mb=5197,
1837
1892
  max_tokens=512.0,
1838
1893
  embed_dim=2304,
@@ -1857,6 +1912,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1857
1912
  languages=["ara-Arab", "eng-Latn"],
1858
1913
  loader=sentence_transformers_loader,
1859
1914
  n_parameters=135193344,
1915
+ n_embedding_parameters=49_152_000,
1860
1916
  memory_usage_mb=516,
1861
1917
  max_tokens=512.0,
1862
1918
  embed_dim=768,
@@ -1888,6 +1944,7 @@ sbert_chinese_general_v1 = ModelMeta(
1888
1944
  languages=["zho-Hans"],
1889
1945
  loader=sentence_transformers_loader,
1890
1946
  n_parameters=None,
1947
+ n_embedding_parameters=16_226_304,
1891
1948
  memory_usage_mb=None, # Not visible on repo
1892
1949
  max_tokens=512,
1893
1950
  embed_dim=128,
@@ -1916,6 +1973,7 @@ dmeta_embedding_zh_small = ModelMeta(
1916
1973
  languages=["zho-Hans"],
1917
1974
  loader=sentence_transformers_loader,
1918
1975
  n_parameters=int(74.2 * 1e6),
1976
+ n_embedding_parameters=16_226_304,
1919
1977
  memory_usage_mb=283,
1920
1978
  max_tokens=1024,
1921
1979
  embed_dim=768,
@@ -1939,6 +1997,7 @@ xiaobu_embedding = ModelMeta(
1939
1997
  languages=["zho-Hans"],
1940
1998
  loader=sentence_transformers_loader,
1941
1999
  n_parameters=int(326 * 1e6),
2000
+ n_embedding_parameters=21_635_072,
1942
2001
  memory_usage_mb=1244,
1943
2002
  max_tokens=512,
1944
2003
  embed_dim=1024,
@@ -1963,6 +2022,7 @@ xiaobu_embedding_v2 = ModelMeta(
1963
2022
  languages=["zho-Hans"],
1964
2023
  loader=sentence_transformers_loader,
1965
2024
  n_parameters=int(326 * 1e6),
2025
+ n_embedding_parameters=21_635_072,
1966
2026
  memory_usage_mb=1242,
1967
2027
  max_tokens=512,
1968
2028
  embed_dim=768,
@@ -1987,6 +2047,7 @@ yinka_embedding = ModelMeta(
1987
2047
  languages=["zho-Hans"],
1988
2048
  loader=sentence_transformers_loader,
1989
2049
  n_parameters=int(326 * 1e6),
2050
+ n_embedding_parameters=21_635_072,
1990
2051
  memory_usage_mb=1244,
1991
2052
  max_tokens=512,
1992
2053
  embed_dim=1024,
@@ -2010,6 +2071,7 @@ conan_embedding = ModelMeta(
2010
2071
  languages=["zho-Hans"],
2011
2072
  loader=sentence_transformers_loader,
2012
2073
  n_parameters=int(326 * 1e6),
2074
+ n_embedding_parameters=21_635_072,
2013
2075
  memory_usage_mb=1242,
2014
2076
  max_tokens=512,
2015
2077
  embed_dim=768,
@@ -2043,6 +2105,7 @@ ember_v1 = ModelMeta(
2043
2105
  release_date="2023-10-10",
2044
2106
  languages=["eng-Latn"],
2045
2107
  n_parameters=int(335 * 1e6),
2108
+ n_embedding_parameters=31_254_528,
2046
2109
  memory_usage_mb=1278,
2047
2110
  max_tokens=512,
2048
2111
  embed_dim=1024,
@@ -31,6 +31,7 @@ mxbai_embed_large_v1 = ModelMeta(
31
31
  revision="990580e27d329c7408b3741ecff85876e128e203",
32
32
  release_date="2024-03-07", # initial commit of hf model.
33
33
  n_parameters=335_000_000,
34
+ n_embedding_parameters=31_254_528,
34
35
  memory_usage_mb=639,
35
36
  max_tokens=512,
36
37
  embed_dim=1024,
@@ -75,6 +76,7 @@ mxbai_embed_2d_large_v1 = ModelMeta(
75
76
  revision="7e639ca8e344af398876ead3b19ec3c0b9068f49",
76
77
  release_date="2024-03-04", # initial commit of hf model.
77
78
  n_parameters=335_000_000,
79
+ n_embedding_parameters=31_254_528,
78
80
  memory_usage_mb=None,
79
81
  max_tokens=512,
80
82
  embed_dim=768,
@@ -106,6 +108,7 @@ mxbai_embed_xsmall_v1 = ModelMeta(
106
108
  revision="2f741ec33328bb57e4704e1238fc59a4a5745705",
107
109
  release_date="2024-08-13", # initial commit of hf model.
108
110
  n_parameters=24_100_000,
111
+ n_embedding_parameters=11_720_448,
109
112
  memory_usage_mb=None,
110
113
  max_tokens=512,
111
114
  embed_dim=384,
@@ -16,7 +16,8 @@ mme5_mllama = ModelMeta(
16
16
  revision="cbb328b9bf9ff5362c852c3166931903226d46f1",
17
17
  release_date="2025-02-12",
18
18
  languages=["eng-Latn"],
19
- n_parameters=10_600_000_000, # 10.6B
19
+ n_parameters=10_600_000_000, # 10.6B
20
+ n_embedding_parameters=None,
20
21
  memory_usage_mb=20300,
21
22
  max_tokens=128_000,
22
23
  embed_dim=4096,
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies, requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  MOCOV3_CITATION = """@Article{chen2021mocov3,
14
19
  author = {Xinlei Chen* and Saining Xie* and Kaiming He},
@@ -125,6 +130,7 @@ mocov3_vit_base = ModelMeta(
125
130
  release_date="2024-06-03",
126
131
  modalities=["image"],
127
132
  n_parameters=86_600_000,
133
+ n_embedding_parameters=None,
128
134
  memory_usage_mb=330,
129
135
  max_tokens=None,
130
136
  embed_dim=768,
@@ -149,6 +155,7 @@ mocov3_vit_large = ModelMeta(
149
155
  release_date="2024-06-03",
150
156
  modalities=["image"],
151
157
  n_parameters=304_000_000,
158
+ n_embedding_parameters=None,
152
159
  memory_usage_mb=1161,
153
160
  max_tokens=None,
154
161
  embed_dim=1024,