mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. mteb/__init__.py +4 -0
  2. mteb/_create_dataloaders.py +6 -3
  3. mteb/_evaluators/any_sts_evaluator.py +21 -12
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +1 -1
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +30 -38
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_data_filter/__init__.py +0 -0
  13. mteb/abstasks/_data_filter/filters.py +125 -0
  14. mteb/abstasks/_data_filter/task_pipelines.py +102 -0
  15. mteb/abstasks/_statistics_calculation.py +6 -2
  16. mteb/abstasks/classification.py +0 -2
  17. mteb/abstasks/clustering.py +1 -1
  18. mteb/abstasks/clustering_legacy.py +3 -0
  19. mteb/abstasks/multilabel_classification.py +10 -3
  20. mteb/abstasks/pair_classification.py +8 -1
  21. mteb/abstasks/sts.py +7 -0
  22. mteb/abstasks/task_metadata.py +1 -0
  23. mteb/benchmarks/_create_table.py +84 -37
  24. mteb/benchmarks/benchmark.py +74 -15
  25. mteb/benchmarks/benchmarks/__init__.py +8 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +259 -15
  27. mteb/benchmarks/get_benchmark.py +2 -0
  28. mteb/cache.py +47 -10
  29. mteb/deprecated_evaluator.py +8 -13
  30. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  31. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  32. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  39. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  40. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  41. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  42. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  43. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  44. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  45. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  46. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  47. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  48. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  49. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  50. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  51. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  53. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  54. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  56. mteb/evaluate.py +65 -45
  57. mteb/leaderboard/app.py +268 -133
  58. mteb/leaderboard/benchmark_selector.py +14 -5
  59. mteb/leaderboard/figures.py +13 -15
  60. mteb/leaderboard/table.py +82 -17
  61. mteb/models/__init__.py +4 -1
  62. mteb/models/abs_encoder.py +21 -17
  63. mteb/models/cache_wrappers/__init__.py +2 -1
  64. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  65. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  66. mteb/models/get_model_meta.py +3 -114
  67. mteb/models/instruct_wrapper.py +5 -1
  68. mteb/models/model_implementations/align_models.py +7 -0
  69. mteb/models/model_implementations/amazon_models.py +1 -0
  70. mteb/models/model_implementations/andersborges.py +65 -0
  71. mteb/models/model_implementations/ara_models.py +8 -0
  72. mteb/models/model_implementations/arctic_models.py +8 -0
  73. mteb/models/model_implementations/b1ade_models.py +1 -0
  74. mteb/models/model_implementations/bedrock_models.py +4 -0
  75. mteb/models/model_implementations/bge_models.py +60 -0
  76. mteb/models/model_implementations/bica_model.py +35 -0
  77. mteb/models/model_implementations/blip2_models.py +11 -0
  78. mteb/models/model_implementations/blip_models.py +27 -0
  79. mteb/models/model_implementations/bm25.py +1 -0
  80. mteb/models/model_implementations/bmretriever_models.py +4 -0
  81. mteb/models/model_implementations/cadet_models.py +9 -0
  82. mteb/models/model_implementations/cde_models.py +14 -0
  83. mteb/models/model_implementations/clip_models.py +3 -0
  84. mteb/models/model_implementations/clips_models.py +100 -0
  85. mteb/models/model_implementations/codefuse_models.py +162 -0
  86. mteb/models/model_implementations/codesage_models.py +15 -0
  87. mteb/models/model_implementations/cohere_models.py +8 -1
  88. mteb/models/model_implementations/cohere_v.py +5 -0
  89. mteb/models/model_implementations/colpali_models.py +14 -6
  90. mteb/models/model_implementations/colqwen_models.py +271 -1
  91. mteb/models/model_implementations/colsmol_models.py +2 -0
  92. mteb/models/model_implementations/conan_models.py +1 -0
  93. mteb/models/model_implementations/dino_models.py +171 -0
  94. mteb/models/model_implementations/e5_instruct.py +4 -0
  95. mteb/models/model_implementations/e5_models.py +12 -101
  96. mteb/models/model_implementations/e5_v.py +1 -0
  97. mteb/models/model_implementations/eagerworks_models.py +164 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  99. mteb/models/model_implementations/en_code_retriever.py +1 -0
  100. mteb/models/model_implementations/euler_models.py +32 -0
  101. mteb/models/model_implementations/evaclip_models.py +4 -0
  102. mteb/models/model_implementations/fa_models.py +58 -0
  103. mteb/models/model_implementations/facebookai.py +193 -0
  104. mteb/models/model_implementations/geogpt_models.py +1 -0
  105. mteb/models/model_implementations/gme_v_models.py +11 -5
  106. mteb/models/model_implementations/google_models.py +16 -5
  107. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
  108. mteb/models/model_implementations/gritlm_models.py +2 -0
  109. mteb/models/model_implementations/gte_models.py +78 -0
  110. mteb/models/model_implementations/hinvec_models.py +1 -0
  111. mteb/models/model_implementations/human.py +1 -0
  112. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  113. mteb/models/model_implementations/inf_models.py +2 -0
  114. mteb/models/model_implementations/jasper_models.py +255 -2
  115. mteb/models/model_implementations/jina_clip.py +1 -0
  116. mteb/models/model_implementations/jina_models.py +209 -5
  117. mteb/models/model_implementations/kalm_models.py +203 -25
  118. mteb/models/model_implementations/kblab.py +31 -0
  119. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  120. mteb/models/model_implementations/kfst.py +25 -0
  121. mteb/models/model_implementations/kowshik24_models.py +32 -0
  122. mteb/models/model_implementations/lens_models.py +2 -0
  123. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  124. mteb/models/model_implementations/linq_models.py +3 -2
  125. mteb/models/model_implementations/listconranker.py +1 -1
  126. mteb/models/model_implementations/llm2clip_models.py +3 -0
  127. mteb/models/model_implementations/llm2vec_models.py +8 -0
  128. mteb/models/model_implementations/mcinext_models.py +3 -0
  129. mteb/models/model_implementations/mdbr_models.py +2 -0
  130. mteb/models/model_implementations/misc_models.py +362 -0
  131. mteb/models/model_implementations/mme5_models.py +1 -0
  132. mteb/models/model_implementations/moco_models.py +11 -0
  133. mteb/models/model_implementations/mod_models.py +191 -0
  134. mteb/models/model_implementations/model2vec_models.py +13 -0
  135. mteb/models/model_implementations/moka_models.py +3 -0
  136. mteb/models/model_implementations/mxbai_models.py +9 -0
  137. mteb/models/model_implementations/nbailab.py +70 -0
  138. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  139. mteb/models/model_implementations/nomic_models.py +156 -4
  140. mteb/models/model_implementations/nomic_models_vision.py +7 -2
  141. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
  142. mteb/models/model_implementations/nvidia_models.py +4 -1
  143. mteb/models/model_implementations/octen_models.py +195 -0
  144. mteb/models/model_implementations/openai_models.py +20 -16
  145. mteb/models/model_implementations/openclip_models.py +24 -0
  146. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  147. mteb/models/model_implementations/ops_moa_models.py +4 -2
  148. mteb/models/model_implementations/pawan_models.py +39 -0
  149. mteb/models/model_implementations/piccolo_models.py +8 -0
  150. mteb/models/model_implementations/promptriever_models.py +8 -4
  151. mteb/models/model_implementations/pylate_models.py +37 -4
  152. mteb/models/model_implementations/qodo_models.py +2 -0
  153. mteb/models/model_implementations/qtack_models.py +1 -0
  154. mteb/models/model_implementations/qwen3_models.py +6 -3
  155. mteb/models/model_implementations/qzhou_models.py +3 -1
  156. mteb/models/model_implementations/random_baseline.py +16 -21
  157. mteb/models/model_implementations/rasgaard_models.py +34 -0
  158. mteb/models/model_implementations/reasonir_model.py +1 -0
  159. mteb/models/model_implementations/repllama_models.py +2 -0
  160. mteb/models/model_implementations/rerankers_custom.py +3 -3
  161. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  162. mteb/models/model_implementations/richinfoai_models.py +1 -0
  163. mteb/models/model_implementations/ru_sentence_models.py +51 -0
  164. mteb/models/model_implementations/ruri_models.py +322 -0
  165. mteb/models/model_implementations/salesforce_models.py +3 -0
  166. mteb/models/model_implementations/samilpwc_models.py +1 -0
  167. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  168. mteb/models/model_implementations/searchmap_models.py +1 -0
  169. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  170. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  171. mteb/models/model_implementations/seed_models.py +1 -0
  172. mteb/models/model_implementations/sentence_transformers_models.py +57 -0
  173. mteb/models/model_implementations/shuu_model.py +32 -31
  174. mteb/models/model_implementations/siglip_models.py +10 -0
  175. mteb/models/model_implementations/sonar_models.py +1 -0
  176. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  177. mteb/models/model_implementations/stella_models.py +6 -0
  178. mteb/models/model_implementations/tarka_models.py +376 -0
  179. mteb/models/model_implementations/ua_sentence_models.py +10 -0
  180. mteb/models/model_implementations/uae_models.py +1 -0
  181. mteb/models/model_implementations/vdr_models.py +2 -0
  182. mteb/models/model_implementations/vi_vn_models.py +39 -0
  183. mteb/models/model_implementations/vista_models.py +2 -0
  184. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  185. mteb/models/model_implementations/voyage_models.py +15 -0
  186. mteb/models/model_implementations/voyage_v.py +8 -2
  187. mteb/models/model_implementations/xyz_models.py +1 -0
  188. mteb/models/model_implementations/youtu_models.py +1 -0
  189. mteb/models/model_implementations/yuan_models.py +34 -0
  190. mteb/models/model_implementations/yuan_models_en.py +58 -0
  191. mteb/models/model_meta.py +442 -22
  192. mteb/models/search_encoder_index/__init__.py +7 -0
  193. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  194. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  195. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
  196. mteb/models/search_wrappers.py +165 -48
  197. mteb/models/sentence_transformer_wrapper.py +2 -7
  198. mteb/results/benchmark_results.py +88 -47
  199. mteb/results/model_result.py +11 -4
  200. mteb/results/task_result.py +37 -19
  201. mteb/similarity_functions.py +49 -0
  202. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  203. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  204. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  205. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  206. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  207. mteb/tasks/classification/ara/ajgt.py +1 -2
  208. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  209. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  210. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  211. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  212. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  213. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  214. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  215. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  216. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  217. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  218. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  220. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  221. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  222. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  223. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  224. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  225. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  226. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  227. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  228. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  229. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  230. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  231. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  232. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  233. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  234. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  235. mteb/tasks/classification/eng/news_classification.py +1 -2
  236. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  237. mteb/tasks/classification/eng/patent_classification.py +1 -2
  238. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  239. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  240. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  241. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  242. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  243. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  244. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  245. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  246. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  247. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  248. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  249. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  250. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  251. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  252. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  253. mteb/tasks/classification/est/estonian_valence.py +1 -2
  254. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  255. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  257. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  260. mteb/tasks/classification/heb/__init__.py +6 -1
  261. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  262. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  263. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  264. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  265. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  266. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  267. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  268. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  269. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  270. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  271. mteb/tasks/classification/kor/klue_tc.py +1 -2
  272. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  274. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  275. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  276. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  277. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  278. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  279. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  280. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  281. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  282. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  283. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  284. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  285. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  286. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  287. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  288. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  289. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  290. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  291. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  292. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  293. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  294. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  295. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  296. mteb/tasks/classification/pol/polish_classification.py +3 -6
  297. mteb/tasks/classification/ron/moroco.py +1 -2
  298. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  299. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  300. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  301. mteb/tasks/classification/rus/headline_classification.py +1 -2
  302. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  303. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  304. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  305. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  306. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  307. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  308. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  309. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  310. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  311. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  312. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  313. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  314. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  315. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  316. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  317. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  318. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  319. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  320. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  321. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  322. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  323. mteb/tasks/classification/tur/__init__.py +4 -0
  324. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  325. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  326. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  327. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  328. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  329. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  330. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  331. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  332. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  333. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  334. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  335. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  336. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  337. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  338. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  339. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  340. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  341. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  342. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  343. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  344. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  345. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  346. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  347. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  348. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  349. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  350. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  351. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  352. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  353. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  354. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  355. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  356. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  357. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  358. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  359. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  360. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  361. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  362. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  363. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  364. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  365. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  366. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  367. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  368. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  369. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  370. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  371. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  372. mteb/tasks/pair_classification/rus/terra.py +51 -25
  373. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  374. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  375. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  376. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  377. mteb/tasks/reranking/jpn/__init__.py +9 -1
  378. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  379. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  380. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  381. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  382. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  383. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  384. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  385. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  386. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  387. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  388. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  389. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  390. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  391. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  392. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  393. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  394. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  395. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  396. mteb/tasks/retrieval/kor/__init__.py +2 -1
  397. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  398. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  399. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  400. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  401. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  402. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  403. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  404. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  405. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  406. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  407. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  408. mteb/tasks/retrieval/nld/__init__.py +8 -4
  409. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  410. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  411. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  412. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  413. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  414. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  415. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  416. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  417. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  418. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  419. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  420. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  421. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  422. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  423. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  424. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  425. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  426. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  427. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  428. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  429. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  430. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  431. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  432. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  433. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  434. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  435. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  436. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  437. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  438. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  439. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  440. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  441. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  442. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  443. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  444. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  445. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  446. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  447. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  448. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  449. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  450. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  451. mteb/types/_encoder_io.py +7 -2
  452. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
  453. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
  454. mteb/models/model_implementations/nb_sbert.py +0 -25
  455. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
  456. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
  457. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
  458. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
@@ -106,6 +106,7 @@ dinov2_training_datasets = set(
106
106
  dinov2_small = ModelMeta(
107
107
  loader=DINOModel, # type: ignore
108
108
  name="facebook/dinov2-small",
109
+ model_type=["dense"],
109
110
  languages=["eng-Latn"],
110
111
  revision="ed25f3a31f01632728cabb09d1542f84ab7b0056",
111
112
  release_date="2023-07-18",
@@ -123,11 +124,20 @@ dinov2_small = ModelMeta(
123
124
  similarity_fn_name=ScoringFunction.COSINE,
124
125
  use_instructions=False,
125
126
  training_datasets=dinov2_training_datasets,
127
+ citation="""@misc{oquab2023dinov2,
128
+ title={DINOv2: Learning Robust Visual Features without Supervision},
129
+ author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
130
+ year={2023},
131
+ eprint={2304.07193},
132
+ archivePrefix={arXiv},
133
+ primaryClass={cs.CV}
134
+ }""",
126
135
  )
127
136
 
128
137
  dinov2_base = ModelMeta(
129
138
  loader=DINOModel, # type: ignore
130
139
  name="facebook/dinov2-base",
140
+ model_type=["dense"],
131
141
  languages=["eng-Latn"],
132
142
  revision="f9e44c814b77203eaa57a6bdbbd535f21ede1415",
133
143
  release_date="2023-07-18",
@@ -145,11 +155,20 @@ dinov2_base = ModelMeta(
145
155
  similarity_fn_name=ScoringFunction.COSINE,
146
156
  use_instructions=False,
147
157
  training_datasets=dinov2_training_datasets,
158
+ citation="""@misc{oquab2023dinov2,
159
+ title={DINOv2: Learning Robust Visual Features without Supervision},
160
+ author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
161
+ year={2023},
162
+ eprint={2304.07193},
163
+ archivePrefix={arXiv},
164
+ primaryClass={cs.CV}
165
+ }""",
148
166
  )
149
167
 
150
168
  dinov2_large = ModelMeta(
151
169
  loader=DINOModel, # type: ignore
152
170
  name="facebook/dinov2-large",
171
+ model_type=["dense"],
153
172
  languages=["eng-Latn"],
154
173
  revision="47b73eefe95e8d44ec3623f8890bd894b6ea2d6c",
155
174
  release_date="2023-07-18",
@@ -167,11 +186,20 @@ dinov2_large = ModelMeta(
167
186
  similarity_fn_name=ScoringFunction.COSINE,
168
187
  use_instructions=False,
169
188
  training_datasets=dinov2_training_datasets,
189
+ citation="""@misc{oquab2023dinov2,
190
+ title={DINOv2: Learning Robust Visual Features without Supervision},
191
+ author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
192
+ year={2023},
193
+ eprint={2304.07193},
194
+ archivePrefix={arXiv},
195
+ primaryClass={cs.CV}
196
+ }""",
170
197
  )
171
198
 
172
199
  dinov2_giant = ModelMeta(
173
200
  loader=DINOModel, # type: ignore
174
201
  name="facebook/dinov2-giant",
202
+ model_type=["dense"],
175
203
  languages=["eng-Latn"],
176
204
  revision="611a9d42f2335e0f921f1e313ad3c1b7178d206d",
177
205
  release_date="2023-07-18",
@@ -189,6 +217,14 @@ dinov2_giant = ModelMeta(
189
217
  similarity_fn_name=ScoringFunction.COSINE,
190
218
  use_instructions=False,
191
219
  training_datasets=dinov2_training_datasets,
220
+ citation="""@misc{oquab2023dinov2,
221
+ title={DINOv2: Learning Robust Visual Features without Supervision},
222
+ author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
223
+ year={2023},
224
+ eprint={2304.07193},
225
+ archivePrefix={arXiv},
226
+ primaryClass={cs.CV}
227
+ }""",
192
228
  )
193
229
 
194
230
  webssl_dino_training_datasets = set(
@@ -198,6 +234,7 @@ webssl_dino_training_datasets = set(
198
234
  webssl_dino300m_full2b = ModelMeta(
199
235
  loader=DINOModel,
200
236
  name="facebook/webssl-dino300m-full2b-224",
237
+ model_type=["dense"],
201
238
  languages=["eng-Latn"],
202
239
  revision="8529cdb3fb75014932af3b896455fc21c386168e",
203
240
  release_date="2025-04-24",
@@ -215,11 +252,20 @@ webssl_dino300m_full2b = ModelMeta(
215
252
  similarity_fn_name=None,
216
253
  use_instructions=False,
217
254
  training_datasets=webssl_dino_training_datasets,
255
+ citation="""@article{fan2025scaling,
256
+ title={Scaling Language-Free Visual Representation Learning},
257
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
258
+ year={2025},
259
+ eprint={2504.01017},
260
+ archivePrefix={arXiv},
261
+ primaryClass={cs.CV}
262
+ }""",
218
263
  )
219
264
 
220
265
  webssl_dino1b_full2b = ModelMeta(
221
266
  loader=DINOModel,
222
267
  name="facebook/webssl-dino1b-full2b-224",
268
+ model_type=["dense"],
223
269
  languages=["eng-Latn"],
224
270
  revision="d3bf033d9c8cc62ea9e73c40956642cad2ec568a",
225
271
  release_date="2025-04-24",
@@ -237,11 +283,20 @@ webssl_dino1b_full2b = ModelMeta(
237
283
  similarity_fn_name=None,
238
284
  use_instructions=False,
239
285
  training_datasets=webssl_dino_training_datasets,
286
+ citation="""@article{fan2025scaling,
287
+ title={Scaling Language-Free Visual Representation Learning},
288
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
289
+ year={2025},
290
+ eprint={2504.01017},
291
+ archivePrefix={arXiv},
292
+ primaryClass={cs.CV}
293
+ }""",
240
294
  )
241
295
 
242
296
  webssl_dino2b_full2b = ModelMeta(
243
297
  loader=DINOModel,
244
298
  name="facebook/webssl-dino2b-full2b-224",
299
+ model_type=["dense"],
245
300
  languages=["eng-Latn"],
246
301
  revision="cd5893e3fd2e988eb716792049b3dd53b3f1b68b",
247
302
  release_date="2025-04-24",
@@ -259,11 +314,20 @@ webssl_dino2b_full2b = ModelMeta(
259
314
  similarity_fn_name=None,
260
315
  use_instructions=False,
261
316
  training_datasets=webssl_dino_training_datasets,
317
+ citation="""@article{fan2025scaling,
318
+ title={Scaling Language-Free Visual Representation Learning},
319
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
320
+ year={2025},
321
+ eprint={2504.01017},
322
+ archivePrefix={arXiv},
323
+ primaryClass={cs.CV}
324
+ }""",
262
325
  )
263
326
 
264
327
  webssl_dino3b_full2b = ModelMeta(
265
328
  loader=DINOModel,
266
329
  name="facebook/webssl-dino3b-full2b-224",
330
+ model_type=["dense"],
267
331
  languages=["eng-Latn"],
268
332
  revision="2d015c340b16bc47bc6557fcb4e6c83a9d4aa1d3",
269
333
  release_date="2025-04-24",
@@ -281,11 +345,20 @@ webssl_dino3b_full2b = ModelMeta(
281
345
  similarity_fn_name=None,
282
346
  use_instructions=False,
283
347
  training_datasets=webssl_dino_training_datasets,
348
+ citation="""@article{fan2025scaling,
349
+ title={Scaling Language-Free Visual Representation Learning},
350
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
351
+ year={2025},
352
+ eprint={2504.01017},
353
+ archivePrefix={arXiv},
354
+ primaryClass={cs.CV}
355
+ }""",
284
356
  )
285
357
 
286
358
  webssl_dino5b_full2b = ModelMeta(
287
359
  loader=DINOModel,
288
360
  name="facebook/webssl-dino5b-full2b-224",
361
+ model_type=["dense"],
289
362
  languages=["eng-Latn"],
290
363
  revision="88006b18b9af369f6c611db7a64d908bde3714e0",
291
364
  release_date="2025-04-24",
@@ -303,11 +376,20 @@ webssl_dino5b_full2b = ModelMeta(
303
376
  similarity_fn_name=None,
304
377
  use_instructions=False,
305
378
  training_datasets=webssl_dino_training_datasets,
379
+ citation="""@article{fan2025scaling,
380
+ title={Scaling Language-Free Visual Representation Learning},
381
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
382
+ year={2025},
383
+ eprint={2504.01017},
384
+ archivePrefix={arXiv},
385
+ primaryClass={cs.CV}
386
+ }""",
306
387
  )
307
388
 
308
389
  webssl_dino7b_full8b_224 = ModelMeta(
309
390
  loader=DINOModel,
310
391
  name="facebook/webssl-dino7b-full8b-224",
392
+ model_type=["dense"],
311
393
  languages=["eng-Latn"],
312
394
  revision="c6085463ea680043042a80c6d41db2c65e85f466",
313
395
  release_date="2025-04-24",
@@ -325,11 +407,20 @@ webssl_dino7b_full8b_224 = ModelMeta(
325
407
  similarity_fn_name=None,
326
408
  use_instructions=False,
327
409
  training_datasets=webssl_dino_training_datasets,
410
+ citation="""@article{fan2025scaling,
411
+ title={Scaling Language-Free Visual Representation Learning},
412
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
413
+ year={2025},
414
+ eprint={2504.01017},
415
+ archivePrefix={arXiv},
416
+ primaryClass={cs.CV}
417
+ }""",
328
418
  )
329
419
 
330
420
  webssl_dino7b_full8b_378 = ModelMeta(
331
421
  loader=DINOModel,
332
422
  name="facebook/webssl-dino7b-full8b-378",
423
+ model_type=["dense"],
333
424
  languages=["eng-Latn"],
334
425
  revision="53c8c5b43070bd2ddb3f66161140408ce832301f",
335
426
  release_date="2025-04-24",
@@ -347,11 +438,20 @@ webssl_dino7b_full8b_378 = ModelMeta(
347
438
  similarity_fn_name=None,
348
439
  use_instructions=False,
349
440
  training_datasets=webssl_dino_training_datasets,
441
+ citation="""@article{fan2025scaling,
442
+ title={Scaling Language-Free Visual Representation Learning},
443
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
444
+ year={2025},
445
+ eprint={2504.01017},
446
+ archivePrefix={arXiv},
447
+ primaryClass={cs.CV}
448
+ }""",
350
449
  )
351
450
 
352
451
  webssl_dino7b_full8b_518 = ModelMeta(
353
452
  loader=DINOModel,
354
453
  name="facebook/webssl-dino7b-full8b-518",
454
+ model_type=["dense"],
355
455
  languages=["eng-Latn"],
356
456
  revision="aee350d2c5e3e5fdb7ee6985291d808ea5eef431",
357
457
  release_date="2025-04-24",
@@ -369,12 +469,21 @@ webssl_dino7b_full8b_518 = ModelMeta(
369
469
  similarity_fn_name=None,
370
470
  use_instructions=False,
371
471
  training_datasets=webssl_dino_training_datasets,
472
+ citation="""@article{fan2025scaling,
473
+ title={Scaling Language-Free Visual Representation Learning},
474
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
475
+ year={2025},
476
+ eprint={2504.01017},
477
+ archivePrefix={arXiv},
478
+ primaryClass={cs.CV}
479
+ }""",
372
480
  )
373
481
 
374
482
 
375
483
  webssl_dino2b_light2b = ModelMeta(
376
484
  loader=DINOModel,
377
485
  name="facebook/webssl-dino2b-light2b-224",
486
+ model_type=["dense"],
378
487
  languages=["eng-Latn"],
379
488
  revision="633a663f304e63cc3cbec3f7f9ca2fbc94736128",
380
489
  release_date="2025-04-24",
@@ -392,11 +501,20 @@ webssl_dino2b_light2b = ModelMeta(
392
501
  similarity_fn_name=None,
393
502
  use_instructions=False,
394
503
  training_datasets=webssl_dino_training_datasets,
504
+ citation="""@article{fan2025scaling,
505
+ title={Scaling Language-Free Visual Representation Learning},
506
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
507
+ year={2025},
508
+ eprint={2504.01017},
509
+ archivePrefix={arXiv},
510
+ primaryClass={cs.CV}
511
+ }""",
395
512
  )
396
513
 
397
514
  webssl_dino2b_heavy2b = ModelMeta(
398
515
  loader=DINOModel,
399
516
  name="facebook/webssl-dino2b-heavy2b-224",
517
+ model_type=["dense"],
400
518
  languages=["eng-Latn"],
401
519
  revision="9f46eb0c0129656a1ef195fde072e3765abdb7c6",
402
520
  release_date="2025-04-24",
@@ -414,11 +532,20 @@ webssl_dino2b_heavy2b = ModelMeta(
414
532
  similarity_fn_name=None,
415
533
  use_instructions=False,
416
534
  training_datasets=webssl_dino_training_datasets,
535
+ citation="""@article{fan2025scaling,
536
+ title={Scaling Language-Free Visual Representation Learning},
537
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
538
+ year={2025},
539
+ eprint={2504.01017},
540
+ archivePrefix={arXiv},
541
+ primaryClass={cs.CV}
542
+ }""",
417
543
  )
418
544
 
419
545
  webssl_dino3b_light2b = ModelMeta(
420
546
  loader=DINOModel,
421
547
  name="facebook/webssl-dino3b-light2b-224",
548
+ model_type=["dense"],
422
549
  languages=["eng-Latn"],
423
550
  revision="4d0160f60673805431f4ad14983e712ed88be5b8",
424
551
  release_date="2025-04-24",
@@ -436,11 +563,20 @@ webssl_dino3b_light2b = ModelMeta(
436
563
  similarity_fn_name=None,
437
564
  use_instructions=False,
438
565
  training_datasets=webssl_dino_training_datasets,
566
+ citation="""@article{fan2025scaling,
567
+ title={Scaling Language-Free Visual Representation Learning},
568
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
569
+ year={2025},
570
+ eprint={2504.01017},
571
+ archivePrefix={arXiv},
572
+ primaryClass={cs.CV}
573
+ }""",
439
574
  )
440
575
 
441
576
  webssl_dino3b_heavy2b = ModelMeta(
442
577
  loader=DINOModel,
443
578
  name="facebook/webssl-dino3b-heavy2b-224",
579
+ model_type=["dense"],
444
580
  languages=["eng-Latn"],
445
581
  revision="dd39c2910747561b332285d96c4dce0bdb240775",
446
582
  release_date="2025-04-24",
@@ -458,11 +594,20 @@ webssl_dino3b_heavy2b = ModelMeta(
458
594
  similarity_fn_name=None,
459
595
  use_instructions=False,
460
596
  training_datasets=webssl_dino_training_datasets,
597
+ citation="""@article{fan2025scaling,
598
+ title={Scaling Language-Free Visual Representation Learning},
599
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
600
+ year={2025},
601
+ eprint={2504.01017},
602
+ archivePrefix={arXiv},
603
+ primaryClass={cs.CV}
604
+ }""",
461
605
  )
462
606
 
463
607
  webssl_mae300m_full2b = ModelMeta(
464
608
  loader=DINOModel,
465
609
  name="facebook/webssl-mae300m-full2b-224",
610
+ model_type=["dense"],
466
611
  languages=["eng-Latn"],
467
612
  revision="4655a0ac1726c206ba14d5ccb26758c62a4d03b0",
468
613
  release_date="2025-04-24",
@@ -480,11 +625,20 @@ webssl_mae300m_full2b = ModelMeta(
480
625
  similarity_fn_name=None,
481
626
  use_instructions=False,
482
627
  training_datasets=webssl_dino_training_datasets,
628
+ citation="""@article{fan2025scaling,
629
+ title={Scaling Language-Free Visual Representation Learning},
630
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
631
+ year={2025},
632
+ eprint={2504.01017},
633
+ archivePrefix={arXiv},
634
+ primaryClass={cs.CV}
635
+ }""",
483
636
  )
484
637
 
485
638
  webssl_mae700m_full2b = ModelMeta(
486
639
  loader=DINOModel,
487
640
  name="facebook/webssl-mae700m-full2b-224",
641
+ model_type=["dense"],
488
642
  languages=["eng-Latn"],
489
643
  revision="c32be382e757d73a178de1ead62c27391d4b4280",
490
644
  release_date="2025-04-24",
@@ -502,11 +656,20 @@ webssl_mae700m_full2b = ModelMeta(
502
656
  similarity_fn_name=None,
503
657
  use_instructions=False,
504
658
  training_datasets=webssl_dino_training_datasets,
659
+ citation="""@article{fan2025scaling,
660
+ title={Scaling Language-Free Visual Representation Learning},
661
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
662
+ year={2025},
663
+ eprint={2504.01017},
664
+ archivePrefix={arXiv},
665
+ primaryClass={cs.CV}
666
+ }""",
505
667
  )
506
668
 
507
669
  webssl_mae1b_full2b = ModelMeta(
508
670
  loader=DINOModel,
509
671
  name="facebook/webssl-mae1b-full2b-224",
672
+ model_type=["dense"],
510
673
  languages=["eng-Latn"],
511
674
  revision="5880aefedbad8db0f44d27358f6f08e8576f70fc",
512
675
  release_date="2025-04-24",
@@ -524,4 +687,12 @@ webssl_mae1b_full2b = ModelMeta(
524
687
  similarity_fn_name=None,
525
688
  use_instructions=False,
526
689
  training_datasets=webssl_dino_training_datasets,
690
+ citation="""@article{fan2025scaling,
691
+ title={Scaling Language-Free Visual Representation Learning},
692
+ author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
693
+ year={2025},
694
+ eprint={2504.01017},
695
+ archivePrefix={arXiv},
696
+ primaryClass={cs.CV}
697
+ }""",
527
698
  )
@@ -40,6 +40,7 @@ e5_instruct = ModelMeta(
40
40
  normalized=True,
41
41
  ),
42
42
  name="intfloat/multilingual-e5-large-instruct",
43
+ model_type=["dense"],
43
44
  languages=XLMR_LANGUAGES,
44
45
  open_weights=True,
45
46
  revision="baa7be480a7de1539afce709c8f13f833a510e0a",
@@ -78,6 +79,7 @@ e5_mistral = ModelMeta(
78
79
  normalized=True,
79
80
  ),
80
81
  name="intfloat/e5-mistral-7b-instruct",
82
+ model_type=["dense"],
81
83
  languages=MISTRAL_LANGUAGES,
82
84
  open_weights=True,
83
85
  revision="07163b72af1488142a360786df853f237b1a3ca1",
@@ -125,6 +127,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
125
127
  normalized=True,
126
128
  ),
127
129
  name="zeta-alpha-ai/Zeta-Alpha-E5-Mistral",
130
+ model_type=["dense"],
128
131
  revision="c791d37474fa6a5c72eb3a2522be346bc21fbfc3",
129
132
  release_date="2024-08-30",
130
133
  languages=["eng-Latn"],
@@ -201,6 +204,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
201
204
  tokenizer_kwargs={"pad_token": "</s>"},
202
205
  ),
203
206
  name="BeastyZ/e5-R-mistral-7b",
207
+ model_type=["dense"],
204
208
  revision="3f810a6a7fd220369ad248e3705cf13d71803602",
205
209
  release_date="2024-06-28",
206
210
  languages=["eng-Latn"],
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
5
5
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
6
  from mteb.types import PromptType
7
7
 
8
+ from .facebookai import XLMR_LANGUAGES
9
+
8
10
  E5_PAPER_RELEASE_DATE = "2024-02-08"
9
- XLMR_LANGUAGES = [
10
- "afr-Latn",
11
- "amh-Latn",
12
- "ara-Latn",
13
- "asm-Latn",
14
- "aze-Latn",
15
- "bel-Latn",
16
- "bul-Latn",
17
- "ben-Latn",
18
- "ben-Beng",
19
- "bre-Latn",
20
- "bos-Latn",
21
- "cat-Latn",
22
- "ces-Latn",
23
- "cym-Latn",
24
- "dan-Latn",
25
- "deu-Latn",
26
- "ell-Latn",
27
- "eng-Latn",
28
- "epo-Latn",
29
- "spa-Latn",
30
- "est-Latn",
31
- "eus-Latn",
32
- "fas-Latn",
33
- "fin-Latn",
34
- "fra-Latn",
35
- "fry-Latn",
36
- "gle-Latn",
37
- "gla-Latn",
38
- "glg-Latn",
39
- "guj-Latn",
40
- "hau-Latn",
41
- "heb-Latn",
42
- "hin-Latn",
43
- "hin-Deva",
44
- "hrv-Latn",
45
- "hun-Latn",
46
- "hye-Latn",
47
- "ind-Latn",
48
- "isl-Latn",
49
- "ita-Latn",
50
- "jpn-Latn",
51
- "jav-Latn",
52
- "kat-Latn",
53
- "kaz-Latn",
54
- "khm-Latn",
55
- "kan-Latn",
56
- "kor-Latn",
57
- "kur-Latn",
58
- "kir-Latn",
59
- "lat-Latn",
60
- "lao-Latn",
61
- "lit-Latn",
62
- "lav-Latn",
63
- "mlg-Latn",
64
- "mkd-Latn",
65
- "mal-Latn",
66
- "mon-Latn",
67
- "mar-Latn",
68
- "msa-Latn",
69
- "mya-Latn",
70
- "nep-Latn",
71
- "nld-Latn",
72
- "nob-Latn",
73
- "orm-Latn",
74
- "ori-Latn",
75
- "pan-Latn",
76
- "pol-Latn",
77
- "pus-Latn",
78
- "por-Latn",
79
- "ron-Latn",
80
- "rus-Latn",
81
- "san-Latn",
82
- "snd-Latn",
83
- "sin-Latn",
84
- "slk-Latn",
85
- "slv-Latn",
86
- "som-Latn",
87
- "sqi-Latn",
88
- "srp-Latn",
89
- "sun-Latn",
90
- "swe-Latn",
91
- "swa-Latn",
92
- "tam-Latn",
93
- "tam-Taml",
94
- "tel-Latn",
95
- "tel-Telu",
96
- "tha-Latn",
97
- "tgl-Latn",
98
- "tur-Latn",
99
- "uig-Latn",
100
- "ukr-Latn",
101
- "urd-Latn",
102
- "urd-Arab",
103
- "uzb-Latn",
104
- "vie-Latn",
105
- "xho-Latn",
106
- "yid-Latn",
107
- "zho-Hant",
108
- "zho-Hans",
109
- ]
11
+
110
12
 
111
13
  MULTILINGUAL_E5_CITATION = """
112
14
  @article{wang2024multilingual,
@@ -168,6 +70,7 @@ e5_mult_small = ModelMeta(
168
70
  model_prompts=model_prompts,
169
71
  ),
170
72
  name="intfloat/multilingual-e5-small",
73
+ model_type=["dense"],
171
74
  languages=XLMR_LANGUAGES,
172
75
  open_weights=True,
173
76
  revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
@@ -194,6 +97,7 @@ e5_mult_base = ModelMeta(
194
97
  model_prompts=model_prompts,
195
98
  ),
196
99
  name="intfloat/multilingual-e5-base",
100
+ model_type=["dense"],
197
101
  languages=XLMR_LANGUAGES,
198
102
  open_weights=True,
199
103
  revision="d13f1b27baf31030b7fd040960d60d909913633f",
@@ -220,6 +124,7 @@ e5_mult_large = ModelMeta(
220
124
  model_prompts=model_prompts,
221
125
  ),
222
126
  name="intfloat/multilingual-e5-large",
127
+ model_type=["dense"],
223
128
  languages=XLMR_LANGUAGES,
224
129
  open_weights=True,
225
130
  revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
@@ -246,6 +151,7 @@ e5_eng_small_v2 = ModelMeta(
246
151
  model_prompts=model_prompts,
247
152
  ),
248
153
  name="intfloat/e5-small-v2",
154
+ model_type=["dense"],
249
155
  languages=["eng-Latn"],
250
156
  open_weights=True,
251
157
  revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
@@ -272,6 +178,7 @@ e5_eng_small = ModelMeta(
272
178
  model_prompts=model_prompts,
273
179
  ),
274
180
  name="intfloat/e5-small",
181
+ model_type=["dense"],
275
182
  languages=["eng-Latn"],
276
183
  open_weights=True,
277
184
  revision="e272f3049e853b47cb5ca3952268c6662abda68f",
@@ -298,6 +205,7 @@ e5_eng_base_v2 = ModelMeta(
298
205
  model_prompts=model_prompts,
299
206
  ),
300
207
  name="intfloat/e5-base-v2",
208
+ model_type=["dense"],
301
209
  languages=["eng-Latn"],
302
210
  open_weights=True,
303
211
  revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
@@ -325,6 +233,7 @@ e5_eng_large_v2 = ModelMeta(
325
233
  model_prompts=model_prompts,
326
234
  ),
327
235
  name="intfloat/e5-large-v2",
236
+ model_type=["dense"],
328
237
  languages=["eng-Latn"],
329
238
  open_weights=True,
330
239
  revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
@@ -352,6 +261,7 @@ e5_large = ModelMeta(
352
261
  model_prompts=model_prompts,
353
262
  ),
354
263
  name="intfloat/e5-large",
264
+ model_type=["dense"],
355
265
  languages=["eng-Latn"],
356
266
  open_weights=True,
357
267
  revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
@@ -379,6 +289,7 @@ e5_base = ModelMeta(
379
289
  model_prompts=model_prompts,
380
290
  ),
381
291
  name="intfloat/e5-base",
292
+ model_type=["dense"],
382
293
  languages=["eng-Latn"],
383
294
  open_weights=True,
384
295
  revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
@@ -160,6 +160,7 @@ e5_v = ModelMeta(
160
160
  device_map="auto",
161
161
  ),
162
162
  name="royokong/e5-v",
163
+ model_type=["dense"],
163
164
  languages=["eng-Latn"],
164
165
  revision="0c1f22679417b3ae925d779442221c40cd1861ab",
165
166
  release_date="2024-07-17",