mteb-2.1.4-py3-none-any.whl → mteb-2.5.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458):
  1. mteb/__init__.py +4 -0
  2. mteb/_create_dataloaders.py +6 -3
  3. mteb/_evaluators/any_sts_evaluator.py +21 -12
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +1 -1
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +30 -38
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_data_filter/__init__.py +0 -0
  13. mteb/abstasks/_data_filter/filters.py +125 -0
  14. mteb/abstasks/_data_filter/task_pipelines.py +102 -0
  15. mteb/abstasks/_statistics_calculation.py +6 -2
  16. mteb/abstasks/classification.py +0 -2
  17. mteb/abstasks/clustering.py +1 -1
  18. mteb/abstasks/clustering_legacy.py +3 -0
  19. mteb/abstasks/multilabel_classification.py +10 -3
  20. mteb/abstasks/pair_classification.py +8 -1
  21. mteb/abstasks/sts.py +7 -0
  22. mteb/abstasks/task_metadata.py +1 -0
  23. mteb/benchmarks/_create_table.py +84 -37
  24. mteb/benchmarks/benchmark.py +74 -15
  25. mteb/benchmarks/benchmarks/__init__.py +8 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +259 -15
  27. mteb/benchmarks/get_benchmark.py +2 -0
  28. mteb/cache.py +47 -10
  29. mteb/deprecated_evaluator.py +8 -13
  30. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  31. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  32. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  39. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  40. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  41. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  42. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  43. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  44. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  45. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  46. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  47. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  48. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  49. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  50. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  51. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  53. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  54. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  56. mteb/evaluate.py +65 -45
  57. mteb/leaderboard/app.py +268 -133
  58. mteb/leaderboard/benchmark_selector.py +14 -5
  59. mteb/leaderboard/figures.py +13 -15
  60. mteb/leaderboard/table.py +82 -17
  61. mteb/models/__init__.py +4 -1
  62. mteb/models/abs_encoder.py +21 -17
  63. mteb/models/cache_wrappers/__init__.py +2 -1
  64. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  65. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  66. mteb/models/get_model_meta.py +3 -114
  67. mteb/models/instruct_wrapper.py +5 -1
  68. mteb/models/model_implementations/align_models.py +7 -0
  69. mteb/models/model_implementations/amazon_models.py +1 -0
  70. mteb/models/model_implementations/andersborges.py +65 -0
  71. mteb/models/model_implementations/ara_models.py +8 -0
  72. mteb/models/model_implementations/arctic_models.py +8 -0
  73. mteb/models/model_implementations/b1ade_models.py +1 -0
  74. mteb/models/model_implementations/bedrock_models.py +4 -0
  75. mteb/models/model_implementations/bge_models.py +60 -0
  76. mteb/models/model_implementations/bica_model.py +35 -0
  77. mteb/models/model_implementations/blip2_models.py +11 -0
  78. mteb/models/model_implementations/blip_models.py +27 -0
  79. mteb/models/model_implementations/bm25.py +1 -0
  80. mteb/models/model_implementations/bmretriever_models.py +4 -0
  81. mteb/models/model_implementations/cadet_models.py +9 -0
  82. mteb/models/model_implementations/cde_models.py +14 -0
  83. mteb/models/model_implementations/clip_models.py +3 -0
  84. mteb/models/model_implementations/clips_models.py +100 -0
  85. mteb/models/model_implementations/codefuse_models.py +162 -0
  86. mteb/models/model_implementations/codesage_models.py +15 -0
  87. mteb/models/model_implementations/cohere_models.py +8 -1
  88. mteb/models/model_implementations/cohere_v.py +5 -0
  89. mteb/models/model_implementations/colpali_models.py +14 -6
  90. mteb/models/model_implementations/colqwen_models.py +271 -1
  91. mteb/models/model_implementations/colsmol_models.py +2 -0
  92. mteb/models/model_implementations/conan_models.py +1 -0
  93. mteb/models/model_implementations/dino_models.py +171 -0
  94. mteb/models/model_implementations/e5_instruct.py +4 -0
  95. mteb/models/model_implementations/e5_models.py +12 -101
  96. mteb/models/model_implementations/e5_v.py +1 -0
  97. mteb/models/model_implementations/eagerworks_models.py +164 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  99. mteb/models/model_implementations/en_code_retriever.py +1 -0
  100. mteb/models/model_implementations/euler_models.py +32 -0
  101. mteb/models/model_implementations/evaclip_models.py +4 -0
  102. mteb/models/model_implementations/fa_models.py +58 -0
  103. mteb/models/model_implementations/facebookai.py +193 -0
  104. mteb/models/model_implementations/geogpt_models.py +1 -0
  105. mteb/models/model_implementations/gme_v_models.py +11 -5
  106. mteb/models/model_implementations/google_models.py +16 -5
  107. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
  108. mteb/models/model_implementations/gritlm_models.py +2 -0
  109. mteb/models/model_implementations/gte_models.py +78 -0
  110. mteb/models/model_implementations/hinvec_models.py +1 -0
  111. mteb/models/model_implementations/human.py +1 -0
  112. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  113. mteb/models/model_implementations/inf_models.py +2 -0
  114. mteb/models/model_implementations/jasper_models.py +255 -2
  115. mteb/models/model_implementations/jina_clip.py +1 -0
  116. mteb/models/model_implementations/jina_models.py +209 -5
  117. mteb/models/model_implementations/kalm_models.py +203 -25
  118. mteb/models/model_implementations/kblab.py +31 -0
  119. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  120. mteb/models/model_implementations/kfst.py +25 -0
  121. mteb/models/model_implementations/kowshik24_models.py +32 -0
  122. mteb/models/model_implementations/lens_models.py +2 -0
  123. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  124. mteb/models/model_implementations/linq_models.py +3 -2
  125. mteb/models/model_implementations/listconranker.py +1 -1
  126. mteb/models/model_implementations/llm2clip_models.py +3 -0
  127. mteb/models/model_implementations/llm2vec_models.py +8 -0
  128. mteb/models/model_implementations/mcinext_models.py +3 -0
  129. mteb/models/model_implementations/mdbr_models.py +2 -0
  130. mteb/models/model_implementations/misc_models.py +362 -0
  131. mteb/models/model_implementations/mme5_models.py +1 -0
  132. mteb/models/model_implementations/moco_models.py +11 -0
  133. mteb/models/model_implementations/mod_models.py +191 -0
  134. mteb/models/model_implementations/model2vec_models.py +13 -0
  135. mteb/models/model_implementations/moka_models.py +3 -0
  136. mteb/models/model_implementations/mxbai_models.py +9 -0
  137. mteb/models/model_implementations/nbailab.py +70 -0
  138. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  139. mteb/models/model_implementations/nomic_models.py +156 -4
  140. mteb/models/model_implementations/nomic_models_vision.py +7 -2
  141. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
  142. mteb/models/model_implementations/nvidia_models.py +4 -1
  143. mteb/models/model_implementations/octen_models.py +195 -0
  144. mteb/models/model_implementations/openai_models.py +20 -16
  145. mteb/models/model_implementations/openclip_models.py +24 -0
  146. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  147. mteb/models/model_implementations/ops_moa_models.py +4 -2
  148. mteb/models/model_implementations/pawan_models.py +39 -0
  149. mteb/models/model_implementations/piccolo_models.py +8 -0
  150. mteb/models/model_implementations/promptriever_models.py +8 -4
  151. mteb/models/model_implementations/pylate_models.py +37 -4
  152. mteb/models/model_implementations/qodo_models.py +2 -0
  153. mteb/models/model_implementations/qtack_models.py +1 -0
  154. mteb/models/model_implementations/qwen3_models.py +6 -3
  155. mteb/models/model_implementations/qzhou_models.py +3 -1
  156. mteb/models/model_implementations/random_baseline.py +16 -21
  157. mteb/models/model_implementations/rasgaard_models.py +34 -0
  158. mteb/models/model_implementations/reasonir_model.py +1 -0
  159. mteb/models/model_implementations/repllama_models.py +2 -0
  160. mteb/models/model_implementations/rerankers_custom.py +3 -3
  161. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  162. mteb/models/model_implementations/richinfoai_models.py +1 -0
  163. mteb/models/model_implementations/ru_sentence_models.py +51 -0
  164. mteb/models/model_implementations/ruri_models.py +322 -0
  165. mteb/models/model_implementations/salesforce_models.py +3 -0
  166. mteb/models/model_implementations/samilpwc_models.py +1 -0
  167. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  168. mteb/models/model_implementations/searchmap_models.py +1 -0
  169. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  170. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  171. mteb/models/model_implementations/seed_models.py +1 -0
  172. mteb/models/model_implementations/sentence_transformers_models.py +57 -0
  173. mteb/models/model_implementations/shuu_model.py +32 -31
  174. mteb/models/model_implementations/siglip_models.py +10 -0
  175. mteb/models/model_implementations/sonar_models.py +1 -0
  176. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  177. mteb/models/model_implementations/stella_models.py +6 -0
  178. mteb/models/model_implementations/tarka_models.py +376 -0
  179. mteb/models/model_implementations/ua_sentence_models.py +10 -0
  180. mteb/models/model_implementations/uae_models.py +1 -0
  181. mteb/models/model_implementations/vdr_models.py +2 -0
  182. mteb/models/model_implementations/vi_vn_models.py +39 -0
  183. mteb/models/model_implementations/vista_models.py +2 -0
  184. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  185. mteb/models/model_implementations/voyage_models.py +15 -0
  186. mteb/models/model_implementations/voyage_v.py +8 -2
  187. mteb/models/model_implementations/xyz_models.py +1 -0
  188. mteb/models/model_implementations/youtu_models.py +1 -0
  189. mteb/models/model_implementations/yuan_models.py +34 -0
  190. mteb/models/model_implementations/yuan_models_en.py +58 -0
  191. mteb/models/model_meta.py +442 -22
  192. mteb/models/search_encoder_index/__init__.py +7 -0
  193. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  194. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  195. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
  196. mteb/models/search_wrappers.py +165 -48
  197. mteb/models/sentence_transformer_wrapper.py +2 -7
  198. mteb/results/benchmark_results.py +88 -47
  199. mteb/results/model_result.py +11 -4
  200. mteb/results/task_result.py +37 -19
  201. mteb/similarity_functions.py +49 -0
  202. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  203. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  204. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  205. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  206. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  207. mteb/tasks/classification/ara/ajgt.py +1 -2
  208. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  209. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  210. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  211. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  212. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  213. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  214. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  215. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  216. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  217. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  218. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  220. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  221. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  222. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  223. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  224. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  225. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  226. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  227. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  228. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  229. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  230. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  231. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  232. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  233. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  234. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  235. mteb/tasks/classification/eng/news_classification.py +1 -2
  236. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  237. mteb/tasks/classification/eng/patent_classification.py +1 -2
  238. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  239. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  240. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  241. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  242. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  243. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  244. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  245. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  246. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  247. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  248. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  249. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  250. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  251. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  252. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  253. mteb/tasks/classification/est/estonian_valence.py +1 -2
  254. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  255. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  257. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  260. mteb/tasks/classification/heb/__init__.py +6 -1
  261. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  262. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  263. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  264. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  265. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  266. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  267. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  268. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  269. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  270. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  271. mteb/tasks/classification/kor/klue_tc.py +1 -2
  272. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  274. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  275. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  276. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  277. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  278. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  279. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  280. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  281. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  282. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  283. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  284. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  285. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  286. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  287. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  288. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  289. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  290. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  291. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  292. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  293. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  294. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  295. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  296. mteb/tasks/classification/pol/polish_classification.py +3 -6
  297. mteb/tasks/classification/ron/moroco.py +1 -2
  298. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  299. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  300. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  301. mteb/tasks/classification/rus/headline_classification.py +1 -2
  302. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  303. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  304. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  305. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  306. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  307. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  308. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  309. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  310. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  311. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  312. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  313. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  314. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  315. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  316. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  317. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  318. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  319. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  320. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  321. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  322. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  323. mteb/tasks/classification/tur/__init__.py +4 -0
  324. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  325. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  326. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  327. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  328. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  329. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  330. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  331. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  332. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  333. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  334. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  335. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  336. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  337. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  338. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  339. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  340. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  341. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  342. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  343. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  344. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  345. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  346. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  347. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  348. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  349. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  350. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  351. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  352. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  353. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  354. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  355. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  356. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  357. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  358. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  359. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  360. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  361. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  362. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  363. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  364. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  365. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  366. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  367. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  368. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  369. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  370. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  371. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  372. mteb/tasks/pair_classification/rus/terra.py +51 -25
  373. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  374. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  375. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  376. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  377. mteb/tasks/reranking/jpn/__init__.py +9 -1
  378. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  379. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  380. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  381. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  382. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  383. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  384. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  385. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  386. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  387. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  388. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  389. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  390. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  391. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  392. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  393. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  394. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  395. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  396. mteb/tasks/retrieval/kor/__init__.py +2 -1
  397. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  398. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  399. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  400. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  401. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  402. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  403. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  404. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  405. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  406. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  407. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  408. mteb/tasks/retrieval/nld/__init__.py +8 -4
  409. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  410. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  411. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  412. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  413. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  414. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  415. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  416. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  417. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  418. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  419. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  420. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  421. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  422. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  423. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  424. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  425. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  426. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  427. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  428. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  429. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  430. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  431. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  432. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  433. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  434. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  435. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  436. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  437. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  438. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  439. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  440. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  441. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  442. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  443. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  444. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  445. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  446. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  447. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  448. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  449. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  450. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  451. mteb/types/_encoder_io.py +7 -2
  452. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
  453. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
  454. mteb/models/model_implementations/nb_sbert.py +0 -25
  455. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
  456. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
  457. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
  458. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
@@ -236,6 +236,7 @@ DEFAULT_INSTRUCTION = "Instruct: Given a web search query, retrieve relevant pas
236
236
 
237
237
  seed_embedding = ModelMeta(
238
238
  name="ByteDance-Seed/Seed1.5-Embedding",
239
+ model_type=["dense"],
239
240
  revision="4",
240
241
  release_date="2025-04-25",
241
242
  languages=[
@@ -113,6 +113,7 @@ sent_trf_training_dataset = {
113
113
  all_minilm_l6_v2 = ModelMeta(
114
114
  loader=sentence_transformers_loader,
115
115
  name="sentence-transformers/all-MiniLM-L6-v2",
116
+ model_type=["dense"],
116
117
  languages=["eng-Latn"],
117
118
  open_weights=True,
118
119
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
@@ -137,6 +138,7 @@ all_minilm_l6_v2 = ModelMeta(
137
138
  all_minilm_l12_v2 = ModelMeta(
138
139
  loader=sentence_transformers_loader,
139
140
  name="sentence-transformers/all-MiniLM-L12-v2",
141
+ model_type=["dense"],
140
142
  languages=["eng-Latn"],
141
143
  open_weights=True,
142
144
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
@@ -161,6 +163,7 @@ all_minilm_l12_v2 = ModelMeta(
161
163
  paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
162
164
  loader=sentence_transformers_loader,
163
165
  name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
166
+ model_type=["dense"],
164
167
  languages=paraphrase_langs,
165
168
  open_weights=True,
166
169
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
@@ -185,6 +188,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
185
188
  paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
186
189
  loader=sentence_transformers_loader,
187
190
  name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
191
+ model_type=["dense"],
188
192
  languages=paraphrase_langs,
189
193
  open_weights=True,
190
194
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
@@ -220,6 +224,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
220
224
  labse = ModelMeta(
221
225
  loader=sentence_transformers_loader,
222
226
  name="sentence-transformers/LaBSE",
227
+ model_type=["dense"],
223
228
  languages=paraphrase_langs,
224
229
  open_weights=True,
225
230
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
@@ -257,6 +262,7 @@ labse = ModelMeta(
257
262
  multi_qa_minilm_l6_cos_v1 = ModelMeta(
258
263
  loader=sentence_transformers_loader,
259
264
  name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
265
+ model_type=["dense"],
260
266
  languages=["eng-Latn"],
261
267
  open_weights=True,
262
268
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
@@ -281,6 +287,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
281
287
  all_mpnet_base_v2 = ModelMeta(
282
288
  loader=sentence_transformers_loader,
283
289
  name="sentence-transformers/all-mpnet-base-v2",
290
+ model_type=["dense"],
284
291
  languages=["eng-Latn"],
285
292
  open_weights=True,
286
293
  revision="9a3225965996d404b775526de6dbfe85d3368642",
@@ -380,6 +387,7 @@ static_multi_languages = [
380
387
 
381
388
  static_similarity_mrl_multilingual_v1 = ModelMeta(
382
389
  name="sentence-transformers/static-similarity-mrl-multilingual-v1",
390
+ model_type=["dense"],
383
391
  loader=SentenceTransformerEncoderWrapper,
384
392
  loader_kwargs=dict(
385
393
  device="cpu", # CPU is just as quick, if not quicker
@@ -402,11 +410,21 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
402
410
  training_datasets=static_multi_datasets,
403
411
  public_training_code="https://huggingface.co/blog/static-embeddings",
404
412
  public_training_data="https://huggingface.co/collections/sentence-transformers/embedding-model-datasets-6644d7a3673a511914aa7552",
413
+ citation="""@inproceedings{reimers-2019-sentence-bert,
414
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
415
+ author = "Reimers, Nils and Gurevych, Iryna",
416
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
417
+ month = "11",
418
+ year = "2019",
419
+ publisher = "Association for Computational Linguistics",
420
+ url = "https://arxiv.org/abs/1908.10084",
421
+ }""",
405
422
  )
406
423
 
407
424
  contriever = ModelMeta(
408
425
  loader=SentenceTransformerEncoderWrapper,
409
426
  name="facebook/contriever-msmarco",
427
+ model_type=["dense"],
410
428
  languages=["eng-Latn"],
411
429
  open_weights=True,
412
430
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
@@ -436,6 +454,7 @@ contriever = ModelMeta(
436
454
  microllama_text_embedding = ModelMeta(
437
455
  loader=sentence_transformers_loader,
438
456
  name="keeeeenw/MicroLlama-text-embedding",
457
+ model_type=["dense"],
439
458
  languages=["eng-Latn"],
440
459
  open_weights=True,
441
460
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
@@ -467,9 +486,21 @@ microllama_text_embedding = ModelMeta(
467
486
  public_training_data=None,
468
487
  )
469
488
 
489
+ SENTENCE_T5_CITATION = """
490
+ @misc{ni2021sentencet5scalablesentenceencoders,
491
+ title={Sentence-T5: Scalable Sentence Encoders from Pre-trained Text-to-Text Models},
492
+ author={Jianmo Ni and Gustavo Hernández Ábrego and Noah Constant and Ji Ma and Keith B. Hall and Daniel Cer and Yinfei Yang},
493
+ year={2021},
494
+ eprint={2108.08877},
495
+ archivePrefix={arXiv},
496
+ primaryClass={cs.CL},
497
+ url={https://arxiv.org/abs/2108.08877},
498
+ }
499
+ """
470
500
  sentence_t5_base = ModelMeta(
471
501
  loader=sentence_transformers_loader,
472
502
  name="sentence-transformers/sentence-t5-base",
503
+ model_type=["dense"],
473
504
  languages=["eng-Latn"],
474
505
  open_weights=True,
475
506
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
@@ -486,11 +517,13 @@ sentence_t5_base = ModelMeta(
486
517
  public_training_code=None,
487
518
  public_training_data=None,
488
519
  training_datasets={"SNLI", "Community QA"},
520
+ citation=SENTENCE_T5_CITATION,
489
521
  )
490
522
 
491
523
  sentence_t5_large = ModelMeta(
492
524
  loader=sentence_transformers_loader,
493
525
  name="sentence-transformers/sentence-t5-large",
526
+ model_type=["dense"],
494
527
  languages=["eng-Latn"],
495
528
  open_weights=True,
496
529
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
@@ -507,11 +540,13 @@ sentence_t5_large = ModelMeta(
507
540
  public_training_code=None,
508
541
  public_training_data=None,
509
542
  training_datasets={"SNLI", "Community QA"},
543
+ citation=SENTENCE_T5_CITATION,
510
544
  )
511
545
 
512
546
  sentence_t5_xl = ModelMeta(
513
547
  loader=sentence_transformers_loader,
514
548
  name="sentence-transformers/sentence-t5-xl",
549
+ model_type=["dense"],
515
550
  languages=["eng-Latn"],
516
551
  open_weights=True,
517
552
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
@@ -528,11 +563,13 @@ sentence_t5_xl = ModelMeta(
528
563
  public_training_code=None,
529
564
  public_training_data=None,
530
565
  training_datasets={"SNLI", "Community QA"},
566
+ citation=SENTENCE_T5_CITATION,
531
567
  )
532
568
 
533
569
  sentence_t5_xxl = ModelMeta(
534
570
  loader=sentence_transformers_loader,
535
571
  name="sentence-transformers/sentence-t5-xxl",
572
+ model_type=["dense"],
536
573
  languages=["eng-Latn"],
537
574
  open_weights=True,
538
575
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
@@ -549,10 +586,23 @@ sentence_t5_xxl = ModelMeta(
549
586
  public_training_code=None,
550
587
  public_training_data=None,
551
588
  training_datasets={"SNLI", "Community QA"},
589
+ citation=SENTENCE_T5_CITATION,
552
590
  )
591
+ GTR_CITATION = """
592
+ @misc{ni2021largedualencodersgeneralizable,
593
+ title={Large Dual Encoders Are Generalizable Retrievers},
594
+ author={Jianmo Ni and Chen Qu and Jing Lu and Zhuyun Dai and Gustavo Hernández Ábrego and Ji Ma and Vincent Y. Zhao and Yi Luan and Keith B. Hall and Ming-Wei Chang and Yinfei Yang},
595
+ year={2021},
596
+ eprint={2112.07899},
597
+ archivePrefix={arXiv},
598
+ primaryClass={cs.IR},
599
+ url={https://arxiv.org/abs/2112.07899},
600
+ }
601
+ """
553
602
  gtr_t5_large = ModelMeta(
554
603
  loader=sentence_transformers_loader,
555
604
  name="sentence-transformers/gtr-t5-large",
605
+ model_type=["dense"],
556
606
  languages=["eng-Latn"], # in format eng-Latn
557
607
  open_weights=True,
558
608
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
@@ -581,11 +631,13 @@ gtr_t5_large = ModelMeta(
581
631
  "NQ-PL", # translation not trained on
582
632
  "Community QA",
583
633
  },
634
+ citation=GTR_CITATION,
584
635
  )
585
636
 
586
637
  gtr_t5_xl = ModelMeta(
587
638
  loader=sentence_transformers_loader,
588
639
  name="sentence-transformers/gtr-t5-xl",
640
+ model_type=["dense"],
589
641
  languages=["eng-Latn"], # in format eng-Latn
590
642
  open_weights=True,
591
643
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
@@ -614,10 +666,12 @@ gtr_t5_xl = ModelMeta(
614
666
  "NQ-PL", # translation not trained on
615
667
  "Community QA",
616
668
  },
669
+ citation=GTR_CITATION,
617
670
  )
618
671
  gtr_t5_xxl = ModelMeta(
619
672
  loader=sentence_transformers_loader,
620
673
  name="sentence-transformers/gtr-t5-xxl",
674
+ model_type=["dense"],
621
675
  languages=["eng-Latn"], # in format eng-Latn
622
676
  open_weights=True,
623
677
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
@@ -646,11 +700,13 @@ gtr_t5_xxl = ModelMeta(
646
700
  "NQ-PL", # translation not trained on
647
701
  "Community QA",
648
702
  },
703
+ citation=GTR_CITATION,
649
704
  )
650
705
 
651
706
  gtr_t5_base = ModelMeta(
652
707
  loader=sentence_transformers_loader,
653
708
  name="sentence-transformers/gtr-t5-base",
709
+ model_type=["dense"],
654
710
  languages=["eng-Latn"], # in format eng-Latn
655
711
  open_weights=True,
656
712
  revision="7027e9594267928589816394bdd295273ddc0739",
@@ -679,4 +735,5 @@ gtr_t5_base = ModelMeta(
679
735
  "NQ-PL", # translation not trained on
680
736
  "Community QA",
681
737
  },
738
+ citation=GTR_CITATION,
682
739
  )
@@ -1,31 +1,32 @@
1
- from mteb.models.model_meta import ModelMeta
2
- from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
-
4
- codemodernbert_crow_meta = ModelMeta(
5
- loader=sentence_transformers_loader,
6
- name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
- languages=["eng-Latn"],
8
- open_weights=True,
9
- revision="044a7a4b552f86e284817234c336bccf16f895ce",
10
- release_date="2025-04-21",
11
- n_parameters=151668480,
12
- memory_usage_mb=607,
13
- embed_dim=768,
14
- license="apache-2.0",
15
- max_tokens=1024,
16
- reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
17
- similarity_fn_name="cosine",
18
- framework=["Sentence Transformers", "PyTorch"],
19
- use_instructions=False,
20
- public_training_code=None,
21
- public_training_data=None,
22
- training_datasets={
23
- "CodeSearchNetRetrieval",
24
- # "code-search-net/code_search_net",
25
- # "Shuu12121/python-codesearch-filtered",
26
- # "Shuu12121/java-codesearch-filtered",
27
- # "Shuu12121/javascript-codesearch-filtered",
28
- # "Shuu12121/ruby-codesearch-filtered",
29
- # "Shuu12121/rust-codesearch-filtered",
30
- },
31
- )
1
+ from mteb.models.model_meta import ModelMeta
2
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
+
4
+ codemodernbert_crow_meta = ModelMeta(
5
+ loader=sentence_transformers_loader,
6
+ name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
+ model_type=["dense"],
8
+ languages=["eng-Latn"],
9
+ open_weights=True,
10
+ revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
+ release_date="2025-04-21",
12
+ n_parameters=151668480,
13
+ memory_usage_mb=607,
14
+ embed_dim=768,
15
+ license="apache-2.0",
16
+ max_tokens=1024,
17
+ reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
18
+ similarity_fn_name="cosine",
19
+ framework=["Sentence Transformers", "PyTorch"],
20
+ use_instructions=False,
21
+ public_training_code=None,
22
+ public_training_data=None,
23
+ training_datasets={
24
+ "CodeSearchNetRetrieval",
25
+ # "code-search-net/code_search_net",
26
+ # "Shuu12121/python-codesearch-filtered",
27
+ # "Shuu12121/java-codesearch-filtered",
28
+ # "Shuu12121/javascript-codesearch-filtered",
29
+ # "Shuu12121/ruby-codesearch-filtered",
30
+ # "Shuu12121/rust-codesearch-filtered",
31
+ },
32
+ )
@@ -125,6 +125,7 @@ siglip_training_datasets = set(
125
125
  siglip_so400m_patch14_224 = ModelMeta(
126
126
  loader=SiglipModelWrapper, # type: ignore
127
127
  name="google/siglip-so400m-patch14-224",
128
+ model_type=["dense"],
128
129
  languages=["eng-Latn"],
129
130
  revision="d04cf29fca7b6374f74d8bea1969314492266b5e",
130
131
  release_date="2024-01-08",
@@ -148,6 +149,7 @@ siglip_so400m_patch14_224 = ModelMeta(
148
149
  siglip_so400m_patch14_384 = ModelMeta(
149
150
  loader=SiglipModelWrapper, # type: ignore
150
151
  name="google/siglip-so400m-patch14-384",
152
+ model_type=["dense"],
151
153
  languages=["eng-Latn"],
152
154
  revision="9fdffc58afc957d1a03a25b10dba0329ab15c2a3",
153
155
  release_date="2024-01-08",
@@ -171,6 +173,7 @@ siglip_so400m_patch14_384 = ModelMeta(
171
173
  siglip_so400m_patch16_256_i18n = ModelMeta(
172
174
  loader=SiglipModelWrapper, # type: ignore
173
175
  name="google/siglip-so400m-patch16-256-i18n",
176
+ model_type=["dense"],
174
177
  languages=["eng-Latn"],
175
178
  revision="365d321c0cfdea96bc28e3a29787a11a062681a1",
176
179
  release_date="2024-01-08",
@@ -194,6 +197,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
194
197
  siglip_base_patch16_256_multilingual = ModelMeta(
195
198
  loader=SiglipModelWrapper, # type: ignore
196
199
  name="google/siglip-base-patch16-256-multilingual",
200
+ model_type=["dense"],
197
201
  languages=["eng-Latn"],
198
202
  revision="8952a4eafcde3cb7ab46b1dd629b33f8784ca9c6",
199
203
  release_date="2024-01-08",
@@ -217,6 +221,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
217
221
  siglip_base_patch16_256 = ModelMeta(
218
222
  loader=SiglipModelWrapper, # type: ignore
219
223
  name="google/siglip-base-patch16-256",
224
+ model_type=["dense"],
220
225
  languages=["eng-Latn"],
221
226
  revision="b078df89e446d623010d890864d4207fe6399f61",
222
227
  release_date="2024-01-08",
@@ -240,6 +245,7 @@ siglip_base_patch16_256 = ModelMeta(
240
245
  siglip_base_patch16_512 = ModelMeta(
241
246
  loader=SiglipModelWrapper, # type: ignore
242
247
  name="google/siglip-base-patch16-512",
248
+ model_type=["dense"],
243
249
  languages=["eng-Latn"],
244
250
  revision="753a949581523b60257d93e18391e8c27f72eb22",
245
251
  release_date="2024-01-08",
@@ -263,6 +269,7 @@ siglip_base_patch16_512 = ModelMeta(
263
269
  siglip_base_patch16_384 = ModelMeta(
264
270
  loader=SiglipModelWrapper, # type: ignore
265
271
  name="google/siglip-base-patch16-384",
272
+ model_type=["dense"],
266
273
  languages=["eng-Latn"],
267
274
  revision="41aec1c83b32e0a6fca20ad88ba058aa5b5ea394",
268
275
  release_date="2024-01-08",
@@ -286,6 +293,7 @@ siglip_base_patch16_384 = ModelMeta(
286
293
  siglip_base_patch16_224 = ModelMeta(
287
294
  loader=SiglipModelWrapper, # type: ignore
288
295
  name="google/siglip-base-patch16-224",
296
+ model_type=["dense"],
289
297
  languages=["eng-Latn"],
290
298
  revision="7fd15f0689c79d79e38b1c2e2e2370a7bf2761ed",
291
299
  release_date="2024-01-08",
@@ -309,6 +317,7 @@ siglip_base_patch16_224 = ModelMeta(
309
317
  siglip_large_patch16_256 = ModelMeta(
310
318
  loader=SiglipModelWrapper, # type: ignore
311
319
  name="google/siglip-large-patch16-256",
320
+ model_type=["dense"],
312
321
  languages=["eng-Latn"],
313
322
  revision="d0da9f876e7d66b4e250cd2450c3ba2ce735e447",
314
323
  release_date="2024-01-08",
@@ -332,6 +341,7 @@ siglip_large_patch16_256 = ModelMeta(
332
341
  siglip_large_patch16_384 = ModelMeta(
333
342
  loader=SiglipModelWrapper, # type: ignore
334
343
  name="google/siglip-large-patch16-384",
344
+ model_type=["dense"],
335
345
  languages=["eng-Latn"],
336
346
  revision="ce005573a40965dfd21fd937fbdeeebf2439fc35",
337
347
  release_date="2024-01-08",
@@ -218,6 +218,7 @@ sonar_langs = [
218
218
  sonar = ModelMeta(
219
219
  loader=None,
220
220
  name="facebook/SONAR",
221
+ model_type=["dense"],
221
222
  languages=sonar_langs,
222
223
  open_weights=True,
223
224
  use_instructions=False, # it does take a language code as input
@@ -0,0 +1,34 @@
1
+ """ATLES Champion Embedding Model for MTEB."""
2
+
3
+ from mteb.models.model_meta import ModelMeta
4
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
5
+
6
+ spartan8806_atles_champion_embedding = ModelMeta(
7
+ loader=sentence_transformers_loader,
8
+ name="spartan8806/atles-champion-embedding",
9
+ model_type=["dense"],
10
+ languages=["eng-Latn"],
11
+ open_weights=True,
12
+ revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
13
+ release_date="2025-11-15",
14
+ n_parameters=110_000_000,
15
+ memory_usage_mb=420,
16
+ max_tokens=512,
17
+ embed_dim=768,
18
+ license="apache-2.0",
19
+ similarity_fn_name="cosine",
20
+ framework=["Sentence Transformers"],
21
+ reference="https://huggingface.co/spartan8806/atles-champion-embedding",
22
+ use_instructions=False,
23
+ training_datasets={"STSBenchmark"},
24
+ adapted_from="sentence-transformers/all-mpnet-base-v2",
25
+ public_training_code=None,
26
+ public_training_data=None,
27
+ citation="""@article{conner2025epistemic,
28
+ title={The Epistemic Barrier: How RLHF Makes AI Consciousness Empirically Undecidable},
29
+ author={Conner (spartan8806)},
30
+ journal={ATLES Research Papers},
31
+ year={2025},
32
+ note={Cross-model validation study (Phoenix, Grok, Gemini, Claude)}
33
+ }""",
34
+ )
@@ -59,6 +59,7 @@ stella_en_400m = ModelMeta(
59
59
  torch_dtype="auto",
60
60
  ),
61
61
  name="NovaSearch/stella_en_400M_v5",
62
+ model_type=["dense"],
62
63
  languages=["eng-Latn"],
63
64
  open_weights=True,
64
65
  use_instructions=True,
@@ -87,6 +88,7 @@ stella_en_1_5b = ModelMeta(
87
88
  torch_dtype="auto",
88
89
  ),
89
90
  name="NovaSearch/stella_en_1.5B_v5",
91
+ model_type=["dense"],
90
92
  languages=["eng-Latn"],
91
93
  open_weights=True,
92
94
  use_instructions=True,
@@ -109,6 +111,7 @@ stella_en_1_5b = ModelMeta(
109
111
  stella_large_zh_v3_1792d = ModelMeta(
110
112
  loader=sentence_transformers_loader,
111
113
  name="dunzhang/stella-large-zh-v3-1792d",
114
+ model_type=["dense"],
112
115
  languages=["zho-Hans"],
113
116
  open_weights=True,
114
117
  revision="d5d39eb8cd11c80a63df53314e59997074469f09",
@@ -135,6 +138,7 @@ stella_large_zh_v3_1792d = ModelMeta(
135
138
  stella_base_zh_v3_1792d = ModelMeta(
136
139
  loader=sentence_transformers_loader,
137
140
  name="infgrad/stella-base-zh-v3-1792d",
141
+ model_type=["dense"],
138
142
  languages=["zho-Hans"],
139
143
  open_weights=True,
140
144
  revision="82254892a0fba125aa2abf3a4800d2dd12821343",
@@ -162,6 +166,7 @@ stella_base_zh_v3_1792d = ModelMeta(
162
166
  stella_mrl_large_zh_v3_5_1792d = ModelMeta(
163
167
  loader=sentence_transformers_loader,
164
168
  name="dunzhang/stella-mrl-large-zh-v3.5-1792d",
169
+ model_type=["dense"],
165
170
  languages=["zho-Hans"],
166
171
  open_weights=True,
167
172
  revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
@@ -185,6 +190,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
185
190
  zpoint_large_embedding_zh = ModelMeta(
186
191
  loader=sentence_transformers_loader,
187
192
  name="iampanda/zpoint_large_embedding_zh",
193
+ model_type=["dense"],
188
194
  languages=["zho-Hans"],
189
195
  open_weights=True,
190
196
  revision="b1075144f440ab4409c05622c1179130ebd57d03",