mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  """Implementation of Sentence Transformers model validated in MTEB."""
2
2
 
3
+ import numpy as np
4
+
3
5
  from mteb.models.model_meta import ModelMeta, ScoringFunction
4
6
  from mteb.models.sentence_transformer_wrapper import (
5
7
  SentenceTransformerEncoderWrapper,
@@ -113,6 +115,7 @@ sent_trf_training_dataset = {
113
115
  all_minilm_l6_v2 = ModelMeta(
114
116
  loader=sentence_transformers_loader,
115
117
  name="sentence-transformers/all-MiniLM-L6-v2",
118
+ model_type=["dense"],
116
119
  languages=["eng-Latn"],
117
120
  open_weights=True,
118
121
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
@@ -124,7 +127,13 @@ all_minilm_l6_v2 = ModelMeta(
124
127
  max_tokens=256,
125
128
  reference="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
126
129
  similarity_fn_name=ScoringFunction.COSINE,
127
- framework=["Sentence Transformers", "PyTorch"],
130
+ framework=[
131
+ "Sentence Transformers",
132
+ "PyTorch",
133
+ "ONNX",
134
+ "safetensors",
135
+ "Transformers",
136
+ ],
128
137
  use_instructions=False,
129
138
  superseded_by=None,
130
139
  adapted_from=None,
@@ -137,6 +146,7 @@ all_minilm_l6_v2 = ModelMeta(
137
146
  all_minilm_l12_v2 = ModelMeta(
138
147
  loader=sentence_transformers_loader,
139
148
  name="sentence-transformers/all-MiniLM-L12-v2",
149
+ model_type=["dense"],
140
150
  languages=["eng-Latn"],
141
151
  open_weights=True,
142
152
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
@@ -148,7 +158,13 @@ all_minilm_l12_v2 = ModelMeta(
148
158
  max_tokens=256,
149
159
  reference="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
150
160
  similarity_fn_name=ScoringFunction.COSINE,
151
- framework=["Sentence Transformers", "PyTorch"],
161
+ framework=[
162
+ "Sentence Transformers",
163
+ "PyTorch",
164
+ "ONNX",
165
+ "safetensors",
166
+ "Transformers",
167
+ ],
152
168
  use_instructions=False,
153
169
  superseded_by=None,
154
170
  adapted_from=None,
@@ -161,6 +177,7 @@ all_minilm_l12_v2 = ModelMeta(
161
177
  paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
162
178
  loader=sentence_transformers_loader,
163
179
  name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
180
+ model_type=["dense"],
164
181
  languages=paraphrase_langs,
165
182
  open_weights=True,
166
183
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
@@ -172,7 +189,13 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
172
189
  max_tokens=512,
173
190
  reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
174
191
  similarity_fn_name=ScoringFunction.COSINE,
175
- framework=["Sentence Transformers", "PyTorch"],
192
+ framework=[
193
+ "Sentence Transformers",
194
+ "PyTorch",
195
+ "ONNX",
196
+ "safetensors",
197
+ "Transformers",
198
+ ],
176
199
  use_instructions=False,
177
200
  superseded_by=None,
178
201
  adapted_from=None,
@@ -185,6 +208,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
185
208
  paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
186
209
  loader=sentence_transformers_loader,
187
210
  name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
211
+ model_type=["dense"],
188
212
  languages=paraphrase_langs,
189
213
  open_weights=True,
190
214
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
@@ -196,7 +220,13 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
196
220
  max_tokens=512,
197
221
  reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
198
222
  similarity_fn_name=ScoringFunction.COSINE,
199
- framework=["Sentence Transformers", "PyTorch"],
223
+ framework=[
224
+ "Sentence Transformers",
225
+ "PyTorch",
226
+ "ONNX",
227
+ "safetensors",
228
+ "Transformers",
229
+ ],
200
230
  use_instructions=False,
201
231
  superseded_by=None,
202
232
  adapted_from=None,
@@ -220,6 +250,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
220
250
  labse = ModelMeta(
221
251
  loader=sentence_transformers_loader,
222
252
  name="sentence-transformers/LaBSE",
253
+ model_type=["dense"],
223
254
  languages=paraphrase_langs,
224
255
  open_weights=True,
225
256
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
@@ -231,7 +262,7 @@ labse = ModelMeta(
231
262
  max_tokens=512,
232
263
  reference="https://huggingface.co/sentence-transformers/LaBSE",
233
264
  similarity_fn_name=ScoringFunction.COSINE,
234
- framework=["Sentence Transformers", "PyTorch"],
265
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
235
266
  use_instructions=False,
236
267
  superseded_by=None,
237
268
  adapted_from=None,
@@ -257,6 +288,7 @@ labse = ModelMeta(
257
288
  multi_qa_minilm_l6_cos_v1 = ModelMeta(
258
289
  loader=sentence_transformers_loader,
259
290
  name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
291
+ model_type=["dense"],
260
292
  languages=["eng-Latn"],
261
293
  open_weights=True,
262
294
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
@@ -268,7 +300,13 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
268
300
  max_tokens=512,
269
301
  reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
270
302
  similarity_fn_name=ScoringFunction.COSINE,
271
- framework=["Sentence Transformers", "PyTorch"],
303
+ framework=[
304
+ "Sentence Transformers",
305
+ "PyTorch",
306
+ "ONNX",
307
+ "safetensors",
308
+ "Transformers",
309
+ ],
272
310
  use_instructions=False,
273
311
  superseded_by=None,
274
312
  adapted_from="nreimers/MiniLM-L6-H384-uncased",
@@ -281,6 +319,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
281
319
  all_mpnet_base_v2 = ModelMeta(
282
320
  loader=sentence_transformers_loader,
283
321
  name="sentence-transformers/all-mpnet-base-v2",
322
+ model_type=["dense"],
284
323
  languages=["eng-Latn"],
285
324
  open_weights=True,
286
325
  revision="9a3225965996d404b775526de6dbfe85d3368642",
@@ -292,7 +331,13 @@ all_mpnet_base_v2 = ModelMeta(
292
331
  max_tokens=384,
293
332
  reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
294
333
  similarity_fn_name=ScoringFunction.COSINE,
295
- framework=["Sentence Transformers", "PyTorch"],
334
+ framework=[
335
+ "Sentence Transformers",
336
+ "PyTorch",
337
+ "ONNX",
338
+ "safetensors",
339
+ "Transformers",
340
+ ],
296
341
  use_instructions=False,
297
342
  superseded_by=None,
298
343
  adapted_from=None,
@@ -380,6 +425,7 @@ static_multi_languages = [
380
425
 
381
426
  static_similarity_mrl_multilingual_v1 = ModelMeta(
382
427
  name="sentence-transformers/static-similarity-mrl-multilingual-v1",
428
+ model_type=["dense"],
383
429
  loader=SentenceTransformerEncoderWrapper,
384
430
  loader_kwargs=dict(
385
431
  device="cpu", # CPU is just as quick, if not quicker
@@ -395,18 +441,28 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
395
441
  max_tokens=None,
396
442
  reference="https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1",
397
443
  similarity_fn_name="cosine",
398
- framework=["Sentence Transformers", "PyTorch"],
444
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
399
445
  use_instructions=False,
400
446
  superseded_by=None,
401
447
  adapted_from=None,
402
448
  training_datasets=static_multi_datasets,
403
449
  public_training_code="https://huggingface.co/blog/static-embeddings",
404
450
  public_training_data="https://huggingface.co/collections/sentence-transformers/embedding-model-datasets-6644d7a3673a511914aa7552",
451
+ citation="""@inproceedings{reimers-2019-sentence-bert,
452
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
453
+ author = "Reimers, Nils and Gurevych, Iryna",
454
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
455
+ month = "11",
456
+ year = "2019",
457
+ publisher = "Association for Computational Linguistics",
458
+ url = "https://arxiv.org/abs/1908.10084",
459
+ }""",
405
460
  )
406
461
 
407
462
  contriever = ModelMeta(
408
463
  loader=SentenceTransformerEncoderWrapper,
409
464
  name="facebook/contriever-msmarco",
465
+ model_type=["dense"],
410
466
  languages=["eng-Latn"],
411
467
  open_weights=True,
412
468
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
@@ -418,7 +474,7 @@ contriever = ModelMeta(
418
474
  max_tokens=512,
419
475
  reference="https://huggingface.co/facebook/contriever-msmarco",
420
476
  similarity_fn_name=ScoringFunction.DOT_PRODUCT,
421
- framework=["Sentence Transformers", "PyTorch"],
477
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
422
478
  use_instructions=False,
423
479
  citation="""
424
480
  @misc{izacard2021contriever,
@@ -436,6 +492,7 @@ contriever = ModelMeta(
436
492
  microllama_text_embedding = ModelMeta(
437
493
  loader=sentence_transformers_loader,
438
494
  name="keeeeenw/MicroLlama-text-embedding",
495
+ model_type=["dense"],
439
496
  languages=["eng-Latn"],
440
497
  open_weights=True,
441
498
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
@@ -447,7 +504,7 @@ microllama_text_embedding = ModelMeta(
447
504
  max_tokens=2048,
448
505
  reference="https://huggingface.co/keeeeenw/MicroLlama-text-embedding",
449
506
  similarity_fn_name=ScoringFunction.COSINE,
450
- framework=["Sentence Transformers", "PyTorch"],
507
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
451
508
  use_instructions=False,
452
509
  superseded_by=None,
453
510
  adapted_from=None,
@@ -467,9 +524,21 @@ microllama_text_embedding = ModelMeta(
467
524
  public_training_data=None,
468
525
  )
469
526
 
527
+ SENTENCE_T5_CITATION = """
528
+ @misc{ni2021sentencet5scalablesentenceencoders,
529
+ title={Sentence-T5: Scalable Sentence Encoders from Pre-trained Text-to-Text Models},
530
+ author={Jianmo Ni and Gustavo Hernández Ábrego and Noah Constant and Ji Ma and Keith B. Hall and Daniel Cer and Yinfei Yang},
531
+ year={2021},
532
+ eprint={2108.08877},
533
+ archivePrefix={arXiv},
534
+ primaryClass={cs.CL},
535
+ url={https://arxiv.org/abs/2108.08877},
536
+ }
537
+ """
470
538
  sentence_t5_base = ModelMeta(
471
539
  loader=sentence_transformers_loader,
472
540
  name="sentence-transformers/sentence-t5-base",
541
+ model_type=["dense"],
473
542
  languages=["eng-Latn"],
474
543
  open_weights=True,
475
544
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
@@ -481,16 +550,18 @@ sentence_t5_base = ModelMeta(
481
550
  max_tokens=512,
482
551
  reference="https://huggingface.co/sentence-transformers/sentence-t5-base",
483
552
  similarity_fn_name=ScoringFunction.COSINE,
484
- framework=["Sentence Transformers", "PyTorch"],
553
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
485
554
  use_instructions=False,
486
555
  public_training_code=None,
487
556
  public_training_data=None,
488
557
  training_datasets={"SNLI", "Community QA"},
558
+ citation=SENTENCE_T5_CITATION,
489
559
  )
490
560
 
491
561
  sentence_t5_large = ModelMeta(
492
562
  loader=sentence_transformers_loader,
493
563
  name="sentence-transformers/sentence-t5-large",
564
+ model_type=["dense"],
494
565
  languages=["eng-Latn"],
495
566
  open_weights=True,
496
567
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
@@ -502,16 +573,18 @@ sentence_t5_large = ModelMeta(
502
573
  max_tokens=512,
503
574
  reference="https://huggingface.co/sentence-transformers/sentence-t5-large",
504
575
  similarity_fn_name=ScoringFunction.COSINE,
505
- framework=["Sentence Transformers", "PyTorch"],
576
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
506
577
  use_instructions=False,
507
578
  public_training_code=None,
508
579
  public_training_data=None,
509
580
  training_datasets={"SNLI", "Community QA"},
581
+ citation=SENTENCE_T5_CITATION,
510
582
  )
511
583
 
512
584
  sentence_t5_xl = ModelMeta(
513
585
  loader=sentence_transformers_loader,
514
586
  name="sentence-transformers/sentence-t5-xl",
587
+ model_type=["dense"],
515
588
  languages=["eng-Latn"],
516
589
  open_weights=True,
517
590
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
@@ -523,16 +596,18 @@ sentence_t5_xl = ModelMeta(
523
596
  max_tokens=512,
524
597
  reference="https://huggingface.co/sentence-transformers/sentence-t5-xl",
525
598
  similarity_fn_name=ScoringFunction.COSINE,
526
- framework=["Sentence Transformers", "PyTorch"],
599
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
527
600
  use_instructions=False,
528
601
  public_training_code=None,
529
602
  public_training_data=None,
530
603
  training_datasets={"SNLI", "Community QA"},
604
+ citation=SENTENCE_T5_CITATION,
531
605
  )
532
606
 
533
607
  sentence_t5_xxl = ModelMeta(
534
608
  loader=sentence_transformers_loader,
535
609
  name="sentence-transformers/sentence-t5-xxl",
610
+ model_type=["dense"],
536
611
  languages=["eng-Latn"],
537
612
  open_weights=True,
538
613
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
@@ -544,15 +619,28 @@ sentence_t5_xxl = ModelMeta(
544
619
  max_tokens=512,
545
620
  reference="https://huggingface.co/sentence-transformers/sentence-t5-xxl",
546
621
  similarity_fn_name=ScoringFunction.COSINE,
547
- framework=["Sentence Transformers", "PyTorch"],
622
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
548
623
  use_instructions=False,
549
624
  public_training_code=None,
550
625
  public_training_data=None,
551
626
  training_datasets={"SNLI", "Community QA"},
627
+ citation=SENTENCE_T5_CITATION,
552
628
  )
629
+ GTR_CITATION = """
630
+ @misc{ni2021largedualencodersgeneralizable,
631
+ title={Large Dual Encoders Are Generalizable Retrievers},
632
+ author={Jianmo Ni and Chen Qu and Jing Lu and Zhuyun Dai and Gustavo Hernández Ábrego and Ji Ma and Vincent Y. Zhao and Yi Luan and Keith B. Hall and Ming-Wei Chang and Yinfei Yang},
633
+ year={2021},
634
+ eprint={2112.07899},
635
+ archivePrefix={arXiv},
636
+ primaryClass={cs.IR},
637
+ url={https://arxiv.org/abs/2112.07899},
638
+ }
639
+ """
553
640
  gtr_t5_large = ModelMeta(
554
641
  loader=sentence_transformers_loader,
555
642
  name="sentence-transformers/gtr-t5-large",
643
+ model_type=["dense"],
556
644
  languages=["eng-Latn"], # in format eng-Latn
557
645
  open_weights=True,
558
646
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
@@ -564,7 +652,7 @@ gtr_t5_large = ModelMeta(
564
652
  max_tokens=512,
565
653
  reference="https://huggingface.co/sentence-transformers/gtr-t5-large",
566
654
  similarity_fn_name=ScoringFunction.COSINE,
567
- framework=["Sentence Transformers", "PyTorch"],
655
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
568
656
  use_instructions=False,
569
657
  public_training_code=None,
570
658
  public_training_data=None,
@@ -581,11 +669,13 @@ gtr_t5_large = ModelMeta(
581
669
  "NQ-PL", # translation not trained on
582
670
  "Community QA",
583
671
  },
672
+ citation=GTR_CITATION,
584
673
  )
585
674
 
586
675
  gtr_t5_xl = ModelMeta(
587
676
  loader=sentence_transformers_loader,
588
677
  name="sentence-transformers/gtr-t5-xl",
678
+ model_type=["dense"],
589
679
  languages=["eng-Latn"], # in format eng-Latn
590
680
  open_weights=True,
591
681
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
@@ -597,7 +687,7 @@ gtr_t5_xl = ModelMeta(
597
687
  max_tokens=512,
598
688
  reference="https://huggingface.co/sentence-transformers/gtr-t5-xl",
599
689
  similarity_fn_name=ScoringFunction.COSINE,
600
- framework=["Sentence Transformers", "PyTorch"],
690
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
601
691
  use_instructions=False,
602
692
  public_training_code=None,
603
693
  public_training_data=None,
@@ -614,10 +704,12 @@ gtr_t5_xl = ModelMeta(
614
704
  "NQ-PL", # translation not trained on
615
705
  "Community QA",
616
706
  },
707
+ citation=GTR_CITATION,
617
708
  )
618
709
  gtr_t5_xxl = ModelMeta(
619
710
  loader=sentence_transformers_loader,
620
711
  name="sentence-transformers/gtr-t5-xxl",
712
+ model_type=["dense"],
621
713
  languages=["eng-Latn"], # in format eng-Latn
622
714
  open_weights=True,
623
715
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
@@ -629,7 +721,7 @@ gtr_t5_xxl = ModelMeta(
629
721
  max_tokens=512,
630
722
  reference="https://huggingface.co/sentence-transformers/gtr-t5-xxl",
631
723
  similarity_fn_name=ScoringFunction.COSINE,
632
- framework=["Sentence Transformers", "PyTorch"],
724
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
633
725
  use_instructions=False,
634
726
  public_training_code=None,
635
727
  public_training_data=None,
@@ -646,11 +738,13 @@ gtr_t5_xxl = ModelMeta(
646
738
  "NQ-PL", # translation not trained on
647
739
  "Community QA",
648
740
  },
741
+ citation=GTR_CITATION,
649
742
  )
650
743
 
651
744
  gtr_t5_base = ModelMeta(
652
745
  loader=sentence_transformers_loader,
653
746
  name="sentence-transformers/gtr-t5-base",
747
+ model_type=["dense"],
654
748
  languages=["eng-Latn"], # in format eng-Latn
655
749
  open_weights=True,
656
750
  revision="7027e9594267928589816394bdd295273ddc0739",
@@ -662,7 +756,7 @@ gtr_t5_base = ModelMeta(
662
756
  max_tokens=512,
663
757
  reference="https://huggingface.co/sentence-transformers/gtr-t5-base",
664
758
  similarity_fn_name=ScoringFunction.COSINE,
665
- framework=["Sentence Transformers", "PyTorch"],
759
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
666
760
  use_instructions=False,
667
761
  public_training_code=None,
668
762
  public_training_data=None,
@@ -679,4 +773,69 @@ gtr_t5_base = ModelMeta(
679
773
  "NQ-PL", # translation not trained on
680
774
  "Community QA",
681
775
  },
776
+ citation=GTR_CITATION,
777
+ )
778
+
779
+ static_retrieval_mrl_en_v1 = ModelMeta(
780
+ loader=sentence_transformers_loader,
781
+ name="sentence-transformers/static-retrieval-mrl-en-v1",
782
+ revision="f60985c706f192d45d218078e49e5a8b6f15283a",
783
+ release_date="2024-10-24",
784
+ languages=["eng-Latn"],
785
+ n_parameters=3_125_4528,
786
+ memory_usage_mb=119,
787
+ max_tokens=np.inf,
788
+ embed_dim=1024,
789
+ license="apache-2.0",
790
+ open_weights=True,
791
+ public_training_code="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1/blob/main/train.py",
792
+ public_training_data=None,
793
+ framework=["PyTorch", "Sentence Transformers"],
794
+ reference="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1",
795
+ similarity_fn_name=ScoringFunction.COSINE,
796
+ use_instructions=False,
797
+ training_datasets={
798
+ "MSMARCO",
799
+ # gooaq
800
+ # s2orc
801
+ # allnli
802
+ # paq
803
+ # trivia-qa
804
+ # swim-ir-monolingual
805
+ # PubMedQA
806
+ # swim
807
+ "MIRACLRetrieval",
808
+ "MultiLongDocRetrieval",
809
+ "MrTidyRetrieval",
810
+ },
811
+ modalities=["text"],
812
+ model_type=["dense"],
813
+ )
814
+
815
+ multi_qa_mpnet_base_dot_v1 = ModelMeta(
816
+ loader=sentence_transformers_loader,
817
+ name="sentence-transformers/multi-qa-mpnet-base-dot-v1",
818
+ revision="3af7c6da5b3e1bea796ef6c97fe237538cbe6e7f",
819
+ release_date="2021-08-23",
820
+ languages=["eng-Latn"],
821
+ n_parameters=109486978,
822
+ memory_usage_mb=418.0,
823
+ max_tokens=512,
824
+ embed_dim=768,
825
+ license=None,
826
+ open_weights=True,
827
+ public_training_code="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1/blob/main/train_script.py",
828
+ public_training_data=None,
829
+ framework=["PyTorch", "Sentence Transformers"],
830
+ reference="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1",
831
+ similarity_fn_name=ScoringFunction.DOT_PRODUCT,
832
+ use_instructions=False,
833
+ training_datasets={
834
+ "MSMARCO",
835
+ "YahooAnswersTopicsClassification",
836
+ "NQ",
837
+ },
838
+ adapted_from="microsoft/mpnet-base",
839
+ modalities=["text"],
840
+ model_type=["dense"],
682
841
  )
@@ -1,31 +1,32 @@
1
- from mteb.models.model_meta import ModelMeta
2
- from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
-
4
- codemodernbert_crow_meta = ModelMeta(
5
- loader=sentence_transformers_loader,
6
- name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
- languages=["eng-Latn"],
8
- open_weights=True,
9
- revision="044a7a4b552f86e284817234c336bccf16f895ce",
10
- release_date="2025-04-21",
11
- n_parameters=151668480,
12
- memory_usage_mb=607,
13
- embed_dim=768,
14
- license="apache-2.0",
15
- max_tokens=1024,
16
- reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
17
- similarity_fn_name="cosine",
18
- framework=["Sentence Transformers", "PyTorch"],
19
- use_instructions=False,
20
- public_training_code=None,
21
- public_training_data=None,
22
- training_datasets={
23
- "CodeSearchNetRetrieval",
24
- # "code-search-net/code_search_net",
25
- # "Shuu12121/python-codesearch-filtered",
26
- # "Shuu12121/java-codesearch-filtered",
27
- # "Shuu12121/javascript-codesearch-filtered",
28
- # "Shuu12121/ruby-codesearch-filtered",
29
- # "Shuu12121/rust-codesearch-filtered",
30
- },
31
- )
1
+ from mteb.models.model_meta import ModelMeta
2
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
+
4
+ codemodernbert_crow_meta = ModelMeta(
5
+ loader=sentence_transformers_loader,
6
+ name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
+ model_type=["dense"],
8
+ languages=["eng-Latn"],
9
+ open_weights=True,
10
+ revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
+ release_date="2025-04-21",
12
+ n_parameters=151668480,
13
+ memory_usage_mb=607,
14
+ embed_dim=768,
15
+ license="apache-2.0",
16
+ max_tokens=1024,
17
+ reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
18
+ similarity_fn_name="cosine",
19
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
20
+ use_instructions=False,
21
+ public_training_code=None,
22
+ public_training_data=None,
23
+ training_datasets={
24
+ "CodeSearchNetRetrieval",
25
+ # "code-search-net/code_search_net",
26
+ # "Shuu12121/python-codesearch-filtered",
27
+ # "Shuu12121/java-codesearch-filtered",
28
+ # "Shuu12121/javascript-codesearch-filtered",
29
+ # "Shuu12121/ruby-codesearch-filtered",
30
+ # "Shuu12121/rust-codesearch-filtered",
31
+ },
32
+ )