mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,17 @@ from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
11
  from mteb.types import Array, BatchedInput, PromptType
12
12
 
13
+ BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
14
+ doi = {10.48550/ARXIV.2201.12086},
15
+ url = {https://arxiv.org/abs/2201.12086},
16
+ author = {Li, Junnan and Li, Dongxu and Xiong, Caiming and Hoi, Steven},
17
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
18
+ title = {BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation},
19
+ publisher = {arXiv},
20
+ year = {2022},
21
+ copyright = {Creative Commons Attribution 4.0 International}
22
+ }"""
23
+
13
24
 
14
25
  class BLIPModel(AbsEncoder):
15
26
  def __init__(
@@ -117,8 +128,9 @@ class BLIPModel(AbsEncoder):
117
128
 
118
129
  # in descending order of usage (downloads from huggingface)
119
130
  blip_image_captioning_large = ModelMeta(
120
- loader=BLIPModel, # type: ignore
131
+ loader=BLIPModel,
121
132
  name="Salesforce/blip-image-captioning-large",
133
+ model_type=["dense"],
122
134
  languages=["eng-Latn"],
123
135
  revision="2227ac38c9f16105cb0412e7cab4759978a8fd90",
124
136
  release_date="2023-12-07",
@@ -131,7 +143,7 @@ blip_image_captioning_large = ModelMeta(
131
143
  open_weights=True,
132
144
  public_training_code="https://github.com/salesforce/BLIP",
133
145
  public_training_data="https://github.com/salesforce/BLIP",
134
- framework=["PyTorch"],
146
+ framework=["PyTorch", "Transformers", "safetensors"],
135
147
  reference="https://huggingface.co/Salesforce/blip-image-captioning-large",
136
148
  similarity_fn_name=ScoringFunction.COSINE,
137
149
  use_instructions=False,
@@ -140,11 +152,13 @@ blip_image_captioning_large = ModelMeta(
140
152
  # CC3M+CC12M+SBU
141
153
  # LAION115M
142
154
  ),
155
+ citation=BLIP_CITATION,
143
156
  )
144
157
 
145
158
  blip_image_captioning_base = ModelMeta(
146
- loader=BLIPModel, # type: ignore
159
+ loader=BLIPModel,
147
160
  name="Salesforce/blip-image-captioning-base",
161
+ model_type=["dense"],
148
162
  languages=["eng-Latn"],
149
163
  revision="89b09ea1789f7addf2f6d6f0dfc4ce10ab58ef84",
150
164
  release_date="2023-08-01",
@@ -157,7 +171,7 @@ blip_image_captioning_base = ModelMeta(
157
171
  open_weights=True,
158
172
  public_training_code="https://github.com/salesforce/BLIP",
159
173
  public_training_data="https://github.com/salesforce/BLIP",
160
- framework=["PyTorch"],
174
+ framework=["PyTorch", "Transformers"],
161
175
  reference="https://huggingface.co/Salesforce/blip-image-captioning-base",
162
176
  similarity_fn_name=ScoringFunction.COSINE,
163
177
  use_instructions=False,
@@ -166,12 +180,14 @@ blip_image_captioning_base = ModelMeta(
166
180
  # CC3M+CC12M+SBU
167
181
  # LAION115M
168
182
  ),
183
+ citation=BLIP_CITATION,
169
184
  )
170
185
 
171
186
 
172
187
  blip_vqa_base = ModelMeta(
173
- loader=BLIPModel, # type: ignore
188
+ loader=BLIPModel,
174
189
  name="Salesforce/blip-vqa-base",
190
+ model_type=["dense"],
175
191
  languages=["eng-Latn"],
176
192
  revision="c7df8e7cd7aa2ee9af18f56e2b29e59a92651b64",
177
193
  release_date="2023-12-07",
@@ -184,7 +200,7 @@ blip_vqa_base = ModelMeta(
184
200
  open_weights=True,
185
201
  public_training_code="https://github.com/salesforce/BLIP",
186
202
  public_training_data="https://github.com/salesforce/BLIP",
187
- framework=["PyTorch"],
203
+ framework=["PyTorch", "Transformers", "safetensors"],
188
204
  reference="https://huggingface.co/Salesforce/blip-vqa-base",
189
205
  similarity_fn_name=ScoringFunction.COSINE,
190
206
  use_instructions=False,
@@ -192,11 +208,13 @@ blip_vqa_base = ModelMeta(
192
208
  # CC3M+CC12M+SBU
193
209
  # LAION115M
194
210
  ),
211
+ citation=BLIP_CITATION,
195
212
  )
196
213
 
197
214
  blip_vqa_capfilt_large = ModelMeta(
198
- loader=BLIPModel, # type: ignore
215
+ loader=BLIPModel,
199
216
  name="Salesforce/blip-vqa-capfilt-large",
217
+ model_type=["dense"],
200
218
  languages=["eng-Latn"],
201
219
  revision="e53f95265aeab69013fabb5380500ab984adbbb4",
202
220
  release_date="2023-01-22",
@@ -209,7 +227,7 @@ blip_vqa_capfilt_large = ModelMeta(
209
227
  open_weights=True,
210
228
  public_training_code="https://github.com/salesforce/BLIP",
211
229
  public_training_data="https://github.com/salesforce/BLIP",
212
- framework=["PyTorch"],
230
+ framework=["PyTorch", "Transformers"],
213
231
  reference="https://huggingface.co/Salesforce/blip-vqa-capfilt-large",
214
232
  similarity_fn_name=ScoringFunction.COSINE,
215
233
  use_instructions=False,
@@ -217,11 +235,13 @@ blip_vqa_capfilt_large = ModelMeta(
217
235
  # CC3M+CC12M+SBU
218
236
  # LAION115M
219
237
  ),
238
+ citation=BLIP_CITATION,
220
239
  )
221
240
 
222
241
  blip_itm_base_coco = ModelMeta(
223
- loader=BLIPModel, # type: ignore
242
+ loader=BLIPModel,
224
243
  name="Salesforce/blip-itm-base-coco",
244
+ model_type=["dense"],
225
245
  languages=["eng-Latn"],
226
246
  revision="7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f",
227
247
  release_date="2023-08-01",
@@ -234,7 +254,7 @@ blip_itm_base_coco = ModelMeta(
234
254
  open_weights=True,
235
255
  public_training_code="https://github.com/salesforce/BLIP",
236
256
  public_training_data="https://github.com/salesforce/BLIP",
237
- framework=["PyTorch"],
257
+ framework=["PyTorch", "Transformers"],
238
258
  reference="https://huggingface.co/Salesforce/blip-itm-base-coco",
239
259
  similarity_fn_name=ScoringFunction.COSINE,
240
260
  use_instructions=False,
@@ -242,11 +262,13 @@ blip_itm_base_coco = ModelMeta(
242
262
  # CC3M+CC12M+SBU
243
263
  # LAION115M
244
264
  ),
265
+ citation=BLIP_CITATION,
245
266
  )
246
267
 
247
268
  blip_itm_large_coco = ModelMeta(
248
- loader=BLIPModel, # type: ignore
269
+ loader=BLIPModel,
249
270
  name="Salesforce/blip-itm-large-coco",
271
+ model_type=["dense"],
250
272
  languages=["eng-Latn"],
251
273
  revision="fef05cafc05298067cbbca00b125749394a77a6f",
252
274
  release_date="2023-08-01",
@@ -259,7 +281,7 @@ blip_itm_large_coco = ModelMeta(
259
281
  open_weights=True,
260
282
  public_training_code="https://github.com/salesforce/BLIP",
261
283
  public_training_data="https://github.com/salesforce/BLIP",
262
- framework=["PyTorch"],
284
+ framework=["PyTorch", "Transformers"],
263
285
  reference="https://huggingface.co/Salesforce/blip-itm-large-coco",
264
286
  similarity_fn_name=ScoringFunction.COSINE,
265
287
  use_instructions=False,
@@ -268,11 +290,13 @@ blip_itm_large_coco = ModelMeta(
268
290
  # CC3M+CC12M+SBU
269
291
  # LAION115M
270
292
  ),
293
+ citation=BLIP_CITATION,
271
294
  )
272
295
 
273
296
  blip_itm_base_flickr = ModelMeta(
274
- loader=BLIPModel, # type: ignore
297
+ loader=BLIPModel,
275
298
  name="Salesforce/blip-itm-base-flickr",
299
+ model_type=["dense"],
276
300
  languages=["eng-Latn"],
277
301
  revision="1de29e660d91ae1786c1876212ea805a22eab251",
278
302
  release_date="2023-08-01",
@@ -285,7 +309,7 @@ blip_itm_base_flickr = ModelMeta(
285
309
  open_weights=True,
286
310
  public_training_code="https://github.com/salesforce/BLIP",
287
311
  public_training_data="https://github.com/salesforce/BLIP",
288
- framework=["PyTorch"],
312
+ framework=["PyTorch", "Transformers"],
289
313
  reference="https://huggingface.co/Salesforce/blip-itm-base-flickr",
290
314
  similarity_fn_name=ScoringFunction.COSINE,
291
315
  use_instructions=False,
@@ -294,11 +318,13 @@ blip_itm_base_flickr = ModelMeta(
294
318
  # LAION115M
295
319
  # Flickr30k
296
320
  ),
321
+ citation=BLIP_CITATION,
297
322
  )
298
323
 
299
324
  blip_itm_large_flickr = ModelMeta(
300
- loader=BLIPModel, # type: ignore
325
+ loader=BLIPModel,
301
326
  name="Salesforce/blip-itm-large-flickr",
327
+ model_type=["dense"],
302
328
  languages=["eng-Latn"],
303
329
  revision="bda12e6506758f54261b5ab174b2c55a3ba143fb",
304
330
  release_date="2023-08-01",
@@ -311,7 +337,7 @@ blip_itm_large_flickr = ModelMeta(
311
337
  open_weights=True,
312
338
  public_training_code="https://github.com/salesforce/BLIP",
313
339
  public_training_data="https://github.com/salesforce/BLIP",
314
- framework=["PyTorch"],
340
+ framework=["PyTorch", "Transformers"],
315
341
  reference="https://huggingface.co/Salesforce/blip-itm-large-flickr",
316
342
  similarity_fn_name=ScoringFunction.COSINE,
317
343
  use_instructions=False,
@@ -319,4 +345,5 @@ blip_itm_large_flickr = ModelMeta(
319
345
  # CC3M+CC12M+SBU
320
346
  # LAION115M
321
347
  ),
348
+ citation=BLIP_CITATION,
322
349
  )
@@ -1,5 +1,4 @@
1
1
  import logging
2
- from typing import Any
3
2
 
4
3
  from mteb._create_dataloaders import _create_text_queries_dataloader
5
4
  from mteb._requires_package import requires_package
@@ -8,6 +7,7 @@ from mteb.models.model_meta import ModelMeta
8
7
  from mteb.models.models_protocols import SearchProtocol
9
8
  from mteb.types import (
10
9
  CorpusDatasetType,
10
+ EncodeKwargs,
11
11
  InstructionDatasetType,
12
12
  QueryDatasetType,
13
13
  RetrievalOutputType,
@@ -49,7 +49,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
49
49
  task_metadata: TaskMetadata,
50
50
  hf_split: str,
51
51
  hf_subset: str,
52
- encode_kwargs: dict[str, Any],
52
+ encode_kwargs: EncodeKwargs,
53
53
  ) -> None:
54
54
  logger.info("Encoding Corpus...")
55
55
  corpus_texts = [
@@ -74,7 +74,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
74
74
  hf_split: str,
75
75
  hf_subset: str,
76
76
  top_k: int,
77
- encode_kwargs: dict[str, Any],
77
+ encode_kwargs: EncodeKwargs,
78
78
  instructions: InstructionDatasetType | None = None,
79
79
  top_ranked: TopRankedDocumentsType | None = None,
80
80
  ) -> RetrievalOutputType:
@@ -113,7 +113,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
113
113
 
114
114
  def encode(self, texts: list[str]):
115
115
  """Encode input text as term vectors"""
116
- return bm25s.tokenize(texts, stopwords=self.stopwords, stemmer=self.stemmer) # type: ignore
116
+ return bm25s.tokenize(texts, stopwords=self.stopwords, stemmer=self.stemmer)
117
117
 
118
118
  return BM25Search(**kwargs)
119
119
 
@@ -121,6 +121,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
121
121
  bm25_s = ModelMeta(
122
122
  loader=bm25_loader,
123
123
  name="bm25s",
124
+ model_type=["dense"],
124
125
  languages=["eng-Latn"],
125
126
  open_weights=True,
126
127
  revision="0_1_10",
@@ -25,6 +25,7 @@ class BMRetrieverWrapper(InstructSentenceTransformerModel):
25
25
  self,
26
26
  model_name: str,
27
27
  revision: str,
28
+ device: str | None = None,
28
29
  instruction_template: str
29
30
  | Callable[[str, PromptType | None], str]
30
31
  | None = None,
@@ -52,6 +53,7 @@ class BMRetrieverWrapper(InstructSentenceTransformerModel):
52
53
 
53
54
  transformer = Transformer(
54
55
  model_name,
56
+ device=device,
55
57
  **kwargs,
56
58
  )
57
59
  pooling = Pooling(
@@ -90,6 +92,7 @@ BMRetriever_410M = ModelMeta(
90
92
  apply_instruction_to_passages=True,
91
93
  ),
92
94
  name="BMRetriever/BMRetriever-410M",
95
+ model_type=["dense"],
93
96
  languages=["eng-Latn"],
94
97
  open_weights=True,
95
98
  revision="e3569bfbcfe3a1bc48c142e11a7b0f38e86065a3",
@@ -101,7 +104,7 @@ BMRetriever_410M = ModelMeta(
101
104
  license="mit",
102
105
  reference="https://huggingface.co/BMRetriever/BMRetriever-410M",
103
106
  similarity_fn_name="cosine",
104
- framework=["Sentence Transformers", "PyTorch"],
107
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
105
108
  use_instructions=True,
106
109
  public_training_code=None,
107
110
  public_training_data=None,
@@ -119,6 +122,7 @@ BMRetriever_1B = ModelMeta(
119
122
  apply_instruction_to_passages=True,
120
123
  ),
121
124
  name="BMRetriever/BMRetriever-1B",
125
+ model_type=["dense"],
122
126
  languages=["eng-Latn"],
123
127
  open_weights=True,
124
128
  revision="1b758c5f4d3af48ef6035cc4088bdbcd7df43ca6",
@@ -130,7 +134,7 @@ BMRetriever_1B = ModelMeta(
130
134
  license="mit",
131
135
  reference="https://huggingface.co/BMRetriever/BMRetriever-1B",
132
136
  similarity_fn_name="cosine",
133
- framework=["Sentence Transformers", "PyTorch"],
137
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
134
138
  use_instructions=True,
135
139
  public_training_code=None,
136
140
  public_training_data=None,
@@ -148,6 +152,7 @@ BMRetriever_2B = ModelMeta(
148
152
  apply_instruction_to_passages=True,
149
153
  ),
150
154
  name="BMRetriever/BMRetriever-2B",
155
+ model_type=["dense"],
151
156
  languages=["eng-Latn"],
152
157
  open_weights=True,
153
158
  revision="718179afd57926369c347f46eee616db81084941",
@@ -159,7 +164,7 @@ BMRetriever_2B = ModelMeta(
159
164
  license="mit",
160
165
  reference="https://huggingface.co/BMRetriever/BMRetriever-2B",
161
166
  similarity_fn_name="cosine",
162
- framework=["Sentence Transformers", "PyTorch"],
167
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
163
168
  use_instructions=True,
164
169
  public_training_code=None,
165
170
  public_training_data=None,
@@ -177,6 +182,7 @@ BMRetriever_7B = ModelMeta(
177
182
  apply_instruction_to_passages=True,
178
183
  ),
179
184
  name="BMRetriever/BMRetriever-7B",
185
+ model_type=["dense"],
180
186
  languages=["eng-Latn"],
181
187
  open_weights=True,
182
188
  revision="13e6adb9273c5f254e037987d6b44e9e4b005b9a",
@@ -188,7 +194,7 @@ BMRetriever_7B = ModelMeta(
188
194
  license="mit",
189
195
  reference="https://huggingface.co/BMRetriever/BMRetriever-7B",
190
196
  similarity_fn_name="cosine",
191
- framework=["Sentence Transformers", "PyTorch"],
197
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
192
198
  use_instructions=True,
193
199
  public_training_code=None,
194
200
  public_training_data=None,
@@ -3,6 +3,13 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
3
3
 
4
4
  from .bge_models import bge_m3_training_data
5
5
 
6
+ CADET_CITATION = """@article{tamber2025conventionalcontrastivelearningfalls,
7
+ title={Conventional Contrastive Learning Often Falls Short: Improving Dense Retrieval with Cross-Encoder Listwise Distillation and Synthetic Data},
8
+ author={Manveer Singh Tamber and Suleman Kazi and Vivek Sourabh and Jimmy Lin},
9
+ journal={arXiv:2505.19274},
10
+ year={2025}
11
+ }"""
12
+
6
13
  cadet_training_data = {
7
14
  # we train with the corpora of FEVER, MSMARCO, and DBPEDIA. We only train with synthetic generated queries.
8
15
  # However, we do use queries from MSMARCO as examples for synthetic query generation.
@@ -28,6 +35,7 @@ cadet_embed = ModelMeta(
28
35
  },
29
36
  ),
30
37
  name="manveertamber/cadet-embed-base-v1",
38
+ model_type=["dense"],
31
39
  languages=["eng-Latn"],
32
40
  revision="8056d118be37a566f20972a5f35cda815f6bc47e",
33
41
  open_weights=True,
@@ -39,11 +47,12 @@ cadet_embed = ModelMeta(
39
47
  max_tokens=512,
40
48
  reference="https://huggingface.co/manveertamber/cadet-embed-base-v1",
41
49
  similarity_fn_name="cosine",
42
- framework=["Sentence Transformers", "PyTorch"],
50
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
43
51
  use_instructions=True,
44
52
  public_training_code="https://github.com/manveertamber/cadet-dense-retrieval",
45
53
  # we provide the code to generate the training data
46
54
  public_training_data="https://github.com/manveertamber/cadet-dense-retrieval",
47
55
  training_datasets=cadet_training_data,
48
56
  adapted_from="intfloat/e5-base-unsupervised",
57
+ citation=CADET_CITATION,
49
58
  )
@@ -24,6 +24,16 @@ if TYPE_CHECKING:
24
24
  )
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
+ CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
28
+ title={Contextual Document Embeddings},
29
+ author={John X. Morris and Alexander M. Rush},
30
+ year={2024},
31
+ eprint={2410.02525},
32
+ archivePrefix={arXiv},
33
+ primaryClass={cs.CL},
34
+ url={https://arxiv.org/abs/2410.02525},
35
+ }"""
36
+
27
37
 
28
38
  class CDEWrapper(SentenceTransformerEncoderWrapper):
29
39
  dataset_embeddings: torch.Tensor | None = None
@@ -39,10 +49,17 @@ class CDEWrapper(SentenceTransformerEncoderWrapper):
39
49
  "InstructionReranking",
40
50
  )
41
51
 
42
- def __init__(self, model: str, *args, **kwargs: Any) -> None:
52
+ def __init__(
53
+ self,
54
+ model: str,
55
+ revision: str | None = None,
56
+ device: str | None = None,
57
+ *args,
58
+ **kwargs: Any,
59
+ ) -> None:
43
60
  from transformers import AutoConfig
44
61
 
45
- super().__init__(model, *args, **kwargs)
62
+ super().__init__(model, revision=revision, device=device, *args, **kwargs)
46
63
  model_config = AutoConfig.from_pretrained(model, trust_remote_code=True)
47
64
  self.max_sentences = model_config.transductive_corpus_size
48
65
 
@@ -199,6 +216,7 @@ cde_small_v1 = ModelMeta(
199
216
  trust_remote_code=True,
200
217
  ),
201
218
  name="jxm/cde-small-v1",
219
+ model_type=["dense"],
202
220
  languages=["eng-Latn"],
203
221
  open_weights=True,
204
222
  revision="e151df18af0d7f1d1c37b074fee58406ececf19f",
@@ -209,7 +227,7 @@ cde_small_v1 = ModelMeta(
209
227
  embed_dim=768,
210
228
  license="mit",
211
229
  similarity_fn_name=ScoringFunction.COSINE,
212
- framework=["Sentence Transformers"],
230
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
213
231
  reference="https://huggingface.co/jxm/cde-small-v1",
214
232
  use_instructions=True,
215
233
  adapted_from="nomic-ai/nomic-bert-2048",
@@ -217,6 +235,7 @@ cde_small_v1 = ModelMeta(
217
235
  training_datasets=bge_full_data,
218
236
  public_training_code="https://github.com/jxmorris12/cde",
219
237
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
238
+ citation=CDE_CITATION,
220
239
  )
221
240
 
222
241
  cde_small_v2 = ModelMeta(
@@ -226,6 +245,7 @@ cde_small_v2 = ModelMeta(
226
245
  trust_remote_code=True,
227
246
  ),
228
247
  name="jxm/cde-small-v2",
248
+ model_type=["dense"],
229
249
  languages=["eng-Latn"],
230
250
  open_weights=True,
231
251
  revision="4e1d021a6c3fd7ce8aa0a7204057eee5ae61d390",
@@ -236,7 +256,7 @@ cde_small_v2 = ModelMeta(
236
256
  embed_dim=768,
237
257
  license="mit",
238
258
  similarity_fn_name=ScoringFunction.COSINE,
239
- framework=["Sentence Transformers"],
259
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
240
260
  reference="https://huggingface.co/jxm/cde-small-v1",
241
261
  use_instructions=True,
242
262
  adapted_from="answerdotai/ModernBERT-base",
@@ -244,4 +264,5 @@ cde_small_v2 = ModelMeta(
244
264
  training_datasets=bge_full_data,
245
265
  public_training_code="https://github.com/jxmorris12/cde",
246
266
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
267
+ citation=CDE_CITATION,
247
268
  )
@@ -115,8 +115,9 @@ CLIP_CITATION = """
115
115
 
116
116
 
117
117
  clip_vit_large_patch14 = ModelMeta(
118
- loader=CLIPModel, # type: ignore
118
+ loader=CLIPModel,
119
119
  name="openai/clip-vit-large-patch14",
120
+ model_type=["dense"],
120
121
  languages=["eng-Latn"],
121
122
  revision="32bd64288804d66eefd0ccbe215aa642df71cc41",
122
123
  release_date="2021-02-26",
@@ -129,7 +130,7 @@ clip_vit_large_patch14 = ModelMeta(
129
130
  open_weights=True,
130
131
  public_training_code=None,
131
132
  public_training_data=None,
132
- framework=["PyTorch"],
133
+ framework=["PyTorch", "Transformers", "safetensors"],
133
134
  reference="https://huggingface.co/openai/clip-vit-large-patch14",
134
135
  similarity_fn_name=ScoringFunction.COSINE,
135
136
  use_instructions=False,
@@ -138,8 +139,9 @@ clip_vit_large_patch14 = ModelMeta(
138
139
  )
139
140
 
140
141
  clip_vit_base_patch32 = ModelMeta(
141
- loader=CLIPModel, # type: ignore
142
+ loader=CLIPModel,
142
143
  name="openai/clip-vit-base-patch32",
144
+ model_type=["dense"],
143
145
  languages=["eng-Latn"],
144
146
  revision="3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268",
145
147
  release_date="2021-02-26",
@@ -152,7 +154,7 @@ clip_vit_base_patch32 = ModelMeta(
152
154
  open_weights=True,
153
155
  public_training_code=None,
154
156
  public_training_data=None,
155
- framework=["PyTorch"],
157
+ framework=["PyTorch", "Transformers"],
156
158
  reference="https://huggingface.co/openai/clip-vit-base-patch32",
157
159
  similarity_fn_name=ScoringFunction.COSINE,
158
160
  use_instructions=False,
@@ -161,8 +163,9 @@ clip_vit_base_patch32 = ModelMeta(
161
163
  )
162
164
 
163
165
  clip_vit_base_patch16 = ModelMeta(
164
- loader=CLIPModel, # type: ignore
166
+ loader=CLIPModel,
165
167
  name="openai/clip-vit-base-patch16",
168
+ model_type=["dense"],
166
169
  languages=["eng-Latn"],
167
170
  revision="57c216476eefef5ab752ec549e440a49ae4ae5f3",
168
171
  release_date="2021-02-26",
@@ -175,7 +178,7 @@ clip_vit_base_patch16 = ModelMeta(
175
178
  open_weights=True,
176
179
  public_training_code=None,
177
180
  public_training_data=None,
178
- framework=["PyTorch"],
181
+ framework=["PyTorch", "Transformers"],
179
182
  reference="https://huggingface.co/openai/clip-vit-base-patch16",
180
183
  similarity_fn_name=ScoringFunction.COSINE,
181
184
  use_instructions=False,
@@ -0,0 +1,100 @@
1
+ from mteb.models.model_meta import (
2
+ ModelMeta,
3
+ ScoringFunction,
4
+ )
5
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
+
7
+ from .e5_models import ME5_TRAINING_DATA, model_prompts
8
+
9
+ E5_NL_CITATION = """
10
+ @misc{banar2025mtebnle5nlembeddingbenchmark,
11
+ archiveprefix = {arXiv},
12
+ author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
13
+ eprint = {2509.12340},
14
+ primaryclass = {cs.CL},
15
+ title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
16
+ url = {https://arxiv.org/abs/2509.12340},
17
+ year = {2025},
18
+ }
19
+ """
20
+
21
+ e5_nl_small = ModelMeta(
22
+ loader=sentence_transformers_loader,
23
+ loader_kwargs=dict(
24
+ model_prompts=model_prompts,
25
+ ),
26
+ name="clips/e5-small-trm-nl",
27
+ model_type=["dense"],
28
+ languages=["nld-Latn"],
29
+ open_weights=True,
30
+ revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
31
+ release_date="2025-09-23",
32
+ n_parameters=40_800_000,
33
+ memory_usage_mb=78,
34
+ embed_dim=384,
35
+ license="mit",
36
+ max_tokens=512,
37
+ reference="https://huggingface.co/clips/e5-small-trm-nl",
38
+ similarity_fn_name=ScoringFunction.COSINE,
39
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
40
+ use_instructions=True,
41
+ public_training_code="https://github.com/ELotfi/e5-nl",
42
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
43
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
44
+ adapted_from="intfloat/multilingual-e5-small",
45
+ citation=E5_NL_CITATION,
46
+ )
47
+
48
+ e5_nl_base = ModelMeta(
49
+ loader=sentence_transformers_loader,
50
+ loader_kwargs=dict(
51
+ model_prompts=model_prompts,
52
+ ),
53
+ name="clips/e5-base-trm-nl",
54
+ model_type=["dense"],
55
+ languages=["nld-Latn"],
56
+ open_weights=True,
57
+ revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
58
+ release_date="2025-09-23",
59
+ n_parameters=124_400_000,
60
+ memory_usage_mb=237,
61
+ embed_dim=768,
62
+ license="mit",
63
+ max_tokens=514,
64
+ reference="https://huggingface.co/clips/e5-base-trm-nl",
65
+ similarity_fn_name=ScoringFunction.COSINE,
66
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
67
+ use_instructions=True,
68
+ public_training_code="https://github.com/ELotfi/e5-nl",
69
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
70
+ adapted_from="intfloat/multilingual-e5-base",
71
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
72
+ citation=E5_NL_CITATION,
73
+ )
74
+
75
+ e5_nl_large = ModelMeta(
76
+ loader=sentence_transformers_loader,
77
+ loader_kwargs=dict(
78
+ model_prompts=model_prompts,
79
+ ),
80
+ name="clips/e5-large-trm-nl",
81
+ model_type=["dense"],
82
+ languages=["nld-Latn"],
83
+ open_weights=True,
84
+ revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
85
+ release_date="2025-09-23",
86
+ n_parameters=355_000_000,
87
+ memory_usage_mb=1355,
88
+ embed_dim=1024,
89
+ license="mit",
90
+ max_tokens=514,
91
+ reference="https://huggingface.co/clips/e5-large-trm-nl",
92
+ similarity_fn_name=ScoringFunction.COSINE,
93
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
94
+ use_instructions=True,
95
+ public_training_code="https://github.com/ELotfi/e5-nl",
96
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
97
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
98
+ adapted_from="intfloat/multilingual-e5-large",
99
+ citation=E5_NL_CITATION,
100
+ )