mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -140,11 +140,12 @@ arctic_v2_training_datasets = {
140
140
  arctic_embed_xs = ModelMeta(
141
141
  loader=sentence_transformers_loader,
142
142
  name="Snowflake/snowflake-arctic-embed-xs",
143
+ model_type=["dense"],
143
144
  revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e",
144
145
  release_date="2024-07-08", # initial commit of hf model.
145
146
  languages=["eng-Latn"],
146
147
  open_weights=True,
147
- framework=["Sentence Transformers", "PyTorch"],
148
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
148
149
  n_parameters=22_600_000,
149
150
  memory_usage_mb=86,
150
151
  max_tokens=512,
@@ -165,11 +166,12 @@ arctic_embed_xs = ModelMeta(
165
166
  arctic_embed_s = ModelMeta(
166
167
  loader=sentence_transformers_loader,
167
168
  name="Snowflake/snowflake-arctic-embed-s",
169
+ model_type=["dense"],
168
170
  revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f",
169
171
  release_date="2024-04-12", # initial commit of hf model.
170
172
  languages=["eng-Latn"],
171
173
  open_weights=True,
172
- framework=["Sentence Transformers", "PyTorch"],
174
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
173
175
  n_parameters=32_200_000,
174
176
  memory_usage_mb=127,
175
177
  max_tokens=512,
@@ -190,11 +192,12 @@ arctic_embed_s = ModelMeta(
190
192
  arctic_embed_m = ModelMeta(
191
193
  loader=sentence_transformers_loader,
192
194
  name="Snowflake/snowflake-arctic-embed-m",
195
+ model_type=["dense"],
193
196
  revision="cc17beacbac32366782584c8752220405a0f3f40",
194
197
  release_date="2024-04-12", # initial commit of hf model.
195
198
  languages=["eng-Latn"],
196
199
  open_weights=True,
197
- framework=["Sentence Transformers", "PyTorch"],
200
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
198
201
  n_parameters=109_000_000,
199
202
  memory_usage_mb=415,
200
203
  max_tokens=512,
@@ -215,11 +218,12 @@ arctic_embed_m_long = ModelMeta(
215
218
  loader=sentence_transformers_loader,
216
219
  loader_kwargs={"trust_remote_code": True},
217
220
  name="Snowflake/snowflake-arctic-embed-m-long",
221
+ model_type=["dense"],
218
222
  revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1",
219
223
  release_date="2024-04-12", # initial commit of hf model.
220
224
  languages=["eng-Latn"],
221
225
  open_weights=True,
222
- framework=["Sentence Transformers", "PyTorch"],
226
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
223
227
  n_parameters=137_000_000,
224
228
  memory_usage_mb=522,
225
229
  max_tokens=2048,
@@ -239,11 +243,12 @@ arctic_embed_m_long = ModelMeta(
239
243
  arctic_embed_l = ModelMeta(
240
244
  loader=sentence_transformers_loader,
241
245
  name="Snowflake/snowflake-arctic-embed-l",
246
+ model_type=["dense"],
242
247
  revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c",
243
248
  release_date="2024-04-12", # initial commit of hf model.
244
249
  languages=["eng-Latn"],
245
250
  open_weights=True,
246
- framework=["Sentence Transformers", "PyTorch"],
251
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
247
252
  n_parameters=335_000_000,
248
253
  memory_usage_mb=1274,
249
254
  max_tokens=512,
@@ -268,11 +273,12 @@ arctic_embed_m_v1_5 = ModelMeta(
268
273
  },
269
274
  ),
270
275
  name="Snowflake/snowflake-arctic-embed-m-v1.5",
276
+ model_type=["dense"],
271
277
  revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47",
272
278
  release_date="2024-07-08", # initial commit of hf model.
273
279
  languages=["eng-Latn"],
274
280
  open_weights=True,
275
- framework=["Sentence Transformers", "PyTorch"],
281
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
276
282
  n_parameters=109_000_000,
277
283
  memory_usage_mb=415,
278
284
  max_tokens=512,
@@ -293,11 +299,12 @@ arctic_embed_m_v2_0 = ModelMeta(
293
299
  loader=sentence_transformers_loader,
294
300
  loader_kwargs={"trust_remote_code": True},
295
301
  name="Snowflake/snowflake-arctic-embed-m-v2.0",
302
+ model_type=["dense"],
296
303
  revision="f2a7d59d80dfda5b1d14f096f3ce88bb6bf9ebdc",
297
304
  release_date="2024-12-04", # initial commit of hf model.
298
305
  languages=LANGUAGES_V2_0,
299
306
  open_weights=True,
300
- framework=["Sentence Transformers", "PyTorch"],
307
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
301
308
  n_parameters=305_000_000,
302
309
  memory_usage_mb=1165,
303
310
  max_tokens=8192,
@@ -317,11 +324,12 @@ arctic_embed_m_v2_0 = ModelMeta(
317
324
  arctic_embed_l_v2_0 = ModelMeta(
318
325
  loader=sentence_transformers_loader,
319
326
  name="Snowflake/snowflake-arctic-embed-l-v2.0",
327
+ model_type=["dense"],
320
328
  revision="edc2df7b6c25794b340229ca082e7c78782e6374",
321
329
  release_date="2024-12-04", # initial commit of hf model.
322
330
  languages=LANGUAGES_V2_0,
323
331
  open_weights=True,
324
- framework=["Sentence Transformers", "PyTorch"],
332
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
325
333
  n_parameters=568_000_000,
326
334
  memory_usage_mb=2166,
327
335
  max_tokens=8192,
@@ -10,6 +10,7 @@ b1ade_training_data = {
10
10
  b1ade_embed = ModelMeta(
11
11
  loader=sentence_transformers_loader,
12
12
  name="w601sxs/b1ade-embed",
13
+ model_type=["dense"],
13
14
  languages=["eng-Latn"],
14
15
  revision="3bdac13927fdc888b903db93b2ffdbd90b295a69",
15
16
  open_weights=True,
@@ -21,7 +22,7 @@ b1ade_embed = ModelMeta(
21
22
  max_tokens=4096,
22
23
  reference="https://huggingface.co/w601sxs/b1ade-embed",
23
24
  similarity_fn_name=ScoringFunction.COSINE,
24
- framework=["Sentence Transformers", "PyTorch"],
25
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
25
26
  use_instructions=False,
26
27
  public_training_code=None,
27
28
  public_training_data=None,
@@ -155,6 +155,7 @@ class BedrockModel(AbsEncoder):
155
155
 
156
156
  amazon_titan_embed_text_v1 = ModelMeta(
157
157
  name="bedrock/amazon-titan-embed-text-v1",
158
+ model_type=["dense"],
158
159
  revision="1",
159
160
  release_date="2023-09-27",
160
161
  languages=None, # not specified
@@ -181,6 +182,7 @@ amazon_titan_embed_text_v1 = ModelMeta(
181
182
 
182
183
  amazon_titan_embed_text_v2 = ModelMeta(
183
184
  name="bedrock/amazon-titan-embed-text-v2",
185
+ model_type=["dense"],
184
186
  revision="1",
185
187
  release_date="2024-04-30",
186
188
  languages=None, # not specified
@@ -216,6 +218,7 @@ cohere_embed_english_v3 = ModelMeta(
216
218
  model_prompts=cohere_model_prompts,
217
219
  ),
218
220
  name="bedrock/cohere-embed-english-v3",
221
+ model_type=["dense"],
219
222
  languages=["eng-Latn"],
220
223
  open_weights=False,
221
224
  reference="https://cohere.com/blog/introducing-embed-v3",
@@ -243,6 +246,7 @@ cohere_embed_multilingual_v3 = ModelMeta(
243
246
  model_prompts=cohere_model_prompts,
244
247
  ),
245
248
  name="bedrock/cohere-embed-multilingual-v3",
249
+ model_type=["dense"],
246
250
  languages=cohere_supported_languages,
247
251
  open_weights=False,
248
252
  reference="https://cohere.com/blog/introducing-embed-v3",
@@ -319,6 +319,7 @@ bge_small_en_v1_5 = ModelMeta(
319
319
  model_prompts=model_prompts,
320
320
  ),
321
321
  name="BAAI/bge-small-en-v1.5",
322
+ model_type=["dense"],
322
323
  languages=["eng-Latn"],
323
324
  open_weights=True,
324
325
  revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
@@ -330,7 +331,13 @@ bge_small_en_v1_5 = ModelMeta(
330
331
  max_tokens=512,
331
332
  reference="https://huggingface.co/BAAI/bge-small-en-v1.5",
332
333
  similarity_fn_name=ScoringFunction.COSINE,
333
- framework=["Sentence Transformers", "PyTorch"],
334
+ framework=[
335
+ "Sentence Transformers",
336
+ "PyTorch",
337
+ "ONNX",
338
+ "safetensors",
339
+ "Transformers",
340
+ ],
334
341
  use_instructions=True,
335
342
  public_training_code=None,
336
343
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -344,6 +351,7 @@ bge_base_en_v1_5 = ModelMeta(
344
351
  model_prompts=model_prompts,
345
352
  ),
346
353
  name="BAAI/bge-base-en-v1.5",
354
+ model_type=["dense"],
347
355
  languages=["eng-Latn"],
348
356
  open_weights=True,
349
357
  revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
@@ -355,7 +363,13 @@ bge_base_en_v1_5 = ModelMeta(
355
363
  max_tokens=512,
356
364
  reference="https://huggingface.co/BAAI/bge-base-en-v1.5",
357
365
  similarity_fn_name=ScoringFunction.COSINE,
358
- framework=["Sentence Transformers", "PyTorch"],
366
+ framework=[
367
+ "Sentence Transformers",
368
+ "PyTorch",
369
+ "ONNX",
370
+ "safetensors",
371
+ "Transformers",
372
+ ],
359
373
  use_instructions=True,
360
374
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
361
375
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
@@ -369,6 +383,7 @@ bge_large_en_v1_5 = ModelMeta(
369
383
  model_prompts=model_prompts,
370
384
  ),
371
385
  name="BAAI/bge-large-en-v1.5",
386
+ model_type=["dense"],
372
387
  languages=["eng-Latn"],
373
388
  open_weights=True,
374
389
  revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
@@ -380,7 +395,13 @@ bge_large_en_v1_5 = ModelMeta(
380
395
  max_tokens=512,
381
396
  reference="https://huggingface.co/BAAI/bge-large-en-v1.5",
382
397
  similarity_fn_name=ScoringFunction.COSINE,
383
- framework=["Sentence Transformers", "PyTorch"],
398
+ framework=[
399
+ "Sentence Transformers",
400
+ "PyTorch",
401
+ "ONNX",
402
+ "safetensors",
403
+ "Transformers",
404
+ ],
384
405
  use_instructions=True,
385
406
  citation=BGE_15_CITATION,
386
407
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
@@ -394,6 +415,7 @@ bge_small_zh = ModelMeta(
394
415
  model_prompts=model_prompts_zh,
395
416
  ),
396
417
  name="BAAI/bge-small-zh",
418
+ model_type=["dense"],
397
419
  languages=["zho-Hans"],
398
420
  open_weights=True,
399
421
  revision="1d2363c5de6ce9ba9c890c8e23a4c72dce540ca8",
@@ -405,12 +427,13 @@ bge_small_zh = ModelMeta(
405
427
  max_tokens=512,
406
428
  reference="https://huggingface.co/BAAI/bge-small-zh",
407
429
  similarity_fn_name=ScoringFunction.COSINE,
408
- framework=["Sentence Transformers", "PyTorch"],
430
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
409
431
  use_instructions=True,
410
432
  public_training_code=None,
411
433
  public_training_data=None,
412
434
  training_datasets=bge_chinese_training_data,
413
435
  superseded_by="BAAI/bge-small-zh-v1.5",
436
+ citation=BGE_15_CITATION,
414
437
  )
415
438
 
416
439
  bge_base_zh = ModelMeta(
@@ -419,6 +442,7 @@ bge_base_zh = ModelMeta(
419
442
  model_prompts=model_prompts_zh,
420
443
  ),
421
444
  name="BAAI/bge-base-zh",
445
+ model_type=["dense"],
422
446
  languages=["zho-Hans"],
423
447
  open_weights=True,
424
448
  revision="0e5f83d4895db7955e4cb9ed37ab73f7ded339b6",
@@ -430,12 +454,13 @@ bge_base_zh = ModelMeta(
430
454
  max_tokens=512,
431
455
  reference="https://huggingface.co/BAAI/bge-base-zh",
432
456
  similarity_fn_name=ScoringFunction.COSINE,
433
- framework=["Sentence Transformers", "PyTorch"],
457
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
434
458
  use_instructions=True,
435
459
  public_training_code=None,
436
460
  public_training_data=None,
437
461
  training_datasets=bge_chinese_training_data,
438
462
  superseded_by="BAAI/bge-base-zh-v1.5",
463
+ citation=BGE_15_CITATION,
439
464
  )
440
465
 
441
466
  bge_large_zh = ModelMeta(
@@ -444,6 +469,7 @@ bge_large_zh = ModelMeta(
444
469
  model_prompts=model_prompts_zh,
445
470
  ),
446
471
  name="BAAI/bge-large-zh",
472
+ model_type=["dense"],
447
473
  languages=["zho-Hans"],
448
474
  open_weights=True,
449
475
  revision="b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8",
@@ -455,12 +481,13 @@ bge_large_zh = ModelMeta(
455
481
  max_tokens=512,
456
482
  reference="https://huggingface.co/BAAI/bge-large-zh",
457
483
  similarity_fn_name=ScoringFunction.COSINE,
458
- framework=["Sentence Transformers", "PyTorch"],
484
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
459
485
  use_instructions=True,
460
486
  public_training_code=None,
461
487
  public_training_data=None,
462
488
  training_datasets=bge_chinese_training_data,
463
489
  superseded_by="BAAI/bge-large-zh-v1.5",
490
+ citation=BGE_15_CITATION,
464
491
  )
465
492
 
466
493
  bge_small_en = ModelMeta(
@@ -469,6 +496,7 @@ bge_small_en = ModelMeta(
469
496
  model_prompts=model_prompts,
470
497
  ),
471
498
  name="BAAI/bge-small-en",
499
+ model_type=["dense"],
472
500
  languages=["eng-Latn"],
473
501
  open_weights=True,
474
502
  revision="4778d71a06863076696b03fd2777eb118712cad8",
@@ -480,12 +508,13 @@ bge_small_en = ModelMeta(
480
508
  max_tokens=512,
481
509
  reference="https://huggingface.co/BAAI/bge-small-en",
482
510
  similarity_fn_name=ScoringFunction.COSINE,
483
- framework=["Sentence Transformers", "PyTorch"],
511
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
484
512
  use_instructions=True,
485
513
  public_training_code=None,
486
514
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
487
515
  training_datasets=bge_training_data,
488
516
  superseded_by="BAAI/bge-small-en-v1.5",
517
+ citation=BGE_15_CITATION,
489
518
  )
490
519
 
491
520
  bge_base_en = ModelMeta(
@@ -494,6 +523,7 @@ bge_base_en = ModelMeta(
494
523
  model_prompts=model_prompts,
495
524
  ),
496
525
  name="BAAI/bge-base-en",
526
+ model_type=["dense"],
497
527
  languages=["eng-Latn"],
498
528
  open_weights=True,
499
529
  revision="b737bf5dcc6ee8bdc530531266b4804a5d77b5d8",
@@ -505,12 +535,19 @@ bge_base_en = ModelMeta(
505
535
  max_tokens=512,
506
536
  reference="https://huggingface.co/BAAI/bge-base-en",
507
537
  similarity_fn_name=ScoringFunction.COSINE,
508
- framework=["Sentence Transformers", "PyTorch"],
538
+ framework=[
539
+ "Sentence Transformers",
540
+ "PyTorch",
541
+ "Transformers",
542
+ "ONNX",
543
+ "safetensors",
544
+ ],
509
545
  use_instructions=True,
510
546
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
511
547
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
512
548
  training_datasets=bge_training_data,
513
549
  superseded_by="BAAI/bge-base-en-v1.5",
550
+ citation=BGE_15_CITATION,
514
551
  )
515
552
 
516
553
  bge_large_en = ModelMeta(
@@ -519,6 +556,7 @@ bge_large_en = ModelMeta(
519
556
  model_prompts=model_prompts,
520
557
  ),
521
558
  name="BAAI/bge-large-en",
559
+ model_type=["dense"],
522
560
  languages=["eng-Latn"],
523
561
  open_weights=True,
524
562
  revision="abe7d9d814b775ca171121fb03f394dc42974275",
@@ -530,12 +568,13 @@ bge_large_en = ModelMeta(
530
568
  max_tokens=512,
531
569
  reference="https://huggingface.co/BAAI/bge-large-en",
532
570
  similarity_fn_name=ScoringFunction.COSINE,
533
- framework=["Sentence Transformers", "PyTorch"],
571
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
534
572
  use_instructions=True,
535
573
  public_training_code=None, # seemingly released (at least for some models, but the link is broken
536
574
  public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
537
575
  training_datasets=bge_training_data,
538
576
  superseded_by="BAAI/bge-large-en-v1.5",
577
+ citation=BGE_15_CITATION,
539
578
  )
540
579
 
541
580
 
@@ -545,6 +584,7 @@ bge_small_zh_v1_5 = ModelMeta(
545
584
  model_prompts=model_prompts_zh,
546
585
  ),
547
586
  name="BAAI/bge-small-zh-v1.5",
587
+ model_type=["dense"],
548
588
  languages=["zho-Hans"],
549
589
  open_weights=True,
550
590
  revision="7999e1d3359715c523056ef9478215996d62a620",
@@ -556,11 +596,12 @@ bge_small_zh_v1_5 = ModelMeta(
556
596
  max_tokens=512,
557
597
  reference="https://huggingface.co/BAAI/bge-small-zh-v1.5",
558
598
  similarity_fn_name=ScoringFunction.COSINE,
559
- framework=["Sentence Transformers", "PyTorch"],
599
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
560
600
  use_instructions=True,
561
601
  public_training_code=None,
562
602
  public_training_data=None,
563
603
  training_datasets=bge_chinese_training_data,
604
+ citation=BGE_15_CITATION,
564
605
  )
565
606
 
566
607
  bge_base_zh_v1_5 = ModelMeta(
@@ -569,6 +610,7 @@ bge_base_zh_v1_5 = ModelMeta(
569
610
  model_prompts=model_prompts_zh,
570
611
  ),
571
612
  name="BAAI/bge-base-zh-v1.5",
613
+ model_type=["dense"],
572
614
  languages=["zho-Hans"],
573
615
  open_weights=True,
574
616
  revision="f03589ceff5aac7111bd60cfc7d497ca17ecac65",
@@ -580,11 +622,12 @@ bge_base_zh_v1_5 = ModelMeta(
580
622
  max_tokens=512,
581
623
  reference="https://huggingface.co/BAAI/bge-base-zh-v1.5",
582
624
  similarity_fn_name=ScoringFunction.COSINE,
583
- framework=["Sentence Transformers", "PyTorch"],
625
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
584
626
  use_instructions=True,
585
627
  public_training_code=None,
586
628
  public_training_data=None,
587
629
  training_datasets=bge_chinese_training_data,
630
+ citation=BGE_15_CITATION,
588
631
  )
589
632
 
590
633
  bge_large_zh_v1_5 = ModelMeta(
@@ -593,6 +636,7 @@ bge_large_zh_v1_5 = ModelMeta(
593
636
  model_prompts=model_prompts_zh,
594
637
  ),
595
638
  name="BAAI/bge-large-zh-v1.5",
639
+ model_type=["dense"],
596
640
  languages=["zho-Hans"],
597
641
  open_weights=True,
598
642
  revision="79e7739b6ab944e86d6171e44d24c997fc1e0116",
@@ -604,16 +648,18 @@ bge_large_zh_v1_5 = ModelMeta(
604
648
  max_tokens=512,
605
649
  reference="https://huggingface.co/BAAI/bge-large-zh-v1.5",
606
650
  similarity_fn_name=ScoringFunction.COSINE,
607
- framework=["Sentence Transformers", "PyTorch"],
651
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
608
652
  use_instructions=True,
609
653
  public_training_code=None,
610
654
  public_training_data=None,
611
655
  training_datasets=bge_chinese_training_data,
656
+ citation=BGE_15_CITATION,
612
657
  )
613
658
 
614
659
  bge_m3 = ModelMeta(
615
660
  loader=sentence_transformers_loader,
616
661
  name="BAAI/bge-m3",
662
+ model_type=["dense"],
617
663
  languages=bgem3_languages,
618
664
  open_weights=True,
619
665
  revision="5617a9f61b028005a4858fdac845db406aefb181",
@@ -625,11 +671,19 @@ bge_m3 = ModelMeta(
625
671
  max_tokens=8194,
626
672
  reference="https://huggingface.co/BAAI/bge-m3",
627
673
  similarity_fn_name=ScoringFunction.COSINE,
628
- framework=["Sentence Transformers", "PyTorch"],
674
+ framework=["Sentence Transformers", "PyTorch", "ONNX"],
629
675
  use_instructions=False,
630
676
  public_training_code=None,
631
677
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
632
678
  training_datasets=bge_m3_training_data,
679
+ citation="""@misc{bge-m3,
680
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
681
+ author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
682
+ year={2024},
683
+ eprint={2402.03216},
684
+ archivePrefix={arXiv},
685
+ primaryClass={cs.CL}
686
+ }""",
633
687
  )
634
688
 
635
689
  # Contents of cfli/bge-full-data
@@ -692,6 +746,7 @@ bge_full_data = {
692
746
  bge_multilingual_gemma2 = ModelMeta(
693
747
  loader=sentence_transformers_loader,
694
748
  name="BAAI/bge-multilingual-gemma2",
749
+ model_type=["dense"],
695
750
  languages=[
696
751
  "eng-Latn",
697
752
  "zho-Hans",
@@ -712,7 +767,7 @@ bge_multilingual_gemma2 = ModelMeta(
712
767
  max_tokens=8192, # from old C-MTEB leaderboard
713
768
  reference="https://huggingface.co/BAAI/bge-multilingual-gemma2",
714
769
  similarity_fn_name=ScoringFunction.COSINE,
715
- framework=["Sentence Transformers", "PyTorch"],
770
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
716
771
  use_instructions=False,
717
772
  public_training_code=None,
718
773
  public_training_data=None,
@@ -722,11 +777,30 @@ bge_multilingual_gemma2 = ModelMeta(
722
777
  }
723
778
  | bge_full_data
724
779
  | bge_m3_training_data,
780
+ citation="""@misc{bge-m3,
781
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
782
+ author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
783
+ year={2024},
784
+ eprint={2402.03216},
785
+ archivePrefix={arXiv},
786
+ primaryClass={cs.CL}
787
+ }
788
+
789
+
790
+ @misc{bge_embedding,
791
+ title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
792
+ author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
793
+ year={2023},
794
+ eprint={2309.07597},
795
+ archivePrefix={arXiv},
796
+ primaryClass={cs.CL}
797
+ }""",
725
798
  )
726
799
 
727
800
  bge_en_icl = ModelMeta(
728
801
  loader=sentence_transformers_loader,
729
802
  name="BAAI/bge-en-icl",
803
+ model_type=["dense"],
730
804
  languages=[
731
805
  "eng-Latn",
732
806
  ],
@@ -740,7 +814,7 @@ bge_en_icl = ModelMeta(
740
814
  max_tokens=32768,
741
815
  reference="https://huggingface.co/BAAI/bge-en-icl",
742
816
  similarity_fn_name=ScoringFunction.COSINE,
743
- framework=["Sentence Transformers", "PyTorch"],
817
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
744
818
  use_instructions=False,
745
819
  public_training_code="https://github.com/FlagOpen/FlagEmbedding",
746
820
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
@@ -762,6 +836,7 @@ bge_en_icl = ModelMeta(
762
836
  bge_m3_unsupervised = ModelMeta(
763
837
  loader=sentence_transformers_loader,
764
838
  name="BAAI/bge-m3-unsupervised",
839
+ model_type=["dense"],
765
840
  languages=bgem3_languages,
766
841
  open_weights=True,
767
842
  revision="46f03bc86361cf88102b0b517b36c8259f2946b1",
@@ -773,15 +848,24 @@ bge_m3_unsupervised = ModelMeta(
773
848
  max_tokens=8192,
774
849
  reference="https://huggingface.co/BAAI/bge-m3-unsupervised",
775
850
  similarity_fn_name="cosine",
776
- framework=["Sentence Transformers", "PyTorch"],
851
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
777
852
  use_instructions=False,
778
853
  public_training_code="https://github.com/FlagOpen/FlagEmbedding",
779
854
  public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
780
855
  training_datasets=bge_m3_training_data,
856
+ citation="""@misc{bge-m3,
857
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
858
+ author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
859
+ year={2024},
860
+ eprint={2402.03216},
861
+ archivePrefix={arXiv},
862
+ primaryClass={cs.CL}
863
+ }""",
781
864
  )
782
865
 
783
866
  manu__bge_m3_custom_fr = ModelMeta(
784
867
  name="manu/bge-m3-custom-fr",
868
+ model_type=["dense"],
785
869
  revision="ed3ef88678ba83ddf4c0fab71a93cb90d89a9078",
786
870
  release_date="2024-04-11",
787
871
  languages=None,
@@ -794,7 +878,7 @@ manu__bge_m3_custom_fr = ModelMeta(
794
878
  open_weights=True,
795
879
  public_training_code=None,
796
880
  public_training_data=None,
797
- framework=["PyTorch", "Sentence Transformers"],
881
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
798
882
  reference="https://huggingface.co/manu/bge-m3-custom-fr",
799
883
  similarity_fn_name=ScoringFunction.COSINE,
800
884
  use_instructions=None,
@@ -0,0 +1,35 @@
1
+ from mteb.models import ModelMeta, sentence_transformers_loader
2
+
3
+ bica_base = ModelMeta(
4
+ name="bisectgroup/BiCA-base",
5
+ model_type=["dense"],
6
+ loader=sentence_transformers_loader,
7
+ languages=["eng-Latn"],
8
+ open_weights=True,
9
+ revision="31237a836e5ae908c308a256573e5f0986498574",
10
+ release_date="2025-11-14",
11
+ n_parameters=110_000_000,
12
+ memory_usage_mb=418,
13
+ embed_dim=768,
14
+ license="mit",
15
+ max_tokens=512,
16
+ reference="https://huggingface.co/bisectgroup/BiCA-base",
17
+ similarity_fn_name="cosine",
18
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
19
+ use_instructions=False,
20
+ public_training_code="https://github.com/NiravBhattLab/BiCA",
21
+ public_training_data="https://huggingface.co/datasets/bisectgroup/hard-negatives-traversal",
22
+ adapted_from="thenlper/gte-base",
23
+ citation="""
24
+ @misc{sinha2025bicaeffectivebiomedicaldense,
25
+ title={BiCA: Effective Biomedical Dense Retrieval with Citation-Aware Hard Negatives},
26
+ author={Aarush Sinha and Pavan Kumar S and Roshan Balaji and Nirav Pravinbhai Bhatt},
27
+ year={2025},
28
+ eprint={2511.08029},
29
+ archivePrefix={arXiv},
30
+ primaryClass={cs.IR},
31
+ url={https://arxiv.org/abs/2511.08029},
32
+ }
33
+ """,
34
+ training_datasets=set(),
35
+ )
@@ -10,6 +10,13 @@ from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
11
  from mteb.types import Array, BatchedInput, PromptType
12
12
 
13
+ BLIP2_CITATION = """@inproceedings{li2023blip2,
14
+ title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
15
+ author={Junnan Li and Dongxu Li and Silvio Savarese and Steven Hoi},
16
+ year={2023},
17
+ booktitle={ICML},
18
+ }"""
19
+
13
20
 
14
21
  def blip2_loader(model_name, **kwargs):
15
22
  requires_package(
@@ -159,6 +166,7 @@ blip2_training_datasets = set(
159
166
  blip2_opt_2_7b = ModelMeta(
160
167
  loader=blip2_loader,
161
168
  name="Salesforce/blip2-opt-2.7b",
169
+ model_type=["dense"],
162
170
  languages=["eng-Latn"],
163
171
  revision="51572668da0eb669e01a189dc22abe6088589a24",
164
172
  release_date="2024-03-22",
@@ -171,16 +179,18 @@ blip2_opt_2_7b = ModelMeta(
171
179
  open_weights=True,
172
180
  public_training_code="https://github.com/salesforce/LAVIS/tree/main/projects/blip2",
173
181
  public_training_data=None,
174
- framework=["PyTorch"],
182
+ framework=["PyTorch", "Transformers", "safetensors"],
175
183
  reference="https://huggingface.co/Salesforce/blip2-opt-2.7b",
176
184
  similarity_fn_name=ScoringFunction.COSINE,
177
185
  use_instructions=False,
178
186
  training_datasets=blip2_training_datasets,
187
+ citation=BLIP2_CITATION,
179
188
  )
180
189
 
181
190
  blip2_opt_6_7b_coco = ModelMeta(
182
191
  loader=blip2_loader,
183
192
  name="Salesforce/blip2-opt-6.7b-coco",
193
+ model_type=["dense"],
184
194
  languages=["eng-Latn"],
185
195
  revision="0d580de59320a25a4d2c386387bcef310d5f286e",
186
196
  release_date="2024-03-31",
@@ -193,9 +203,10 @@ blip2_opt_6_7b_coco = ModelMeta(
193
203
  open_weights=True,
194
204
  public_training_code="https://github.com/salesforce/LAVIS/tree/main/projects/blip2",
195
205
  public_training_data=None,
196
- framework=["PyTorch"],
206
+ framework=["PyTorch", "Transformers", "safetensors"],
197
207
  reference="https://huggingface.co/Salesforce/blip2-opt-6.7b-coco",
198
208
  similarity_fn_name=ScoringFunction.COSINE,
199
209
  use_instructions=False,
200
210
  training_datasets=blip2_training_datasets,
211
+ citation=BLIP2_CITATION,
201
212
  )