mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -139,7 +139,7 @@ class Model2VecModel(AbsEncoder):
139
139
  **kwargs: Additional arguments to pass to the wrapper.
140
140
  """
141
141
  requires_package(self, "model2vec", model_name, "pip install 'mteb[model2vec]'")
142
- from model2vec import StaticModel # type: ignore
142
+ from model2vec import StaticModel
143
143
 
144
144
  self.model_name = model_name
145
145
  self.model = StaticModel.from_pretrained(self.model_name)
@@ -161,6 +161,7 @@ class Model2VecModel(AbsEncoder):
161
161
  m2v_base_glove_subword = ModelMeta(
162
162
  loader=Model2VecModel,
163
163
  name="minishlab/M2V_base_glove_subword",
164
+ model_type=["dense"],
164
165
  languages=["eng-Latn"],
165
166
  open_weights=True,
166
167
  revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4",
@@ -171,7 +172,7 @@ m2v_base_glove_subword = ModelMeta(
171
172
  embed_dim=256,
172
173
  license="mit",
173
174
  similarity_fn_name=ScoringFunction.COSINE,
174
- framework=["NumPy", "Sentence Transformers"],
175
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
175
176
  reference="https://huggingface.co/minishlab/M2V_base_glove_subword",
176
177
  use_instructions=False,
177
178
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -186,6 +187,7 @@ m2v_base_glove_subword = ModelMeta(
186
187
  m2v_base_glove = ModelMeta(
187
188
  loader=Model2VecModel,
188
189
  name="minishlab/M2V_base_glove",
190
+ model_type=["dense"],
189
191
  languages=["eng-Latn"],
190
192
  open_weights=True,
191
193
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b",
@@ -196,7 +198,7 @@ m2v_base_glove = ModelMeta(
196
198
  embed_dim=256,
197
199
  license="mit",
198
200
  similarity_fn_name=ScoringFunction.COSINE,
199
- framework=["NumPy", "Sentence Transformers"],
201
+ framework=["NumPy", "Sentence Transformers", "safetensors"],
200
202
  reference="https://huggingface.co/minishlab/M2V_base_glove",
201
203
  use_instructions=False,
202
204
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -210,6 +212,7 @@ m2v_base_glove = ModelMeta(
210
212
  m2v_base_output = ModelMeta(
211
213
  loader=Model2VecModel,
212
214
  name="minishlab/M2V_base_output",
215
+ model_type=["dense"],
213
216
  languages=["eng-Latn"],
214
217
  open_weights=True,
215
218
  revision="02460ae401a22b09d2c6652e23371398329551e2",
@@ -220,7 +223,7 @@ m2v_base_output = ModelMeta(
220
223
  embed_dim=256,
221
224
  license="mit",
222
225
  similarity_fn_name=ScoringFunction.COSINE,
223
- framework=["NumPy", "Sentence Transformers"],
226
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
224
227
  reference="https://huggingface.co/minishlab/M2V_base_output",
225
228
  use_instructions=False,
226
229
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -234,6 +237,7 @@ m2v_base_output = ModelMeta(
234
237
  m2v_multilingual_output = ModelMeta(
235
238
  loader=Model2VecModel,
236
239
  name="minishlab/M2V_multilingual_output",
240
+ model_type=["dense"],
237
241
  languages=["eng-Latn"],
238
242
  open_weights=True,
239
243
  revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305",
@@ -244,7 +248,7 @@ m2v_multilingual_output = ModelMeta(
244
248
  embed_dim=256,
245
249
  license="mit",
246
250
  similarity_fn_name=ScoringFunction.COSINE,
247
- framework=["NumPy", "Sentence Transformers"],
251
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
248
252
  reference="https://huggingface.co/minishlab/M2V_multilingual_output",
249
253
  use_instructions=False,
250
254
  adapted_from="sentence-transformers/LaBSE",
@@ -258,6 +262,7 @@ m2v_multilingual_output = ModelMeta(
258
262
  potion_base_2m = ModelMeta(
259
263
  loader=Model2VecModel,
260
264
  name="minishlab/potion-base-2M",
265
+ model_type=["dense"],
261
266
  languages=["eng-Latn"],
262
267
  open_weights=True,
263
268
  revision="86db093558fbced2072b929eb1690bce5272bd4b",
@@ -268,7 +273,7 @@ potion_base_2m = ModelMeta(
268
273
  embed_dim=64,
269
274
  license="mit",
270
275
  similarity_fn_name=ScoringFunction.COSINE,
271
- framework=["NumPy", "Sentence Transformers"],
276
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
272
277
  reference="https://huggingface.co/minishlab/potion-base-2M",
273
278
  use_instructions=False,
274
279
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -282,6 +287,7 @@ potion_base_2m = ModelMeta(
282
287
  potion_base_4m = ModelMeta(
283
288
  loader=Model2VecModel,
284
289
  name="minishlab/potion-base-4M",
290
+ model_type=["dense"],
285
291
  languages=["eng-Latn"],
286
292
  open_weights=True,
287
293
  revision="81b1802ada41afcd0987a37dc15e569c9fa76f04",
@@ -292,7 +298,7 @@ potion_base_4m = ModelMeta(
292
298
  embed_dim=128,
293
299
  license="mit",
294
300
  similarity_fn_name=ScoringFunction.COSINE,
295
- framework=["NumPy", "Sentence Transformers"],
301
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
296
302
  reference="https://huggingface.co/minishlab/potion-base-4M",
297
303
  use_instructions=False,
298
304
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -306,6 +312,7 @@ potion_base_4m = ModelMeta(
306
312
  potion_base_8m = ModelMeta(
307
313
  loader=Model2VecModel,
308
314
  name="minishlab/potion-base-8M",
315
+ model_type=["dense"],
309
316
  languages=["eng-Latn"],
310
317
  open_weights=True,
311
318
  revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce",
@@ -316,7 +323,7 @@ potion_base_8m = ModelMeta(
316
323
  embed_dim=256,
317
324
  license="mit",
318
325
  similarity_fn_name=ScoringFunction.COSINE,
319
- framework=["NumPy", "Sentence Transformers"],
326
+ framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
320
327
  reference="https://huggingface.co/minishlab/potion-base-8M",
321
328
  use_instructions=False,
322
329
  adapted_from="BAAI/bge-base-en-v1.5",
@@ -330,6 +337,7 @@ potion_base_8m = ModelMeta(
330
337
  potion_multilingual_128m = ModelMeta(
331
338
  loader=Model2VecModel,
332
339
  name="minishlab/potion-multilingual-128M",
340
+ model_type=["dense"],
333
341
  languages=_POTION_MULTILINGUAL_128M_LANGUAGES,
334
342
  open_weights=True,
335
343
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2a",
@@ -340,7 +348,7 @@ potion_multilingual_128m = ModelMeta(
340
348
  embed_dim=256,
341
349
  license="mit",
342
350
  similarity_fn_name="cosine",
343
- framework=["NumPy"],
351
+ framework=["NumPy", "ONNX", "safetensors", "Sentence Transformers"],
344
352
  reference="https://huggingface.co/minishlab/potion-multilingual-128M",
345
353
  use_instructions=False,
346
354
  adapted_from="BAAI/bge-m3",
@@ -354,6 +362,7 @@ potion_multilingual_128m = ModelMeta(
354
362
  pubmed_bert_100k = ModelMeta(
355
363
  loader=Model2VecModel,
356
364
  name="NeuML/pubmedbert-base-embeddings-100K",
365
+ model_type=["dense"],
357
366
  languages=["eng-Latn"],
358
367
  open_weights=True,
359
368
  revision="bac5e3b12fb8c650e92a19c41b436732c4f16e9e",
@@ -364,7 +373,7 @@ pubmed_bert_100k = ModelMeta(
364
373
  embed_dim=64,
365
374
  license="apache-2.0",
366
375
  similarity_fn_name="cosine",
367
- framework=["NumPy"],
376
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
368
377
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-100K",
369
378
  use_instructions=False,
370
379
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -377,6 +386,7 @@ pubmed_bert_100k = ModelMeta(
377
386
  pubmed_bert_500k = ModelMeta(
378
387
  loader=Model2VecModel,
379
388
  name="NeuML/pubmedbert-base-embeddings-500K",
389
+ model_type=["dense"],
380
390
  languages=["eng-Latn"],
381
391
  open_weights=True,
382
392
  revision="34ba71e35c393fdad7ed695113f653feb407b16b",
@@ -387,7 +397,7 @@ pubmed_bert_500k = ModelMeta(
387
397
  embed_dim=64,
388
398
  license="apache-2.0",
389
399
  similarity_fn_name="cosine",
390
- framework=["NumPy"],
400
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
391
401
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-500K",
392
402
  use_instructions=False,
393
403
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -400,6 +410,7 @@ pubmed_bert_500k = ModelMeta(
400
410
  pubmed_bert_1m = ModelMeta(
401
411
  loader=Model2VecModel,
402
412
  name="NeuML/pubmedbert-base-embeddings-1M",
413
+ model_type=["dense"],
403
414
  languages=["eng-Latn"],
404
415
  open_weights=True,
405
416
  revision="2b7fed222594708da6d88bcda92ae9b434b7ddd1",
@@ -410,7 +421,7 @@ pubmed_bert_1m = ModelMeta(
410
421
  embed_dim=64,
411
422
  license="apache-2.0",
412
423
  similarity_fn_name="cosine",
413
- framework=["NumPy"],
424
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
414
425
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-1M",
415
426
  use_instructions=False,
416
427
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -423,6 +434,7 @@ pubmed_bert_1m = ModelMeta(
423
434
  pubmed_bert_2m = ModelMeta(
424
435
  loader=Model2VecModel,
425
436
  name="NeuML/pubmedbert-base-embeddings-2M",
437
+ model_type=["dense"],
426
438
  languages=["eng-Latn"],
427
439
  open_weights=True,
428
440
  revision="1d7bbe04d6713e425161146bfdc71473cbed498a",
@@ -433,7 +445,7 @@ pubmed_bert_2m = ModelMeta(
433
445
  embed_dim=64,
434
446
  license="apache-2.0",
435
447
  similarity_fn_name="cosine",
436
- framework=["NumPy"],
448
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
437
449
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-2M",
438
450
  use_instructions=False,
439
451
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -446,6 +458,7 @@ pubmed_bert_2m = ModelMeta(
446
458
  pubmed_bert_8m = ModelMeta(
447
459
  loader=Model2VecModel,
448
460
  name="NeuML/pubmedbert-base-embeddings-8M",
461
+ model_type=["dense"],
449
462
  languages=["eng-Latn"],
450
463
  open_weights=True,
451
464
  revision="387d350015e963744f4fafe56a574b7cd48646c9",
@@ -456,7 +469,7 @@ pubmed_bert_8m = ModelMeta(
456
469
  embed_dim=256,
457
470
  license="apache-2.0",
458
471
  similarity_fn_name="cosine",
459
- framework=["NumPy"],
472
+ framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
460
473
  reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-8M",
461
474
  use_instructions=False,
462
475
  adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -91,6 +91,7 @@ m3e_dataset = {
91
91
  m3e_base = ModelMeta(
92
92
  loader=sentence_transformers_loader,
93
93
  name="moka-ai/m3e-base",
94
+ model_type=["dense"],
94
95
  languages=["zho-Hans", "eng-Latn"],
95
96
  open_weights=True,
96
97
  revision="764b537a0e50e5c7d64db883f2d2e051cbe3c64c",
@@ -103,7 +104,7 @@ m3e_base = ModelMeta(
103
104
  max_tokens=512,
104
105
  reference="https://huggingface.co/moka-ai/m3e-base",
105
106
  similarity_fn_name=ScoringFunction.COSINE,
106
- framework=["Sentence Transformers", "PyTorch"],
107
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
107
108
  use_instructions=False,
108
109
  superseded_by=None,
109
110
  adapted_from=None,
@@ -116,6 +117,7 @@ m3e_base = ModelMeta(
116
117
  m3e_small = ModelMeta(
117
118
  loader=sentence_transformers_loader,
118
119
  name="moka-ai/m3e-small",
120
+ model_type=["dense"],
119
121
  languages=["zho-Hans", "eng-Latn"],
120
122
  open_weights=True,
121
123
  revision="44c696631b2a8c200220aaaad5f987f096e986df",
@@ -141,6 +143,7 @@ m3e_small = ModelMeta(
141
143
  m3e_large = ModelMeta(
142
144
  loader=sentence_transformers_loader,
143
145
  name="moka-ai/m3e-large",
146
+ model_type=["dense"],
144
147
  languages=["zho-Hans", "eng-Latn"],
145
148
  open_weights=True,
146
149
  revision="12900375086c37ba5d83d1e417b21dc7d1d1f388",
@@ -0,0 +1,70 @@
1
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
2
+ from mteb.models.sentence_transformer_wrapper import (
3
+ SentenceTransformerEncoderWrapper,
4
+ )
5
+
6
+ nb_sbert = ModelMeta(
7
+ loader=SentenceTransformerEncoderWrapper, # type: ignore[arg-type]
8
+ name="NbAiLab/nb-sbert-base",
9
+ model_type=["dense"],
10
+ languages=["nno-Latn", "nob-Latn", "swe-Latn", "dan-Latn"],
11
+ open_weights=True,
12
+ revision="b95656350a076aeafd2d23763660f80655408cc6",
13
+ release_date="2022-11-23",
14
+ n_parameters=1_780_000_000,
15
+ memory_usage_mb=678,
16
+ embed_dim=4096,
17
+ license="apache-2.0",
18
+ max_tokens=75,
19
+ reference="https://huggingface.co/NbAiLab/nb-sbert-base",
20
+ similarity_fn_name=ScoringFunction.COSINE,
21
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
22
+ use_instructions=False,
23
+ public_training_code=None,
24
+ public_training_data="https://huggingface.co/datasets/NbAiLab/mnli-norwegian",
25
+ training_datasets=set(),
26
+ )
27
+
28
+ nb_bert_large = ModelMeta(
29
+ loader=SentenceTransformerEncoderWrapper, # type: ignore[arg-type]
30
+ name="NbAiLab/nb-bert-large",
31
+ model_type=["dense"],
32
+ languages=["nno-Latn", "nob-Latn"],
33
+ open_weights=True,
34
+ revision="f9d0fc184adab4dc354d85e1854b7634540d7550",
35
+ release_date="2021-04-29",
36
+ n_parameters=355087360,
37
+ memory_usage_mb=1359,
38
+ embed_dim=1024,
39
+ license="cc-by-4.0",
40
+ max_tokens=512,
41
+ reference="https://huggingface.co/NbAiLab/nb-bert-large",
42
+ similarity_fn_name=ScoringFunction.COSINE,
43
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
44
+ use_instructions=False,
45
+ public_training_code=None,
46
+ public_training_data="https://huggingface.co/NbAiLab/nb-bert-large#training-data",
47
+ training_datasets=set(),
48
+ )
49
+
50
+ nb_bert_base = ModelMeta(
51
+ loader=SentenceTransformerEncoderWrapper, # type: ignore[arg-type]
52
+ name="NbAiLab/nb-bert-base",
53
+ model_type=["dense"],
54
+ languages=["nno-Latn", "nob-Latn"],
55
+ open_weights=True,
56
+ revision="9417c3f62a3adc99f17ff92bff446f35d011f994",
57
+ release_date="2021-01-13",
58
+ n_parameters=177853440,
59
+ memory_usage_mb=681,
60
+ embed_dim=768,
61
+ license="cc-by-4.0",
62
+ max_tokens=512,
63
+ reference="https://huggingface.co/NbAiLab/nb-bert-base",
64
+ similarity_fn_name=ScoringFunction.COSINE,
65
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
66
+ use_instructions=False,
67
+ public_training_code=None,
68
+ public_training_data="https://huggingface.co/NbAiLab/nb-bert-base#training-data",
69
+ training_datasets=set(),
70
+ )
@@ -30,13 +30,13 @@ class NoInstructModel(AbsEncoder):
30
30
  self,
31
31
  model_name: str,
32
32
  revision: str,
33
+ device: str | None = None,
33
34
  model_prompts: dict[str, str] | None = None,
34
35
  **kwargs: Any,
35
36
  ):
36
37
  from transformers import AutoModel, AutoTokenizer
37
38
 
38
39
  self.model_name = model_name
39
- device = kwargs.pop("device", None)
40
40
  self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
41
41
  self.model = AutoModel.from_pretrained(
42
42
  model_name, revision=revision, **kwargs
@@ -97,6 +97,7 @@ class NoInstructModel(AbsEncoder):
97
97
  no_instruct_small_v0 = ModelMeta(
98
98
  loader=NoInstructModel,
99
99
  name="avsolatorio/NoInstruct-small-Embedding-v0",
100
+ model_type=["dense"],
100
101
  languages=["eng-Latn"],
101
102
  open_weights=True,
102
103
  revision="b38747000553d8268915c95a55fc87e707c9aadd",
@@ -108,7 +109,7 @@ no_instruct_small_v0 = ModelMeta(
108
109
  license="mit",
109
110
  reference="https://huggingface.co/avsolatorio/NoInstruct-small-Embedding-v0",
110
111
  similarity_fn_name=ScoringFunction.COSINE,
111
- framework=["PyTorch"],
112
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
112
113
  use_instructions=False,
113
114
  adapted_from=None,
114
115
  superseded_by=None,
@@ -23,6 +23,7 @@ class NomicWrapper(SentenceTransformerEncoderWrapper):
23
23
  self,
24
24
  model_name: str,
25
25
  revision: str,
26
+ device: str | None = None,
26
27
  model_prompts: dict[str, str] | None = None,
27
28
  **kwargs: Any,
28
29
  ):
@@ -37,7 +38,9 @@ class NomicWrapper(SentenceTransformerEncoderWrapper):
37
38
  f"Current transformers version is {transformers.__version__} is lower than the required version"
38
39
  f" {MODERN_BERT_TRANSFORMERS_MIN_VERSION}"
39
40
  )
40
- super().__init__(model_name, revision, model_prompts, **kwargs)
41
+ super().__init__(
42
+ model_name, revision, device=device, model_prompts=model_prompts, **kwargs
43
+ )
41
44
 
42
45
  def to(self, device: torch.device) -> None:
43
46
  self.model.to(device)
@@ -199,6 +202,7 @@ nomic_embed_v1_5 = ModelMeta(
199
202
  model_prompts=model_prompts,
200
203
  ),
201
204
  name="nomic-ai/nomic-embed-text-v1.5",
205
+ model_type=["dense"],
202
206
  languages=["eng-Latn"],
203
207
  open_weights=True,
204
208
  revision="b0753ae76394dd36bcfb912a46018088bca48be0",
@@ -211,7 +215,13 @@ nomic_embed_v1_5 = ModelMeta(
211
215
  license="apache-2.0",
212
216
  reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
213
217
  similarity_fn_name=ScoringFunction.COSINE,
214
- framework=["Sentence Transformers", "PyTorch"],
218
+ framework=[
219
+ "Sentence Transformers",
220
+ "PyTorch",
221
+ "ONNX",
222
+ "safetensors",
223
+ "Transformers",
224
+ ],
215
225
  use_instructions=True,
216
226
  adapted_from=None,
217
227
  superseded_by=None,
@@ -227,6 +237,7 @@ nomic_embed_v1 = ModelMeta(
227
237
  model_prompts=model_prompts,
228
238
  ),
229
239
  name="nomic-ai/nomic-embed-text-v1",
240
+ model_type=["dense"],
230
241
  languages=["eng-Latn"],
231
242
  open_weights=True,
232
243
  revision="0759316f275aa0cb93a5b830973843ca66babcf5",
@@ -238,7 +249,13 @@ nomic_embed_v1 = ModelMeta(
238
249
  license="apache-2.0",
239
250
  reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1",
240
251
  similarity_fn_name=ScoringFunction.COSINE,
241
- framework=["Sentence Transformers", "PyTorch"],
252
+ framework=[
253
+ "Sentence Transformers",
254
+ "PyTorch",
255
+ "ONNX",
256
+ "safetensors",
257
+ "Transformers",
258
+ ],
242
259
  use_instructions=True,
243
260
  citation=NOMIC_CITATION,
244
261
  adapted_from=None,
@@ -255,6 +272,7 @@ nomic_embed_v1_ablated = ModelMeta(
255
272
  model_prompts=model_prompts,
256
273
  ),
257
274
  name="nomic-ai/nomic-embed-text-v1-ablated",
275
+ model_type=["dense"],
258
276
  languages=["eng-Latn"],
259
277
  open_weights=True,
260
278
  revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f",
@@ -266,7 +284,7 @@ nomic_embed_v1_ablated = ModelMeta(
266
284
  license="apache-2.0",
267
285
  reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-ablated",
268
286
  similarity_fn_name=ScoringFunction.COSINE,
269
- framework=["Sentence Transformers", "PyTorch"],
287
+ framework=["Sentence Transformers", "PyTorch", "ONNX"],
270
288
  use_instructions=True,
271
289
  adapted_from=None,
272
290
  superseded_by=None,
@@ -282,6 +300,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
282
300
  model_prompts=model_prompts,
283
301
  ),
284
302
  name="nomic-ai/nomic-embed-text-v1-unsupervised",
303
+ model_type=["dense"],
285
304
  languages=["eng-Latn"],
286
305
  open_weights=True,
287
306
  revision="b53d557b15ae63852847c222d336c1609eced93c",
@@ -293,7 +312,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
293
312
  license="apache-2.0",
294
313
  reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-unsupervised",
295
314
  similarity_fn_name=ScoringFunction.COSINE,
296
- framework=["Sentence Transformers", "PyTorch"],
315
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "Transformers"],
297
316
  use_instructions=True,
298
317
  adapted_from=None,
299
318
  superseded_by=None,
@@ -309,6 +328,7 @@ nomic_modern_bert_embed = ModelMeta(
309
328
  model_prompts=model_prompts,
310
329
  ),
311
330
  name="nomic-ai/modernbert-embed-base",
331
+ model_type=["dense"],
312
332
  languages=["eng-Latn"],
313
333
  open_weights=True,
314
334
  revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
@@ -320,7 +340,7 @@ nomic_modern_bert_embed = ModelMeta(
320
340
  license="apache-2.0",
321
341
  reference="https://huggingface.co/nomic-ai/modernbert-embed-base",
322
342
  similarity_fn_name=ScoringFunction.COSINE,
323
- framework=["Sentence Transformers", "PyTorch"],
343
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
324
344
  use_instructions=True,
325
345
  adapted_from="answerdotai/ModernBERT-base",
326
346
  public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_pretrain_modernbert.yaml",
@@ -328,4 +348,151 @@ nomic_modern_bert_embed = ModelMeta(
328
348
  superseded_by=None,
329
349
  training_datasets=nomic_training_data,
330
350
  public_training_data=None,
351
+ citation="""@misc{nussbaum2024nomic,
352
+ title={Nomic Embed: Training a Reproducible Long Context Text Embedder},
353
+ author={Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar},
354
+ year={2024},
355
+ eprint={2402.01613},
356
+ archivePrefix={arXiv},
357
+ primaryClass={cs.CL}
358
+ }""",
359
+ )
360
+
361
+
362
+ m_languages = [
363
+ "eng-Latn",
364
+ "spa-Latn",
365
+ "fra-Latn",
366
+ "deu-Latn",
367
+ "ita-Latn",
368
+ "por-Latn",
369
+ "pol-Latn",
370
+ "nld-Latn",
371
+ "tur-Latn",
372
+ "jpn-Jpan",
373
+ "vie-Latn",
374
+ "rus-Cyrl",
375
+ "ind-Latn",
376
+ "arb-Arab",
377
+ "ces-Latn",
378
+ "ron-Latn",
379
+ "swe-Latn",
380
+ "ell-Grek",
381
+ "ukr-Cyrl",
382
+ "zho-Hans",
383
+ "hun-Latn",
384
+ "dan-Latn",
385
+ "nor-Latn",
386
+ "hin-Deva",
387
+ "fin-Latn",
388
+ "bul-Cyrl",
389
+ "kor-Hang",
390
+ "slk-Latn",
391
+ "tha-Thai",
392
+ "heb-Hebr",
393
+ "cat-Latn",
394
+ "lit-Latn",
395
+ "fas-Arab",
396
+ "msa-Latn",
397
+ "slv-Latn",
398
+ "lav-Latn",
399
+ "mar-Deva",
400
+ "ben-Beng",
401
+ "sqi-Latn",
402
+ "cym-Latn",
403
+ "bel-Cyrl",
404
+ "mal-Mlym",
405
+ "kan-Knda",
406
+ "mkd-Cyrl",
407
+ "urd-Arab",
408
+ "fry-Latn",
409
+ "fil-Latn",
410
+ "tel-Telu",
411
+ "eus-Latn",
412
+ "swh-Latn",
413
+ "som-Latn",
414
+ "snd-Arab",
415
+ "uzb-Latn",
416
+ "cos-Latn",
417
+ "hrv-Latn",
418
+ "guj-Gujr",
419
+ "hin-Latn",
420
+ "ceb-Latn",
421
+ "epo-Latn",
422
+ "jav-Latn",
423
+ "lat-Latn",
424
+ "zul-Latn",
425
+ "mon-Cyrl",
426
+ "sin-Sinh",
427
+ "ell-Latn",
428
+ "gle-Latn",
429
+ "kir-Cyrl",
430
+ "tgk-Cyrl",
431
+ "mya-Mymr",
432
+ "khm-Khmr",
433
+ "mlg-Latn",
434
+ "pan-Guru",
435
+ "rus-Latn",
436
+ "sna-Latn",
437
+ "zho-Latn",
438
+ "hau-Latn",
439
+ "heb-Latn",
440
+ "hmn-Latn",
441
+ "hat-Latn",
442
+ "jpn-Latn",
443
+ "sun-Latn",
444
+ "bul-Latn",
445
+ "gla-Latn",
446
+ "nya-Latn",
447
+ "pus-Arab",
448
+ "kur-Latn",
449
+ "hbs-Latn",
450
+ "amh-Ethi",
451
+ "ibo-Latn",
452
+ "lao-Laoo",
453
+ "mri-Latn",
454
+ "nno-Latn",
455
+ "smo-Latn",
456
+ "yid-Hebr",
457
+ "sot-Latn",
458
+ "tgl-Latn",
459
+ "xho-Latn",
460
+ "yor-Latn",
461
+ ]
462
+
463
+ nomic_embed_text_v2_moe = ModelMeta(
464
+ loader=NomicWrapper,
465
+ loader_kwargs=dict(
466
+ trust_remote_code=True,
467
+ model_prompts=model_prompts,
468
+ ),
469
+ name="nomic-ai/nomic-embed-text-v2-moe",
470
+ model_type=["dense"],
471
+ languages=m_languages,
472
+ open_weights=True,
473
+ revision="1066b6599d099fbb93dfcb64f9c37a7c9e503e85",
474
+ release_date="2025-02-07",
475
+ n_parameters=475292928,
476
+ memory_usage_mb=1813,
477
+ max_tokens=512,
478
+ embed_dim=768,
479
+ license="apache-2.0",
480
+ reference="https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe",
481
+ similarity_fn_name=ScoringFunction.COSINE,
482
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
483
+ use_instructions=True,
484
+ adapted_from="nomic-ai/nomic-xlm-2048",
485
+ public_training_data="https://github.com/nomic-ai/contrastors?tab=readme-ov-file#data-access",
486
+ public_training_code="https://github.com/nomic-ai/contrastors/blob/613ddfd37309e538cceadb05b1e6423e7b09f603/src/contrastors/configs/train/contrastive_finetune_moe.yaml",
487
+ training_datasets=None, # did not look into this further
488
+ superseded_by=None,
489
+ citation="""@misc{nussbaum2025trainingsparsemixtureexperts,
490
+ title={Training Sparse Mixture Of Experts Text Embedding Models},
491
+ author={Zach Nussbaum and Brandon Duderstadt},
492
+ year={2025},
493
+ eprint={2502.07972},
494
+ archivePrefix={arXiv},
495
+ primaryClass={cs.CL},
496
+ url={https://arxiv.org/abs/2502.07972},
497
+ }""",
331
498
  )
@@ -1,8 +1,9 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
6
  import torch.nn.functional as F
5
- from PIL import Image
6
7
  from torch.utils.data import DataLoader
7
8
  from tqdm.auto import tqdm
8
9
 
@@ -12,6 +13,9 @@ from mteb.models.abs_encoder import AbsEncoder
12
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
13
14
  from mteb.types import Array, BatchedInput, PromptType
14
15
 
16
+ if TYPE_CHECKING:
17
+ from PIL import Image
18
+
15
19
  NOMIC_EMBED_VISION_CITATION = """@article{nussbaum2024nomicembedvision,
16
20
  title={Nomic Embed Vision: Expanding the Latent Space},
17
21
  author={Nussbaum, Zach and Duderstadt, Brandon and Mulyar, Andriy},
@@ -164,6 +168,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
164
168
  "text_model_revision": "a03db6748c80237063eb0546ac6b627eca2318cb",
165
169
  },
166
170
  name="nomic-ai/nomic-embed-vision-v1.5",
171
+ model_type=["dense"],
167
172
  languages=["eng-Latn"],
168
173
  revision="af2246fffdab78d8458418480e4886a8e48b70a7",
169
174
  release_date="2024-06-08",
@@ -176,7 +181,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
176
181
  open_weights=True,
177
182
  public_training_code="https://github.com/nomic-ai/contrastors",
178
183
  public_training_data=None,
179
- framework=["PyTorch"],
184
+ framework=["PyTorch", "Transformers", "ONNX", "safetensors"],
180
185
  reference="https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5",
181
186
  similarity_fn_name=ScoringFunction.COSINE,
182
187
  use_instructions=True,