mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -42,6 +42,7 @@ gte_qwen2_7b_instruct = ModelMeta(
42
42
  embed_eos="<|endoftext|>",
43
43
  ),
44
44
  name="Alibaba-NLP/gte-Qwen2-7B-instruct",
45
+ model_type=["dense"],
45
46
  languages=None,
46
47
  open_weights=True,
47
48
  revision="e26182b2122f4435e8b3ebecbf363990f409b45b",
@@ -52,7 +53,7 @@ gte_qwen2_7b_instruct = ModelMeta(
52
53
  license="apache-2.0",
53
54
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct",
54
55
  similarity_fn_name=ScoringFunction.COSINE,
55
- framework=["Sentence Transformers", "PyTorch"],
56
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
56
57
  use_instructions=True,
57
58
  citation=GTE_CITATION,
58
59
  public_training_code=None,
@@ -73,6 +74,7 @@ gte_qwen1_5_7b_instruct = ModelMeta(
73
74
  embed_eos="<|endoftext|>",
74
75
  ),
75
76
  name="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
77
+ model_type=["dense"],
76
78
  languages=["eng-Latn"],
77
79
  open_weights=True,
78
80
  revision="07d27e5226328010336563bc1b564a5e3436a298",
@@ -84,11 +86,17 @@ gte_qwen1_5_7b_instruct = ModelMeta(
84
86
  max_tokens=32_768,
85
87
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct",
86
88
  similarity_fn_name=ScoringFunction.COSINE,
87
- framework=["Sentence Transformers", "PyTorch"],
89
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
88
90
  use_instructions=True,
89
91
  public_training_code=None,
90
92
  public_training_data=None,
91
93
  training_datasets=None,
94
+ citation="""@article{li2023towards,
95
+ title={Towards general text embeddings with multi-stage contrastive learning},
96
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
97
+ journal={arXiv preprint arXiv:2308.03281},
98
+ year={2023}
99
+ }""",
92
100
  )
93
101
 
94
102
  gte_qwen2_1_5b_instruct = ModelMeta(
@@ -103,6 +111,7 @@ gte_qwen2_1_5b_instruct = ModelMeta(
103
111
  embed_eos="<|endoftext|>",
104
112
  ),
105
113
  name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
114
+ model_type=["dense"],
106
115
  languages=["eng-Latn"],
107
116
  open_weights=True,
108
117
  revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
@@ -114,16 +123,23 @@ gte_qwen2_1_5b_instruct = ModelMeta(
114
123
  max_tokens=32_768,
115
124
  reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
116
125
  similarity_fn_name=ScoringFunction.COSINE,
117
- framework=["Sentence Transformers", "PyTorch"],
126
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
118
127
  use_instructions=True,
119
128
  public_training_code=None,
120
129
  public_training_data=None,
121
130
  training_datasets=None,
131
+ citation="""@article{li2023towards,
132
+ title={Towards general text embeddings with multi-stage contrastive learning},
133
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
134
+ journal={arXiv preprint arXiv:2308.03281},
135
+ year={2023}
136
+ }""",
122
137
  )
123
138
 
124
139
  gte_small_zh = ModelMeta(
125
140
  loader=sentence_transformers_loader,
126
141
  name="thenlper/gte-small-zh",
142
+ model_type=["dense"],
127
143
  languages=["zho-Hans"],
128
144
  open_weights=True,
129
145
  revision="af7bd46fbb00b3a6963c8dd7f1786ddfbfbe973a",
@@ -135,16 +151,23 @@ gte_small_zh = ModelMeta(
135
151
  max_tokens=512,
136
152
  reference="https://huggingface.co/thenlper/gte-small-zh",
137
153
  similarity_fn_name=ScoringFunction.COSINE,
138
- framework=["Sentence Transformers", "PyTorch"],
154
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
139
155
  use_instructions=False,
140
156
  public_training_code=None,
141
157
  public_training_data=None,
142
158
  training_datasets=None, # Not disclosed
159
+ citation="""@article{li2023towards,
160
+ title={Towards general text embeddings with multi-stage contrastive learning},
161
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
162
+ journal={arXiv preprint arXiv:2308.03281},
163
+ year={2023}
164
+ }""",
143
165
  )
144
166
 
145
167
  gte_base_zh = ModelMeta(
146
168
  loader=sentence_transformers_loader,
147
169
  name="thenlper/gte-base-zh",
170
+ model_type=["dense"],
148
171
  languages=["zho-Hans"],
149
172
  open_weights=True,
150
173
  revision="71ab7947d6fac5b64aa299e6e40e6c2b2e85976c",
@@ -156,16 +179,23 @@ gte_base_zh = ModelMeta(
156
179
  max_tokens=512,
157
180
  reference="https://huggingface.co/thenlper/gte-base-zh",
158
181
  similarity_fn_name=ScoringFunction.COSINE,
159
- framework=["Sentence Transformers", "PyTorch"],
182
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
160
183
  use_instructions=False,
161
184
  public_training_code=None,
162
185
  public_training_data=None,
163
186
  training_datasets=None, # Not disclosed
187
+ citation="""@article{li2023towards,
188
+ title={Towards general text embeddings with multi-stage contrastive learning},
189
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
190
+ journal={arXiv preprint arXiv:2308.03281},
191
+ year={2023}
192
+ }""",
164
193
  )
165
194
 
166
195
  gte_large_zh = ModelMeta(
167
196
  loader=sentence_transformers_loader,
168
197
  name="thenlper/gte-large-zh",
198
+ model_type=["dense"],
169
199
  languages=["zho-Hans"],
170
200
  open_weights=True,
171
201
  revision="64c364e579de308104a9b2c170ca009502f4f545",
@@ -177,11 +207,17 @@ gte_large_zh = ModelMeta(
177
207
  max_tokens=512,
178
208
  reference="https://huggingface.co/thenlper/gte-large-zh",
179
209
  similarity_fn_name=ScoringFunction.COSINE,
180
- framework=["Sentence Transformers", "PyTorch"],
210
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
181
211
  use_instructions=False,
182
212
  public_training_code=None,
183
213
  public_training_data=None,
184
214
  training_datasets=None, # Not disclosed
215
+ citation="""@article{li2023towards,
216
+ title={Towards general text embeddings with multi-stage contrastive learning},
217
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
218
+ journal={arXiv preprint arXiv:2308.03281},
219
+ year={2023}
220
+ }""",
185
221
  )
186
222
 
187
223
  gte_multilingual_langs = [
@@ -288,6 +324,7 @@ gte_multi_training_data = {
288
324
  gte_multilingual_base = ModelMeta(
289
325
  loader=sentence_transformers_loader,
290
326
  name="Alibaba-NLP/gte-multilingual-base",
327
+ model_type=["dense"],
291
328
  languages=gte_multilingual_langs,
292
329
  open_weights=True,
293
330
  revision="ca1791e0bcc104f6db161f27de1340241b13c5a4",
@@ -299,16 +336,24 @@ gte_multilingual_base = ModelMeta(
299
336
  max_tokens=8192,
300
337
  reference="https://huggingface.co/Alibaba-NLP/gte-multilingual-base",
301
338
  similarity_fn_name=ScoringFunction.COSINE,
302
- framework=["Sentence Transformers", "PyTorch"],
339
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
303
340
  use_instructions=False,
304
341
  public_training_code=None,
305
342
  public_training_data=None, # couldn't find
306
343
  training_datasets=gte_multi_training_data,
344
+ citation="""@inproceedings{zhang2024mgte,
345
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
346
+ author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
347
+ booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
348
+ pages={1393--1412},
349
+ year={2024}
350
+ }""",
307
351
  )
308
352
 
309
353
  gte_modernbert_base = ModelMeta(
310
354
  loader=sentence_transformers_loader,
311
355
  name="Alibaba-NLP/gte-modernbert-base",
356
+ model_type=["dense"],
312
357
  languages=["eng-Latn"],
313
358
  open_weights=True,
314
359
  revision="7ca8b4ca700621b67618669f5378fe5f5820b8e4",
@@ -320,17 +365,38 @@ gte_modernbert_base = ModelMeta(
320
365
  max_tokens=8192,
321
366
  reference="https://huggingface.co/Alibaba-NLP/gte-modernbert-base",
322
367
  similarity_fn_name=ScoringFunction.COSINE,
323
- framework=["Sentence Transformers", "PyTorch"],
368
+ framework=[
369
+ "Sentence Transformers",
370
+ "PyTorch",
371
+ "Transformers",
372
+ "ONNX",
373
+ "safetensors",
374
+ ],
324
375
  use_instructions=False,
325
376
  public_training_code=None, # couldn't find
326
377
  public_training_data=None,
327
378
  training_datasets=gte_multi_training_data, # English part of gte_multi_training_data,
379
+ citation="""@inproceedings{zhang2024mgte,
380
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
381
+ author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
382
+ booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
383
+ pages={1393--1412},
384
+ year={2024}
385
+ }
386
+
387
+ @article{li2023towards,
388
+ title={Towards general text embeddings with multi-stage contrastive learning},
389
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
390
+ journal={arXiv preprint arXiv:2308.03281},
391
+ year={2023}
392
+ }""",
328
393
  )
329
394
 
330
395
 
331
396
  gte_base_en_v15 = ModelMeta(
332
397
  loader=sentence_transformers_loader,
333
398
  name="Alibaba-NLP/gte-base-en-v1.5",
399
+ model_type=["dense"],
334
400
  languages=["eng-Latn"],
335
401
  open_weights=True,
336
402
  revision="a829fd0e060bb84554da0dfd354d0de0f7712b7f", # can be any
@@ -342,11 +408,35 @@ gte_base_en_v15 = ModelMeta(
342
408
  max_tokens=8192,
343
409
  reference="https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5",
344
410
  similarity_fn_name=ScoringFunction.COSINE,
345
- framework=["Sentence Transformers", "PyTorch"],
411
+ framework=[
412
+ "Sentence Transformers",
413
+ "PyTorch",
414
+ "Transformers",
415
+ "ONNX",
416
+ "safetensors",
417
+ ],
346
418
  use_instructions=False,
347
419
  superseded_by=None,
348
420
  adapted_from=None,
349
421
  public_training_code=None,
350
422
  public_training_data=None,
351
423
  training_datasets=None,
424
+ citation="""@misc{zhang2024mgte,
425
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
426
+ author={Xin Zhang and Yanzhao Zhang and Dingkun Long and Wen Xie and Ziqi Dai and Jialong Tang and Huan Lin and Baosong Yang and Pengjun Xie and Fei Huang and Meishan Zhang and Wenjie Li and Min Zhang},
427
+ year={2024},
428
+ eprint={2407.19669},
429
+ archivePrefix={arXiv},
430
+ primaryClass={cs.CL},
431
+ url={https://arxiv.org/abs/2407.19669},
432
+ }
433
+ @misc{li2023gte,
434
+ title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
435
+ author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
436
+ year={2023},
437
+ eprint={2308.03281},
438
+ archivePrefix={arXiv},
439
+ primaryClass={cs.CL},
440
+ url={https://arxiv.org/abs/2308.03281},
441
+ }""",
352
442
  )
@@ -37,6 +37,7 @@ Hinvec_bidir = ModelMeta(
37
37
  add_eos_token=True,
38
38
  ),
39
39
  name="Sailesh97/Hinvec",
40
+ model_type=["dense"],
40
41
  languages=["eng-Latn", "hin-Deva"],
41
42
  open_weights=True,
42
43
  revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b",
@@ -48,7 +49,7 @@ Hinvec_bidir = ModelMeta(
48
49
  max_tokens=2048,
49
50
  reference="https://huggingface.co/Sailesh97/Hinvec",
50
51
  similarity_fn_name="cosine",
51
- framework=["Sentence Transformers", "PyTorch"],
52
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
52
53
  use_instructions=True,
53
54
  training_datasets=hinvec_training_datasets,
54
55
  public_training_code=None,
@@ -3,6 +3,7 @@ from mteb.models import ModelMeta
3
3
  human = ModelMeta(
4
4
  loader=None,
5
5
  name="Human",
6
+ model_type=["dense"],
6
7
  languages=["eng-Latn", "ara-Arab", "rus-Cyrl", "dan-Latn", "nob-Latn"],
7
8
  open_weights=True,
8
9
  revision="2025_09_25",
@@ -94,6 +94,7 @@ granite_training_data = {
94
94
  granite_107m_multilingual = ModelMeta(
95
95
  loader=sentence_transformers_loader,
96
96
  name="ibm-granite/granite-embedding-107m-multilingual",
97
+ model_type=["dense"],
97
98
  languages=GRANITE_LANGUAGES,
98
99
  open_weights=True,
99
100
  revision="47db56afe692f731540413c67dd818ff492277e7",
@@ -105,7 +106,13 @@ granite_107m_multilingual = ModelMeta(
105
106
  max_tokens=512,
106
107
  reference="https://huggingface.co/ibm-granite/granite-embedding-107m-multilingual",
107
108
  similarity_fn_name=ScoringFunction.COSINE,
108
- framework=["Sentence Transformers", "PyTorch"],
109
+ framework=[
110
+ "Sentence Transformers",
111
+ "PyTorch",
112
+ "Transformers",
113
+ "ONNX",
114
+ "safetensors",
115
+ ],
109
116
  adapted_from=None,
110
117
  superseded_by=None,
111
118
  public_training_code=None,
@@ -118,6 +125,7 @@ granite_107m_multilingual = ModelMeta(
118
125
  granite_278m_multilingual = ModelMeta(
119
126
  loader=sentence_transformers_loader,
120
127
  name="ibm-granite/granite-embedding-278m-multilingual",
128
+ model_type=["dense"],
121
129
  languages=GRANITE_LANGUAGES,
122
130
  open_weights=True,
123
131
  revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
@@ -129,7 +137,13 @@ granite_278m_multilingual = ModelMeta(
129
137
  max_tokens=512,
130
138
  reference="https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual",
131
139
  similarity_fn_name=ScoringFunction.COSINE,
132
- framework=["Sentence Transformers", "PyTorch"],
140
+ framework=[
141
+ "Sentence Transformers",
142
+ "PyTorch",
143
+ "Transformers",
144
+ "ONNX",
145
+ "safetensors",
146
+ ],
133
147
  adapted_from=None,
134
148
  superseded_by=None,
135
149
  public_training_code=None,
@@ -142,6 +156,7 @@ granite_278m_multilingual = ModelMeta(
142
156
  granite_30m_english = ModelMeta(
143
157
  loader=sentence_transformers_loader,
144
158
  name="ibm-granite/granite-embedding-30m-english",
159
+ model_type=["dense"],
145
160
  languages=["eng-Latn"],
146
161
  open_weights=True,
147
162
  revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
@@ -153,7 +168,13 @@ granite_30m_english = ModelMeta(
153
168
  max_tokens=512,
154
169
  reference="https://huggingface.co/ibm-granite/granite-embedding-30m-english",
155
170
  similarity_fn_name=ScoringFunction.COSINE,
156
- framework=["Sentence Transformers", "PyTorch"],
171
+ framework=[
172
+ "Sentence Transformers",
173
+ "PyTorch",
174
+ "ONNX",
175
+ "safetensors",
176
+ "Transformers",
177
+ ],
157
178
  adapted_from=None,
158
179
  superseded_by=None,
159
180
  public_training_code=None,
@@ -166,6 +187,7 @@ granite_30m_english = ModelMeta(
166
187
  granite_125m_english = ModelMeta(
167
188
  loader=sentence_transformers_loader,
168
189
  name="ibm-granite/granite-embedding-125m-english",
190
+ model_type=["dense"],
169
191
  languages=["eng-Latn"],
170
192
  open_weights=True,
171
193
  revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
@@ -177,7 +199,13 @@ granite_125m_english = ModelMeta(
177
199
  max_tokens=512,
178
200
  reference="https://huggingface.co/ibm-granite/granite-embedding-125m-english",
179
201
  similarity_fn_name=ScoringFunction.COSINE,
180
- framework=["Sentence Transformers", "PyTorch"],
202
+ framework=[
203
+ "Sentence Transformers",
204
+ "PyTorch",
205
+ "ONNX",
206
+ "safetensors",
207
+ "Transformers",
208
+ ],
181
209
  adapted_from=None,
182
210
  superseded_by=None,
183
211
  public_training_code=None,
@@ -191,6 +219,7 @@ granite_125m_english = ModelMeta(
191
219
  granite_english_r2 = ModelMeta(
192
220
  loader=sentence_transformers_loader,
193
221
  name="ibm-granite/granite-embedding-english-r2",
222
+ model_type=["dense"],
194
223
  languages=["eng-Latn"],
195
224
  open_weights=True,
196
225
  revision="6e7b8ce0e76270394ac4669ba4bbd7133b60b7f9",
@@ -202,7 +231,7 @@ granite_english_r2 = ModelMeta(
202
231
  max_tokens=8192,
203
232
  reference="https://huggingface.co/ibm-granite/granite-embedding-english-r2",
204
233
  similarity_fn_name="cosine",
205
- framework=["Sentence Transformers", "PyTorch"],
234
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
206
235
  adapted_from=None,
207
236
  superseded_by=None,
208
237
  public_training_code=None,
@@ -215,6 +244,7 @@ granite_english_r2 = ModelMeta(
215
244
  granite_small_english_r2 = ModelMeta(
216
245
  loader=sentence_transformers_loader,
217
246
  name="ibm-granite/granite-embedding-small-english-r2",
247
+ model_type=["dense"],
218
248
  languages=["eng-Latn"],
219
249
  open_weights=True,
220
250
  revision="54a8d2616a0844355a5164432d3f6dafb37b17a3",
@@ -226,7 +256,7 @@ granite_small_english_r2 = ModelMeta(
226
256
  max_tokens=8192,
227
257
  reference="https://huggingface.co/ibm-granite/granite-embedding-small-english-r2",
228
258
  similarity_fn_name="cosine",
229
- framework=["Sentence Transformers", "PyTorch"],
259
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
230
260
  adapted_from=None,
231
261
  superseded_by=None,
232
262
  public_training_code=None,
@@ -50,6 +50,7 @@ inf_retriever_v1 = ModelMeta(
50
50
  trust_remote_code=True,
51
51
  ),
52
52
  name="infly/inf-retriever-v1",
53
+ model_type=["dense"],
53
54
  languages=["eng-Latn", "zho-Hans"],
54
55
  open_weights=True,
55
56
  revision="cb70ca7c31dfa866b2eff2dad229c144d8ddfd91",
@@ -61,7 +62,7 @@ inf_retriever_v1 = ModelMeta(
61
62
  max_tokens=32768,
62
63
  reference="https://huggingface.co/infly/inf-retriever-v1",
63
64
  similarity_fn_name=ScoringFunction.COSINE,
64
- framework=["Sentence Transformers", "PyTorch"],
65
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
65
66
  use_instructions=True,
66
67
  adapted_from="Alibaba-NLP/gte-Qwen2-7B-instruct",
67
68
  public_training_code=None,
@@ -76,6 +77,7 @@ inf_retriever_v1_1_5b = ModelMeta(
76
77
  trust_remote_code=True,
77
78
  ),
78
79
  name="infly/inf-retriever-v1-1.5b",
80
+ model_type=["dense"],
79
81
  languages=["eng-Latn", "zho-Hans"],
80
82
  open_weights=True,
81
83
  revision="c9c05c2dd50707a486966ba81703021ae2094a06",
@@ -87,7 +89,7 @@ inf_retriever_v1_1_5b = ModelMeta(
87
89
  max_tokens=32768,
88
90
  reference="https://huggingface.co/infly/inf-retriever-v1-1.5b",
89
91
  similarity_fn_name=ScoringFunction.COSINE,
90
- framework=["Sentence Transformers", "PyTorch"],
92
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
91
93
  use_instructions=True,
92
94
  adapted_from="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
93
95
  public_training_code=None,