mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
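Across the ModelMeta hunks shown below (reranker and Russian/Chinese sentence-encoder modules), two mechanical changes recur: the boolean is_cross_encoder=True flag is replaced by model_type=["cross-encoder"] (dense encoders instead gain model_type=["dense"]), and the framework list is extended with entries such as "Transformers" and "safetensors". As a rough sketch of that migration only — the field names come from the hunks, but the helper below is hypothetical, operates on plain dicts, and is not part of mteb — the change amounts to:

def migrate_model_meta(old: dict) -> dict:
    # Illustrative sketch: translate pre-2.7-style metadata keys to the new layout.
    new = dict(old)
    # is_cross_encoder=True becomes model_type=["cross-encoder"]; encoder models
    # without that flag are tagged model_type=["dense"] in the hunks below.
    if new.pop("is_cross_encoder", False):
        new["model_type"] = ["cross-encoder"]
    else:
        new.setdefault("model_type", ["dense"])
    # Framework lists are extended; which backends get added varies per model upstream.
    frameworks = list(new.get("framework", []))
    for extra in ("Transformers", "safetensors"):
        if extra not in frameworks:
            frameworks.append(extra)
    new["framework"] = frameworks
    return new

print(migrate_model_meta({"name": "google/flan-t5-base", "is_cross_encoder": True, "framework": ["PyTorch"]}))
# -> {'name': 'google/flan-t5-base', 'framework': ['PyTorch', 'Transformers', 'safetensors'], 'model_type': ['cross-encoder']}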
@@ -315,6 +315,7 @@ monot5_small = ModelMeta(
  fp_options="float16",
  ),
  name="castorini/monot5-small-msmarco-10k",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e",
@@ -329,8 +330,7 @@ monot5_small = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  citation="""@misc{rosa2022parameterleftbehinddistillation,
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -343,11 +343,12 @@ monot5_small = ModelMeta(
  )

  monot5_base = ModelMeta(
- loader=MonoT5Reranker, # type: ignore
+ loader=MonoT5Reranker,
  loader_kwargs=dict(
  fp_options="float16",
  ),
  name="castorini/monot5-base-msmarco-10k",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="f15657ab3d2a5dd0b9a30c8c0b6a0a73c9cb5884",
@@ -371,8 +372,7 @@ monot5_base = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  )

  monot5_large = ModelMeta(
@@ -381,6 +381,7 @@ monot5_large = ModelMeta(
  fp_options="float16",
  ),
  name="castorini/monot5-large-msmarco-10k",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98",
@@ -395,8 +396,7 @@ monot5_large = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  citation="""@misc{rosa2022parameterleftbehinddistillation,
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -414,6 +414,7 @@ monot5_3b = ModelMeta(
  fp_options="float16",
  ),
  name="castorini/monot5-3b-msmarco-10k",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="bc0c419a438c81f592f878ce32430a1823f5db6c",
@@ -428,8 +429,7 @@ monot5_3b = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  citation="""@misc{rosa2022parameterleftbehinddistillation,
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -442,11 +442,12 @@ monot5_3b = ModelMeta(
  )

  flant5_base = ModelMeta(
- loader=FLANT5Reranker, # type: ignore
+ loader=FLANT5Reranker,
  loader_kwargs=dict(
  fp_options="float16",
  ),
  name="google/flan-t5-base",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="7bcac572ce56db69c1ea7c8af255c5d7c9672fc2",
@@ -483,8 +484,7 @@ flant5_base = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers", "safetensors"],
  )

  flant5_large = ModelMeta(
@@ -493,6 +493,7 @@ flant5_large = ModelMeta(
  fp_options="float16",
  ),
  name="google/flan-t5-large",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="0613663d0d48ea86ba8cb3d7a44f0f65dc596a2a",
@@ -529,8 +530,7 @@ flant5_large = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers", "safetensors"],
  )

  flant5_xl = ModelMeta(
@@ -539,6 +539,7 @@ flant5_xl = ModelMeta(
  fp_options="float16",
  ),
  name="google/flan-t5-xl",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="7d6315df2c2fb742f0f5b556879d730926ca9001",
@@ -575,8 +576,7 @@ flant5_xl = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers", "safetensors"],
  )

  flant5_xxl = ModelMeta(
@@ -585,6 +585,7 @@ flant5_xxl = ModelMeta(
  fp_options="float16",
  ),
  name="google/flan-t5-xxl",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="ae7c9136adc7555eeccc78cdd960dfd60fb346ce",
@@ -621,8 +622,7 @@ flant5_xxl = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers", "safetensors"],
  )


@@ -632,6 +632,7 @@ llama2_7b = ModelMeta(
  fp_options="float16",
  ),
  name="meta-llama/Llama-2-7b-hf",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="01c7f73d771dfac7d292323805ebc428287df4f9",
@@ -646,7 +647,7 @@ llama2_7b = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
+ framework=["PyTorch", "Transformers", "safetensors"],
  citation="""@misc{touvron2023llama2openfoundation,
  title={Llama 2: Open Foundation and Fine-Tuned Chat Models},
  author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
@@ -656,7 +657,6 @@ llama2_7b = ModelMeta(
  primaryClass={cs.CL},
  url={https://arxiv.org/abs/2307.09288},
  }""",
- is_cross_encoder=True,
  )

  llama2_7b_chat = ModelMeta(
@@ -665,6 +665,7 @@ llama2_7b_chat = ModelMeta(
  fp_options="float16",
  ),
  name="meta-llama/Llama-2-7b-chat-hf",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="f5db02db724555f92da89c216ac04704f23d4590",
@@ -688,8 +689,7 @@ llama2_7b_chat = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers", "safetensors"],
  )

  mistral_7b = ModelMeta(
@@ -698,6 +698,7 @@ mistral_7b = ModelMeta(
  fp_options="float16",
  ),
  name="mistralai/Mistral-7B-Instruct-v0.2",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="3ad372fc79158a2148299e3318516c786aeded6c",
@@ -712,7 +713,7 @@ mistral_7b = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
+ framework=["PyTorch", "Transformers", "safetensors"],
  citation="""@misc{jiang2023mistral7b,
  title={Mistral 7B},
  author={Albert Q. Jiang and Alexandre Sablayrolles and Arthur Mensch and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Florian Bressand and Gianna Lengyel and Guillaume Lample and Lucile Saulnier and Lélio Renard Lavaud and Marie-Anne Lachaux and Pierre Stock and Teven Le Scao and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed},
@@ -722,7 +723,6 @@ mistral_7b = ModelMeta(
  primaryClass={cs.CL},
  url={https://arxiv.org/abs/2310.06825},
  }""",
- is_cross_encoder=True,
  )

  followir_7b = ModelMeta(
@@ -731,6 +731,7 @@ followir_7b = ModelMeta(
  fp_options="float16",
  ),
  name="jhu-clsp/FollowIR-7B",
+ model_type=["cross-encoder"],
  languages=["eng-Latn"],
  open_weights=True,
  revision="4d25d437e38b510c01852070c0731e8f6e1875d1",
@@ -747,7 +748,7 @@ followir_7b = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
+ framework=["PyTorch", "Transformers", "safetensors"],
  citation="""
  @misc{weller2024followir,
  title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions},
@@ -758,7 +759,6 @@ followir_7b = ModelMeta(
  primaryClass={cs.IR}
  }
  """,
- is_cross_encoder=True,
  )


@@ -874,6 +874,7 @@ mt5_base_mmarco_v2 = ModelMeta(
  fp_options="float16",
  ),
  name="unicamp-dl/mt5-base-mmarco-v2",
+ model_type=["cross-encoder"],
  languages=mt5_languages,
  open_weights=True,
  revision="cc0a949b9f21efcaba45c8cabb998ad02ce8d4e7",
@@ -897,16 +898,16 @@ mt5_base_mmarco_v2 = ModelMeta(
  public_training_data=None,
  similarity_fn_name=None,
  use_instructions=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  )

  mt5_13b_mmarco_100k = ModelMeta(
- loader=MonoT5Reranker, # type: ignore
+ loader=MonoT5Reranker,
  loader_kwargs=dict(
  fp_options="float16",
  ),
  name="unicamp-dl/mt5-13b-mmarco-100k",
+ model_type=["cross-encoder"],
  languages=mt5_languages,
  open_weights=True,
  revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
@@ -921,6 +922,5 @@ mt5_13b_mmarco_100k = ModelMeta(
  similarity_fn_name=None,
  use_instructions=None,
  training_datasets=None,
- framework=["PyTorch"],
- is_cross_encoder=True,
+ framework=["PyTorch", "Transformers"],
  )
@@ -9,6 +9,7 @@ from .stella_models import stella_zh_datasets
  ritrieve_zh_v1 = ModelMeta(
  loader=SentenceTransformerEncoderWrapper,
  name="richinfoai/ritrieve_zh_v1",
+ model_type=["dense"],
  languages=["zho-Hans"],
  open_weights=True,
  revision="f8d5a707656c55705027678e311f9202c8ced12c",
@@ -20,7 +21,7 @@ ritrieve_zh_v1 = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/richinfoai/ritrieve_zh_v1",
  similarity_fn_name="cosine",
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
  use_instructions=False,
  superseded_by=None,
  adapted_from=None,
@@ -43,6 +43,10 @@ GIGA_task_prompts = {
43
43
  "query": "Given a news title, retrieve relevant news article",
44
44
  "document": "",
45
45
  },
46
+ "RiaNewsRetrievalHardNegatives.v2": {
47
+ "query": "Given a news title, retrieve relevant news article",
48
+ "document": "",
49
+ },
46
50
  "MIRACLReranking": {
47
51
  "query": "Given a question, retrieve Wikipedia passages that answer the question",
48
52
  "document": "",
@@ -51,6 +55,10 @@ GIGA_task_prompts = {
51
55
  "query": "Given a question, retrieve Wikipedia passages that answer the question",
52
56
  "document": "",
53
57
  },
58
+ "MIRACLRetrievalHardNegatives.v2": {
59
+ "query": "Given a question, retrieve Wikipedia passages that answer the question",
60
+ "document": "",
61
+ },
54
62
  "ArguAna": {
55
63
  "query": "Given a search query, retrieve passages that answer the question",
56
64
  "document": "Given a search query, retrieve passages that answer the question",
@@ -230,6 +238,7 @@ GIGA_task_prompts = {
230
238
  rubert_tiny = ModelMeta(
231
239
  loader=sentence_transformers_loader,
232
240
  name="cointegrated/rubert-tiny",
241
+ model_type=["dense"],
233
242
  languages=["rus-Cyrl"],
234
243
  open_weights=True,
235
244
  revision="5441c5ea8026d4f6d7505ec004845409f1259fb1",
@@ -241,7 +250,7 @@ rubert_tiny = ModelMeta(
241
250
  max_tokens=512,
242
251
  reference="https://huggingface.co/cointegrated/rubert-tiny",
243
252
  similarity_fn_name=ScoringFunction.COSINE,
244
- framework=["Sentence Transformers", "PyTorch"],
253
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
245
254
  use_instructions=False,
246
255
  public_training_code="https://gist.github.com/avidale/7bc6350f26196918bf339c01261f5c60",
247
256
  training_datasets={
@@ -255,6 +264,7 @@ rubert_tiny = ModelMeta(
255
264
  rubert_tiny2 = ModelMeta(
256
265
  loader=sentence_transformers_loader,
257
266
  name="cointegrated/rubert-tiny2",
267
+ model_type=["dense"],
258
268
  languages=["rus-Cyrl"],
259
269
  open_weights=True,
260
270
  revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3",
@@ -266,7 +276,7 @@ rubert_tiny2 = ModelMeta(
266
276
  max_tokens=2048,
267
277
  reference="https://huggingface.co/cointegrated/rubert-tiny2",
268
278
  similarity_fn_name=ScoringFunction.COSINE,
269
- framework=["Sentence Transformers", "PyTorch"],
279
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
270
280
  use_instructions=False,
271
281
  public_training_code="https://colab.research.google.com/drive/1mSWfIQ6PIlteLVZ9DKKpcorycgLIKZLf?usp=sharing",
272
282
  training_datasets=set(
@@ -281,6 +291,7 @@ rubert_tiny2 = ModelMeta(
281
291
  sbert_large_nlu_ru = ModelMeta(
282
292
  loader=sentence_transformers_loader,
283
293
  name="ai-forever/sbert_large_nlu_ru",
294
+ model_type=["dense"],
284
295
  languages=["rus-Cyrl"],
285
296
  open_weights=True,
286
297
  revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a",
@@ -292,7 +303,7 @@ sbert_large_nlu_ru = ModelMeta(
292
303
  max_tokens=512, # best guess
293
304
  reference="https://huggingface.co/ai-forever/sbert_large_nlu_ru",
294
305
  similarity_fn_name=ScoringFunction.COSINE,
295
- framework=["Sentence Transformers", "PyTorch"],
306
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
296
307
  use_instructions=False,
297
308
  public_training_code=None,
298
309
  public_training_data=None,
@@ -306,6 +317,7 @@ sbert_large_nlu_ru = ModelMeta(
306
317
  sbert_large_mt_nlu_ru = ModelMeta(
307
318
  loader=sentence_transformers_loader,
308
319
  name="ai-forever/sbert_large_mt_nlu_ru",
320
+ model_type=["dense"],
309
321
  languages=["rus-Cyrl"],
310
322
  open_weights=True,
311
323
  revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0",
@@ -317,7 +329,7 @@ sbert_large_mt_nlu_ru = ModelMeta(
317
329
  max_tokens=512, # best guess
318
330
  reference="https://huggingface.co/ai-forever/sbert_large_mt_nlu_ru",
319
331
  similarity_fn_name=ScoringFunction.COSINE,
320
- framework=["Sentence Transformers", "PyTorch"],
332
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
321
333
  use_instructions=False,
322
334
  public_training_code=None,
323
335
  public_training_data=None,
@@ -333,6 +345,7 @@ user_base_ru = ModelMeta(
333
345
  model_prompts={"query": "query: ", "document": "passage: "},
334
346
  ),
335
347
  name="deepvk/USER-base",
348
+ model_type=["dense"],
336
349
  languages=["rus-Cyrl"],
337
350
  open_weights=True,
338
351
  revision="436a489a2087d61aa670b3496a9915f84e46c861",
@@ -344,7 +357,7 @@ user_base_ru = ModelMeta(
344
357
  max_tokens=512,
345
358
  reference="https://huggingface.co/deepvk/USER-base",
346
359
  similarity_fn_name=ScoringFunction.COSINE,
347
- framework=["Sentence Transformers", "PyTorch"],
360
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
348
361
  adapted_from="https://huggingface.co/deepvk/deberta-v1-base",
349
362
  use_instructions=True,
350
363
  citation="""@misc{deepvk2024user,
@@ -393,6 +406,7 @@ user_base_ru = ModelMeta(
393
406
  user_bge_m3 = ModelMeta(
394
407
  loader=sentence_transformers_loader,
395
408
  name="deepvk/USER-bge-m3",
409
+ model_type=["dense"],
396
410
  languages=["rus-Cyrl"],
397
411
  open_weights=True,
398
412
  revision="0cc6cfe48e260fb0474c753087a69369e88709ae",
@@ -404,7 +418,7 @@ user_bge_m3 = ModelMeta(
404
418
  max_tokens=8194,
405
419
  reference="https://huggingface.co/deepvk/USER-base",
406
420
  similarity_fn_name=ScoringFunction.COSINE,
407
- framework=["Sentence Transformers", "PyTorch"],
421
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
408
422
  adapted_from="BAAI/bge-m3",
409
423
  use_instructions=False,
410
424
  training_datasets={
@@ -431,11 +445,19 @@ user_bge_m3 = ModelMeta(
431
445
  },
432
446
  public_training_code=None,
433
447
  public_training_data=None,
448
+ citation="""@misc{deepvk2024user,
449
+ title={USER: Universal Sentence Encoder for Russian},
450
+ author={Malashenko, Boris and Zemerov, Anton and Spirin, Egor},
451
+ url={https://huggingface.co/datasets/deepvk/USER-base},
452
+ publisher={Hugging Face},
453
+ year={2024},
454
+ }""",
434
455
  )
435
456
 
436
457
  deberta_v1_ru = ModelMeta(
437
458
  loader=sentence_transformers_loader,
438
459
  name="deepvk/deberta-v1-base",
460
+ model_type=["dense"],
439
461
  languages=["rus-Cyrl"],
440
462
  open_weights=True,
441
463
  revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4",
@@ -447,7 +469,7 @@ deberta_v1_ru = ModelMeta(
447
469
  max_tokens=512,
448
470
  reference="https://huggingface.co/deepvk/deberta-v1-base",
449
471
  similarity_fn_name=ScoringFunction.COSINE,
450
- framework=["Sentence Transformers", "PyTorch"],
472
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
451
473
  use_instructions=False,
452
474
  # Wikipedia, Books, Twitter comments, Pikabu, Proza.ru, Film subtitles, News websites, and Social corpus
453
475
  public_training_code=None,
@@ -466,6 +488,7 @@ deberta_v1_ru = ModelMeta(
  rubert_base_cased = ModelMeta(
  loader=sentence_transformers_loader,
  name="DeepPavlov/rubert-base-cased",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="4036cab694767a299f2b9e6492909664d9414229",
@@ -477,7 +500,7 @@ rubert_base_cased = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/DeepPavlov/rubert-base-cased",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
  use_instructions=False,
  public_training_code=None,
  public_training_data=None,
@@ -501,6 +524,7 @@ rubert_base_cased = ModelMeta(
  distilrubert_small_cased_conversational = ModelMeta(
  loader=sentence_transformers_loader,
  name="DeepPavlov/distilrubert-small-cased-conversational",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="e348066b4a7279b97138038299bddc6580a9169a",
@@ -512,7 +536,7 @@ distilrubert_small_cased_conversational = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/DeepPavlov/distilrubert-small-cased-conversational",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
  use_instructions=False,
  public_training_code=None,
  public_training_data=None,
@@ -535,6 +559,7 @@ distilrubert_small_cased_conversational = ModelMeta(
  rubert_base_cased_sentence = ModelMeta(
  loader=sentence_transformers_loader,
  name="DeepPavlov/rubert-base-cased-sentence",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8",
@@ -546,7 +571,7 @@ rubert_base_cased_sentence = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/DeepPavlov/rubert-base-cased-sentence",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
  use_instructions=False,
  public_training_code=None,
  public_training_data=None,
@@ -559,6 +584,7 @@ rubert_base_cased_sentence = ModelMeta(
  labse_en_ru = ModelMeta(
  loader=sentence_transformers_loader,
  name="cointegrated/LaBSE-en-ru",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e",
@@ -570,7 +596,7 @@ labse_en_ru = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/cointegrated/LaBSE-en-ru",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
  use_instructions=False,
  public_training_code="https://colab.research.google.com/drive/1dnPRn0-ugj3vZgSpyCC9sgslM2SuSfHy?usp=sharing",
  public_training_data=None,
@@ -586,6 +612,7 @@ turbo_models_datasets = set(
  rubert_tiny_turbo = ModelMeta(
  loader=sentence_transformers_loader,
  name="sergeyzh/rubert-tiny-turbo",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054",
@@ -597,7 +624,7 @@ rubert_tiny_turbo = ModelMeta(
  max_tokens=2048,
  reference="https://huggingface.co/sergeyzh/rubert-tiny-turbo",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  use_instructions=False,
  public_training_code=None,
  public_training_data=None,
@@ -608,6 +635,7 @@ rubert_tiny_turbo = ModelMeta(
  rubert_mini_frida = ModelMeta(
  loader=sentence_transformers_loader,
  name="sergeyzh/rubert-mini-frida",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="19b279b78afd945b5ccae78f63e284909814adc2",
@@ -619,7 +647,7 @@ rubert_mini_frida = ModelMeta(
  max_tokens=2048,
  reference="https://huggingface.co/sergeyzh/rubert-mini-frida",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  use_instructions=True,
  public_training_code=None,
  public_training_data=None,
@@ -635,6 +663,7 @@ rubert_mini_frida = ModelMeta(
  labse_ru_turbo = ModelMeta(
  loader=sentence_transformers_loader,
  name="sergeyzh/LaBSE-ru-turbo",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="1940b046c6b5e125df11722b899130329d0a46da",
@@ -646,7 +675,7 @@ labse_ru_turbo = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/sergeyzh/LaBSE-ru-turbo",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  use_instructions=False,
  training_datasets=turbo_models_datasets,
  public_training_code=None,
@@ -683,6 +712,7 @@ rosberta_ru_en = ModelMeta(
  model_prompts=rosberta_prompts,
  ),
  name="ai-forever/ru-en-RoSBERTa",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="89fb1651989adbb1cfcfdedafd7d102951ad0555",
@@ -715,7 +745,7 @@ rosberta_ru_en = ModelMeta(
  },
  public_training_data=None,
  public_training_code=None,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb,
  title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
  author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
@@ -755,6 +785,7 @@ frida_prompts = {
  "SensitiveTopicsClassification": "categorize_topic: ",
  "TERRa": "categorize_entailment: ",
  "RiaNewsRetrieval": "categorize: ",
+ "RiaNewsRetrievalHardNegatives.v2": "",
  }

  frida_training_datasets = {
@@ -847,6 +878,7 @@ frida = ModelMeta(
  model_prompts=frida_prompts,
  ),
  name="ai-forever/FRIDA",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="7292217af9a9e6dbf07048f76b434ad1e2aa8b76",
@@ -863,7 +895,8 @@ frida = ModelMeta(
  training_datasets=frida_training_datasets,
  public_training_data=None,
  public_training_code=None,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
+ citation=None,
  )

  giga_embeddings = ModelMeta(
@@ -879,6 +912,7 @@ giga_embeddings = ModelMeta(
  },
  ),
  name="ai-sage/Giga-Embeddings-instruct",
+ model_type=["dense"],
  languages=["eng-Latn", "rus-Cyrl"],
  open_weights=True,
  revision="0ad5b29bfecd806cecc9d66b927d828a736594dc",
@@ -890,7 +924,7 @@ giga_embeddings = ModelMeta(
  max_tokens=4096,
  reference="https://huggingface.co/ai-sage/Giga-Embeddings-instruct",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  use_instructions=True,
  public_training_code=None,
  public_training_data=None,
@@ -910,6 +944,7 @@ berta_training_datasets = (
  berta = ModelMeta(
  loader=sentence_transformers_loader,
  name="sergeyzh/BERTA",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="914c8c8aed14042ed890fc2c662d5e9e66b2faa7",
@@ -921,7 +956,7 @@ berta = ModelMeta(
  max_tokens=512,
  reference="https://huggingface.co/sergeyzh/BERTA",
  similarity_fn_name=ScoringFunction.COSINE,
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
  use_instructions=True,
  training_datasets=berta_training_datasets,
  public_training_code=None,
@@ -982,6 +1017,7 @@ user2_small = ModelMeta(
  model_prompts=user2_prompts,
  ),
  name="deepvk/USER2-small",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="23f65b34cf7632032061f5cc66c14714e6d4cee4",
@@ -998,7 +1034,14 @@ user2_small = ModelMeta(
  training_datasets=user2_training_data,
  public_training_data=None,
  public_training_code="https://github.com/BlessedTatonka/some_code/tree/2899f27d51efdf4217fc6453799ff197e9792f1e",
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
+ citation="""@misc{deepvk2025user,
+ title={USER2},
+ author={Malashenko, Boris and Spirin, Egor and Sokolov Andrey},
+ url={https://huggingface.co/deepvk/USER2-small},
+ publisher={Hugging Face},
+ year={2025},
+ }""",
  )

  user2_base = ModelMeta(
@@ -1007,6 +1050,7 @@ user2_base = ModelMeta(
  model_prompts=user2_prompts,
  ),
  name="deepvk/USER2-base",
+ model_type=["dense"],
  languages=["rus-Cyrl"],
  open_weights=True,
  revision="0942cf96909b6d52e61f79a01e2d30c7be640b27",
@@ -1023,5 +1067,12 @@ user2_base = ModelMeta(
  training_datasets=user2_training_data,
  public_training_data=None,
  public_training_code="https://github.com/BlessedTatonka/some_code/tree/2899f27d51efdf4217fc6453799ff197e9792f1e",
- framework=["Sentence Transformers", "PyTorch"],
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
+ citation="""@misc{deepvk2025user,
+ title={USER2},
+ author={Malashenko, Boris and Spirin, Egor and Sokolov Andrey},
+ url={https://huggingface.co/deepvk/USER2-base},
+ publisher={Hugging Face},
+ year={2025},
+ }""",
  )
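
The hunks above only add fields to existing ModelMeta entries (a model_type of ["dense"], extended framework lists, and citation blocks). A minimal sketch of how the updated metadata could be inspected from the installed package follows; it assumes the 2.7.2 wheel still exposes get_model_meta at the top level, and "deepvk/USER2-base" is just one of the entries touched in this diff.

# Minimal sketch (assumption: mteb.get_model_meta is available in this release).
import mteb

meta = mteb.get_model_meta("deepvk/USER2-base")

# Fields added or extended in this diff:
print(meta.model_type)  # expected ["dense"] per the hunk above
print(meta.framework)   # expected ["Sentence Transformers", "PyTorch", "safetensors"]
print(meta.citation)    # the new @misc{deepvk2025user, ...} entry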