mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,18 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
 
6
- from mteb.abstasks.task_metadata import TaskMetadata
7
7
  from mteb.models.abs_encoder import AbsEncoder
8
8
  from mteb.models.model_meta import ModelMeta
9
- from mteb.types import Array, BatchedInput, PromptType
9
+ from mteb.types import PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import Array, BatchedInput
10
16
 
11
17
  v2_training_data = {
12
18
  "MSMARCO",
@@ -134,13 +140,14 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
134
140
  revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
135
141
  release_date="2025-06-18",
136
142
  n_parameters=137_394_234,
143
+ n_embedding_parameters=23_440_896,
137
144
  memory_usage_mb=549,
138
145
  embed_dim=30522,
139
146
  license="apache-2.0",
140
147
  max_tokens=8192,
141
148
  reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
142
149
  similarity_fn_name="dot",
143
- framework=["Sentence Transformers", "PyTorch"],
150
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
144
151
  public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
145
152
  public_training_data=True,
146
153
  use_instructions=True,
@@ -160,13 +167,14 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
160
167
  revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
161
168
  release_date="2025-03-28",
162
169
  n_parameters=66_985_530,
170
+ n_embedding_parameters=23_440_896,
163
171
  memory_usage_mb=267,
164
172
  embed_dim=30522,
165
173
  license="apache-2.0",
166
174
  max_tokens=512,
167
175
  reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
168
176
  similarity_fn_name="dot",
169
- framework=["Sentence Transformers", "PyTorch"],
177
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
170
178
  public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
171
179
  public_training_data=True,
172
180
  use_instructions=True,
@@ -182,13 +190,14 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
182
190
  revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
183
191
  release_date="2024-07-17",
184
192
  n_parameters=66_985_530,
193
+ n_embedding_parameters=23_440_896,
185
194
  memory_usage_mb=267,
186
195
  embed_dim=30522,
187
196
  license="apache-2.0",
188
197
  max_tokens=512,
189
198
  reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
190
199
  similarity_fn_name="dot",
191
- framework=["Sentence Transformers", "PyTorch"],
200
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
192
201
  public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
193
202
  public_training_data=True,
194
203
  use_instructions=True,
@@ -205,13 +214,14 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
205
214
  revision="4af867a426867dfdd744097531046f4289a32fdd",
206
215
  release_date="2024-07-18",
207
216
  n_parameters=22_744_506,
217
+ n_embedding_parameters=11_720_448,
208
218
  memory_usage_mb=86,
209
219
  embed_dim=30522,
210
220
  license="apache-2.0",
211
221
  max_tokens=512,
212
222
  reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
213
223
  similarity_fn_name="dot",
214
- framework=["Sentence Transformers", "PyTorch"],
224
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
215
225
  public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
216
226
  public_training_data=True,
217
227
  use_instructions=True,
@@ -227,13 +237,14 @@ opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
227
237
  revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
228
238
  release_date="2024-03-07",
229
239
  n_parameters=132_955_194,
240
+ n_embedding_parameters=23_440_896,
230
241
  memory_usage_mb=507,
231
242
  embed_dim=30522,
232
243
  license="apache-2.0",
233
244
  max_tokens=512,
234
245
  reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
235
246
  similarity_fn_name="dot",
236
- framework=["Sentence Transformers", "PyTorch"],
247
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
237
248
  public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
238
249
  public_training_data=True,
239
250
  use_instructions=True,
@@ -1,8 +1,13 @@
1
- import numpy as np
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
2
4
 
3
5
  from mteb.models.abs_encoder import AbsEncoder
4
6
  from mteb.models.model_meta import ModelMeta
5
7
 
8
+ if TYPE_CHECKING:
9
+ from mteb.types import Array
10
+
6
11
 
7
12
  class OPSWrapper(AbsEncoder):
8
13
  def __init__(self, model_name: str, revision: str):
@@ -15,7 +20,7 @@ class OPSWrapper(AbsEncoder):
15
20
  )
16
21
  self.output_dim = 1536
17
22
 
18
- def encode(self, sentences: list[str], **kwargs) -> np.ndarray:
23
+ def encode(self, sentences: list[str], **kwargs) -> Array:
19
24
  embeddings = self.model.encode(sentences, **kwargs)
20
25
  return embeddings[:, : self.output_dim]
21
26
 
@@ -28,6 +33,7 @@ ops_moa_conan_embedding = ModelMeta(
28
33
  languages=["zho-Hans"],
29
34
  loader=OPSWrapper,
30
35
  n_parameters=int(343 * 1e6),
36
+ n_embedding_parameters=21_635_072,
31
37
  memory_usage_mb=1308,
32
38
  max_tokens=512,
33
39
  embed_dim=1536,
@@ -60,6 +66,7 @@ ops_moa_yuan_embedding = ModelMeta(
60
66
  languages=["zho-Hans"],
61
67
  loader=OPSWrapper,
62
68
  n_parameters=int(343 * 1e6),
69
+ n_embedding_parameters=21_635_072,
63
70
  memory_usage_mb=1242,
64
71
  max_tokens=512,
65
72
  embed_dim=1536,
@@ -67,7 +74,7 @@ ops_moa_yuan_embedding = ModelMeta(
67
74
  open_weights=True,
68
75
  public_training_code=None,
69
76
  public_training_data=None,
70
- framework=["PyTorch", "Sentence Transformers"],
77
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
71
78
  reference="https://huggingface.co/OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0",
72
79
  similarity_fn_name="cosine",
73
80
  use_instructions=False,
@@ -4,6 +4,7 @@ solon_embeddings_1_1 = ModelMeta(
4
4
  name="OrdalieTech/Solon-embeddings-mini-beta-1.1",
5
5
  languages=["fra-Latn"],
6
6
  n_parameters=210_000_000,
7
+ n_embedding_parameters=None,
7
8
  public_training_code=None,
8
9
  memory_usage_mb=808.0,
9
10
  open_weights=True,
@@ -14,7 +15,7 @@ solon_embeddings_1_1 = ModelMeta(
14
15
  max_tokens=8192,
15
16
  reference="https://huggingface.co/OrdalieTech/Solon-embeddings-mini-beta-1.1",
16
17
  similarity_fn_name="cosine",
17
- framework=["Sentence Transformers", "PyTorch"],
18
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
18
19
  use_instructions=False,
19
20
  public_training_data=(
20
21
  "https://huggingface.co/datasets/PleIAs/common_corpus; "
@@ -20,13 +20,14 @@ pawan_embd_68m = ModelMeta(
20
20
  revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
21
21
  release_date="2025-12-08",
22
22
  n_parameters=68_000_000,
23
+ n_embedding_parameters=None,
23
24
  memory_usage_mb=260,
24
25
  embed_dim=768,
25
26
  license="apache-2.0",
26
27
  max_tokens=512,
27
28
  reference="https://huggingface.co/dmedhi/PawanEmbd-68M",
28
29
  similarity_fn_name=ScoringFunction.COSINE,
29
- framework=["Sentence Transformers", "PyTorch"],
30
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
30
31
  adapted_from="ibm-granite/granite-embedding-278m-multilingual",
31
32
  superseded_by=None,
32
33
  public_training_code=None,
@@ -12,13 +12,14 @@ piccolo_base_zh = ModelMeta(
12
12
  revision="47c0a63b8f667c3482e05b2fd45577bb19252196",
13
13
  release_date="2023-09-04", # first commit
14
14
  n_parameters=None,
15
+ n_embedding_parameters=16_226_304,
15
16
  memory_usage_mb=None, # can't see on model card
16
17
  embed_dim=768,
17
18
  license="mit",
18
19
  max_tokens=512,
19
20
  reference="https://huggingface.co/sensenova/piccolo-base-zh",
20
21
  similarity_fn_name=ScoringFunction.COSINE,
21
- framework=["Sentence Transformers", "PyTorch"],
22
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
22
23
  use_instructions=False,
23
24
  superseded_by=None,
24
25
  adapted_from=None,
@@ -37,6 +38,7 @@ piccolo_large_zh_v2 = ModelMeta(
37
38
  revision="05948c1d889355936bdf9db7d30df57dd78d25a3",
38
39
  release_date="2024-04-22", # first commit
39
40
  n_parameters=None,
41
+ n_embedding_parameters=None,
40
42
  memory_usage_mb=None, # we don't know because they removed the model
41
43
  embed_dim=1024,
42
44
  license="not specified",
@@ -0,0 +1,56 @@
1
+ from mteb.models.model_implementations.arctic_models import (
2
+ ARCTIC_V2_CITATION,
3
+ LANGUAGES_V2_0,
4
+ arctic_v2_training_datasets,
5
+ )
6
+ from mteb.models.model_meta import (
7
+ ModelMeta,
8
+ ScoringFunction,
9
+ )
10
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
11
+
12
+ PIXIE_RUNE_V1_CITATION = """@misc{TelePIX-PIXIE-Rune-v1.0,
13
+ title = {PIXIE-Rune-v1.0},
14
+ author = {TelePIX AI Research Team and Bongmin Kim},
15
+ year = {2026},
16
+ howpublished = {Hugging Face model card},
17
+ url = {https://huggingface.co/telepix/PIXIE-Rune-v1.0}
18
+ }"""
19
+
20
+ PIXIE_RUNE_V1_PROMPTS = {
21
+ "query": "query: ",
22
+ "document": "",
23
+ }
24
+
25
+ # it is further fine-tuned on TelePIX proprietary IR data (not public).
26
+ pixie_rune_v1_training_datasets = set(arctic_v2_training_datasets) | {
27
+ "TelePIX-Proprietary-IR-Triplets",
28
+ }
29
+
30
+ pixie_rune_v1_0 = ModelMeta(
31
+ loader=sentence_transformers_loader,
32
+ loader_kwargs={
33
+ "model_prompts": PIXIE_RUNE_V1_PROMPTS,
34
+ },
35
+ name="telepix/PIXIE-Rune-v1.0",
36
+ model_type=["dense"],
37
+ revision="b2486496da71191626666a88f9bfec844933a134",
38
+ release_date="2026-01-15",
39
+ languages=LANGUAGES_V2_0,
40
+ open_weights=True,
41
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
42
+ n_parameters=567754752,
43
+ memory_usage_mb=2166,
44
+ max_tokens=6144,
45
+ embed_dim=1024,
46
+ license="apache-2.0",
47
+ reference="https://huggingface.co/telepix/PIXIE-Rune-v1.0",
48
+ similarity_fn_name=ScoringFunction.COSINE,
49
+ use_instructions=True,
50
+ adapted_from="Snowflake/snowflake-arctic-embed-l-v2.0",
51
+ superseded_by=None,
52
+ public_training_code=None,
53
+ public_training_data=None,
54
+ training_datasets=pixie_rune_v1_training_datasets,
55
+ citation=PIXIE_RUNE_V1_CITATION + "\n\n" + ARCTIC_V2_CITATION,
56
+ )
@@ -1,15 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import torch
6
- from torch.utils.data import DataLoader
7
7
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
8
  from mteb.models.abs_encoder import AbsEncoder
10
9
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.models.models_protocols import EncoderProtocol
12
- from mteb.types import Array, BatchedInput, PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Callable
13
+
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.models.models_protocols import EncoderProtocol
18
+ from mteb.types import Array, BatchedInput, PromptType
13
19
 
14
20
  from .repllama_models import RepLLaMAModel, model_prompts
15
21
 
@@ -81,6 +87,7 @@ promptriever_llama2 = ModelMeta(
81
87
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
82
88
  release_date="2024-09-15",
83
89
  n_parameters=7_000_000_000,
90
+ n_embedding_parameters=None,
84
91
  memory_usage_mb=26703,
85
92
  max_tokens=4096,
86
93
  embed_dim=4096,
@@ -90,7 +97,7 @@ promptriever_llama2 = ModelMeta(
90
97
  ),
91
98
  reference="https://huggingface.co/samaya-ai/promptriever-llama2-7b-v1",
92
99
  similarity_fn_name=ScoringFunction.COSINE,
93
- framework=["PyTorch", "Tevatron"],
100
+ framework=["PyTorch", "Tevatron", "safetensors"],
94
101
  use_instructions=True,
95
102
  citation=PROMPTRIEVER_CITATION,
96
103
  public_training_code=None,
@@ -117,13 +124,14 @@ promptriever_llama3 = ModelMeta(
117
124
  },
118
125
  release_date="2024-09-15",
119
126
  n_parameters=8_000_000_000,
127
+ n_embedding_parameters=None,
120
128
  memory_usage_mb=30518,
121
129
  max_tokens=8192,
122
130
  embed_dim=4096,
123
131
  license="apache-2.0",
124
132
  reference="https://huggingface.co/samaya-ai/promptriever-llama3.1-8b-v1",
125
133
  similarity_fn_name=ScoringFunction.COSINE,
126
- framework=["PyTorch", "Tevatron"],
134
+ framework=["PyTorch", "Tevatron", "safetensors"],
127
135
  use_instructions=True,
128
136
  citation=PROMPTRIEVER_CITATION,
129
137
  public_training_code=None,
@@ -146,6 +154,7 @@ promptriever_llama3_instruct = ModelMeta(
146
154
  revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
147
155
  release_date="2024-09-15",
148
156
  n_parameters=8_000_000_000,
157
+ n_embedding_parameters=None,
149
158
  memory_usage_mb=30518,
150
159
  max_tokens=8192,
151
160
  embed_dim=4096,
@@ -156,7 +165,7 @@ promptriever_llama3_instruct = ModelMeta(
156
165
  license="apache-2.0",
157
166
  reference="https://huggingface.co/samaya-ai/promptriever-llama3.1-8b-instruct-v1",
158
167
  similarity_fn_name=ScoringFunction.COSINE,
159
- framework=["PyTorch", "Tevatron"],
168
+ framework=["PyTorch", "Tevatron", "safetensors"],
160
169
  use_instructions=True,
161
170
  citation=PROMPTRIEVER_CITATION,
162
171
  public_training_code=None,
@@ -179,6 +188,7 @@ promptriever_mistral_v1 = ModelMeta(
179
188
  revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
180
189
  release_date="2024-09-15",
181
190
  n_parameters=7_000_000_000,
191
+ n_embedding_parameters=131_072_000,
182
192
  memory_usage_mb=26703,
183
193
  training_datasets={
184
194
  # "samaya-ai/msmarco-w-instructions",
@@ -189,7 +199,7 @@ promptriever_mistral_v1 = ModelMeta(
189
199
  license="apache-2.0",
190
200
  reference="https://huggingface.co/samaya-ai/promptriever-mistral-v0.1-7b-v1",
191
201
  similarity_fn_name=ScoringFunction.COSINE,
192
- framework=["PyTorch", "Tevatron"],
202
+ framework=["PyTorch", "Tevatron", "safetensors"],
193
203
  use_instructions=True,
194
204
  citation=PROMPTRIEVER_CITATION,
195
205
  public_training_code=None,
@@ -1,35 +1,42 @@
1
+ from __future__ import annotations
2
+
1
3
  import heapq
2
4
  import logging
3
5
  import shutil
4
6
  import tempfile
5
7
  from pathlib import Path
6
- from typing import Any
8
+ from typing import TYPE_CHECKING, Any
7
9
 
8
10
  import torch
9
- from torch.utils.data import DataLoader
10
11
 
11
12
  from mteb._create_dataloaders import (
12
13
  create_dataloader,
13
14
  )
14
15
  from mteb._requires_package import requires_package
15
- from mteb.abstasks.task_metadata import TaskMetadata
16
16
  from mteb.models.abs_encoder import AbsEncoder
17
17
  from mteb.models.model_meta import ModelMeta, ScoringFunction
18
- from mteb.types import (
19
- Array,
20
- BatchedInput,
21
- CorpusDatasetType,
22
- PromptType,
23
- QueryDatasetType,
24
- RetrievalOutputType,
25
- TopRankedDocumentsType,
26
- )
18
+ from mteb.types import PromptType
19
+
20
+ if TYPE_CHECKING:
21
+ from torch.utils.data import DataLoader
22
+
23
+ from mteb.abstasks.task_metadata import TaskMetadata
24
+ from mteb.types import (
25
+ Array,
26
+ BatchedInput,
27
+ CorpusDatasetType,
28
+ EncodeKwargs,
29
+ QueryDatasetType,
30
+ RetrievalOutputType,
31
+ TopRankedDocumentsType,
32
+ )
33
+
27
34
 
28
35
  logger = logging.getLogger(__name__)
29
36
 
30
37
 
31
38
  class PylateSearchEncoder:
32
- """Mixin class to add PyLate-based indexing and search to an encoder. Implements :class:`SearchProtocol`"""
39
+ """Mixin class to add PyLate-based indexing and search to an encoder. Implements [SearchProtocol][mteb.models.SearchProtocol]"""
33
40
 
34
41
  base_index_dir: Path | None = None
35
42
  _index_dir: Path | None = None
@@ -45,7 +52,8 @@ class PylateSearchEncoder:
45
52
  task_metadata: TaskMetadata,
46
53
  hf_split: str,
47
54
  hf_subset: str,
48
- encode_kwargs: dict[str, Any],
55
+ encode_kwargs: EncodeKwargs,
56
+ num_proc: int,
49
57
  ) -> None:
50
58
  """Index the corpus for retrieval.
51
59
 
@@ -55,6 +63,7 @@ class PylateSearchEncoder:
55
63
  hf_split: Split of current task, allows to know some additional information about current split.
56
64
  hf_subset: Subset of current task. Similar to `hf_split` to get more information
57
65
  encode_kwargs: Additional arguments to pass to the encoder during indexing.
66
+ num_proc: Number of processes to use for indexing.
58
67
  """
59
68
  self.task_corpus = corpus
60
69
 
@@ -78,14 +87,16 @@ class PylateSearchEncoder:
78
87
  hf_split: str,
79
88
  hf_subset: str,
80
89
  top_k: int,
81
- encode_kwargs: dict[str, Any],
90
+ encode_kwargs: EncodeKwargs,
82
91
  top_ranked: TopRankedDocumentsType | None = None,
92
+ num_proc: int,
83
93
  ) -> RetrievalOutputType:
84
94
  queries_dataloader = create_dataloader(
85
95
  queries,
86
96
  task_metadata,
87
97
  prompt_type=PromptType.query,
88
98
  batch_size=encode_kwargs.get("batch_size", 32),
99
+ num_proc=num_proc,
89
100
  )
90
101
 
91
102
  query_embeddings = self.encode(
@@ -109,6 +120,7 @@ class PylateSearchEncoder:
109
120
  hf_subset=hf_subset,
110
121
  hf_split=hf_split,
111
122
  encode_kwargs=encode_kwargs,
123
+ num_proc=num_proc,
112
124
  )
113
125
  else:
114
126
  result_heaps = self._pylate_full_corpus_search(
@@ -119,6 +131,7 @@ class PylateSearchEncoder:
119
131
  hf_subset=hf_subset,
120
132
  hf_split=hf_split,
121
133
  encode_kwargs=encode_kwargs,
134
+ num_proc=num_proc,
122
135
  )
123
136
 
124
137
  results = {qid: {} for qid in query_idx_to_id.values()}
@@ -136,7 +149,8 @@ class PylateSearchEncoder:
136
149
  hf_subset: str,
137
150
  hf_split: str,
138
151
  top_k: int,
139
- encode_kwargs: dict[str, Any],
152
+ encode_kwargs: EncodeKwargs,
153
+ num_proc: int,
140
154
  ) -> dict[str, list[tuple[float, str]]]:
141
155
  from pylate import indexes, retrieve
142
156
 
@@ -163,6 +177,7 @@ class PylateSearchEncoder:
163
177
  task_metadata,
164
178
  prompt_type=PromptType.document,
165
179
  batch_size=encode_kwargs.get("batch_size", 32),
180
+ num_proc=num_proc,
166
181
  )
167
182
  documents_embeddings = self.encode(
168
183
  documents_loader,
@@ -200,7 +215,8 @@ class PylateSearchEncoder:
200
215
  task_metadata: TaskMetadata,
201
216
  hf_subset: str,
202
217
  hf_split: str,
203
- encode_kwargs: dict[str, Any],
218
+ encode_kwargs: EncodeKwargs,
219
+ num_proc: int = 1,
204
220
  ) -> dict[str, list[tuple[float, str]]]:
205
221
  """Rerank with PyLate's rank.rerank using per-query candidates.
206
222
 
@@ -223,6 +239,7 @@ class PylateSearchEncoder:
223
239
  task_metadata,
224
240
  prompt_type=PromptType.document,
225
241
  batch_size=encode_kwargs.get("batch_size", 32),
242
+ num_proc=num_proc,
226
243
  ),
227
244
  task_metadata=task_metadata,
228
245
  hf_split=hf_split,
@@ -345,12 +362,13 @@ colbert_v2 = ModelMeta(
345
362
  public_training_data=None,
346
363
  release_date="2024-09-21",
347
364
  n_parameters=int(110 * 1e6),
365
+ n_embedding_parameters=23_440_896,
348
366
  memory_usage_mb=418,
349
367
  max_tokens=180,
350
368
  embed_dim=None,
351
369
  license="mit",
352
370
  similarity_fn_name=ScoringFunction.MAX_SIM,
353
- framework=["PyLate", "ColBERT"],
371
+ framework=["PyLate", "ColBERT", "Transformers", "ONNX", "safetensors"],
354
372
  reference="https://huggingface.co/colbert-ir/colbertv2.0",
355
373
  use_instructions=False,
356
374
  adapted_from=None,
@@ -401,12 +419,13 @@ jina_colbert_v2 = ModelMeta(
401
419
  public_training_data=None,
402
420
  release_date="2024-08-16",
403
421
  n_parameters=int(559 * 1e6),
422
+ n_embedding_parameters=None,
404
423
  memory_usage_mb=1067,
405
424
  max_tokens=8192,
406
425
  embed_dim=None,
407
426
  license="cc-by-nc-4.0",
408
427
  similarity_fn_name=ScoringFunction.MAX_SIM,
409
- framework=["PyLate", "ColBERT"],
428
+ framework=["PyLate", "ColBERT", "ONNX", "safetensors"],
410
429
  reference="https://huggingface.co/jinaai/jina-colbert-v2",
411
430
  use_instructions=False,
412
431
  adapted_from=None,
@@ -439,7 +458,7 @@ jina_colbert_v2 = ModelMeta(
439
458
  url = "https://aclanthology.org/2024.mrl-1.11/",
440
459
  doi = "10.18653/v1/2024.mrl-1.11",
441
460
  pages = "159--166",
442
- abstract = "Multi-vector dense models, such as ColBERT, have proven highly effective in information retrieval. ColBERT`s late interaction scoring approximates the joint query-document attention seen in cross-encoders while maintaining inference efficiency closer to traditional dense retrieval models, thanks to its bi-encoder architecture and recent optimizations in indexing and search. In this paper, we introduce a novel architecture and a training framework to support long context window and multilingual retrieval. Leveraging Matryoshka Representation Loss, we further demonstrate that the reducing the embedding dimensionality from 128 to 64 has insignificant impact on the model`s retrieval performance and cut storage requirements by up to 50{\%}. Our new model, Jina-ColBERT-v2, demonstrates strong performance across a range of English and multilingual retrieval tasks,"
461
+ abstract = "Multi-vector dense models, such as ColBERT, have proven highly effective in information retrieval. ColBERT`s late interaction scoring approximates the joint query-document attention seen in cross-encoders while maintaining inference efficiency closer to traditional dense retrieval models, thanks to its bi-encoder architecture and recent optimizations in indexing and search. In this paper, we introduce a novel architecture and a training framework to support long context window and multilingual retrieval. Leveraging Matryoshka Representation Loss, we further demonstrate that the reducing the embedding dimensionality from 128 to 64 has insignificant impact on the model`s retrieval performance and cut storage requirements by up to 50{\\%}. Our new model, Jina-ColBERT-v2, demonstrates strong performance across a range of English and multilingual retrieval tasks,"
443
462
  }""",
444
463
  )
445
464
 
@@ -457,12 +476,13 @@ lightonai__gte_moderncolbert_v1 = ModelMeta(
457
476
  public_training_data="https://huggingface.co/datasets/lightonai/ms-marco-en-bge-gemma",
458
477
  release_date="2025-04-30",
459
478
  n_parameters=int(149 * 1e6),
479
+ n_embedding_parameters=None,
460
480
  memory_usage_mb=None,
461
481
  max_tokens=8192,
462
482
  embed_dim=None,
463
483
  license="apache-2.0",
464
484
  similarity_fn_name="MaxSim",
465
- framework=["PyLate", "ColBERT"],
485
+ framework=["PyLate", "ColBERT", "safetensors", "Sentence Transformers"],
466
486
  reference="https://huggingface.co/lightonai/GTE-ModernColBERT-v1",
467
487
  use_instructions=False,
468
488
  adapted_from="Alibaba-NLP/gte-modernbert-base",
@@ -36,13 +36,14 @@ Qodo_Embed_1_1_5B = ModelMeta(
36
36
  revision="84bbef079b32e8823ec226d4e9e92902706b0eb6",
37
37
  release_date="2025-02-19",
38
38
  n_parameters=1_780_000_000,
39
+ n_embedding_parameters=232_928_256,
39
40
  memory_usage_mb=6776,
40
41
  embed_dim=1536,
41
42
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
42
43
  max_tokens=32768,
43
44
  reference="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B",
44
45
  similarity_fn_name=ScoringFunction.COSINE,
45
- framework=["Sentence Transformers", "PyTorch"],
46
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
46
47
  use_instructions=False,
47
48
  public_training_code=None,
48
49
  public_training_data=None,
@@ -59,13 +60,14 @@ Qodo_Embed_1_7B = ModelMeta(
59
60
  revision="f9edd9bf7f687c0e832424058e265120f603cd81",
60
61
  release_date="2025-02-24",
61
62
  n_parameters=7_613_000_000,
63
+ n_embedding_parameters=None,
62
64
  memory_usage_mb=29040,
63
65
  embed_dim=3584,
64
66
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
65
67
  max_tokens=32768,
66
68
  reference="https://huggingface.co/Qodo/Qodo-Embed-1-7B",
67
69
  similarity_fn_name=ScoringFunction.COSINE,
68
- framework=["Sentence Transformers", "PyTorch"],
70
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
69
71
  use_instructions=False,
70
72
  public_training_code=None,
71
73
  public_training_data=None,
@@ -31,13 +31,14 @@ mini_gte = ModelMeta(
31
31
  revision="7fbe6f9b4cc42615e0747299f837ad7769025492",
32
32
  release_date="2025-01-28",
33
33
  n_parameters=int(66.3 * 1e6),
34
+ n_embedding_parameters=23_440_896,
34
35
  memory_usage_mb=253,
35
36
  embed_dim=768,
36
37
  license="apache-2.0",
37
38
  max_tokens=512,
38
39
  reference="https://huggingface.co/prdev/mini-gte",
39
40
  similarity_fn_name=ScoringFunction.COSINE,
40
- framework=["Sentence Transformers", "PyTorch"],
41
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
41
42
  use_instructions=False,
42
43
  public_training_code=None,
43
44
  public_training_data=None,
@@ -1,6 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
6
  from mteb.models.model_meta import ModelMeta
3
- from mteb.models.models_protocols import EncoderProtocol, PromptType
7
+ from mteb.types import PromptType
8
+
9
+ if TYPE_CHECKING:
10
+ from mteb.models.models_protocols import EncoderProtocol
4
11
 
5
12
 
6
13
  def instruction_template(
@@ -140,13 +147,14 @@ Qwen3_Embedding_0B6 = ModelMeta(
140
147
  revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
141
148
  release_date="2025-06-05",
142
149
  n_parameters=595776512,
150
+ n_embedding_parameters=None,
143
151
  memory_usage_mb=1136,
144
152
  embed_dim=1024,
145
153
  max_tokens=32768,
146
154
  license="apache-2.0",
147
155
  reference="https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
148
156
  similarity_fn_name="cosine",
149
- framework=["Sentence Transformers", "PyTorch"],
157
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
150
158
  use_instructions=True,
151
159
  public_training_code=None,
152
160
  public_training_data=None,
@@ -163,13 +171,14 @@ Qwen3_Embedding_4B = ModelMeta(
163
171
  revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
164
172
  release_date="2025-06-05",
165
173
  n_parameters=4021774336,
174
+ n_embedding_parameters=None,
166
175
  memory_usage_mb=7671,
167
176
  embed_dim=2560,
168
177
  max_tokens=32768,
169
178
  license="apache-2.0",
170
179
  reference="https://huggingface.co/Qwen/Qwen3-Embedding-4B",
171
180
  similarity_fn_name="cosine",
172
- framework=["Sentence Transformers", "PyTorch"],
181
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
173
182
  use_instructions=True,
174
183
  public_training_code=None,
175
184
  public_training_data=None,
@@ -186,13 +195,14 @@ Qwen3_Embedding_8B = ModelMeta(
186
195
  revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
187
196
  release_date="2025-06-05",
188
197
  n_parameters=7567295488,
198
+ n_embedding_parameters=None,
189
199
  memory_usage_mb=14433,
190
200
  embed_dim=4096,
191
201
  max_tokens=32768,
192
202
  license="apache-2.0",
193
203
  reference="https://huggingface.co/Qwen/Qwen3-Embedding-8B",
194
204
  similarity_fn_name="cosine",
195
- framework=["Sentence Transformers", "PyTorch"],
205
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
196
206
  use_instructions=True,
197
207
  public_training_code=None,
198
208
  public_training_data=None,