mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529):
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -76,13 +76,14 @@ e5_mult_small = ModelMeta(
76
76
  revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
77
77
  release_date=E5_PAPER_RELEASE_DATE,
78
78
  n_parameters=118_000_000,
79
+ n_embedding_parameters=96_014_208,
79
80
  memory_usage_mb=449,
80
81
  embed_dim=384,
81
82
  license="mit",
82
83
  max_tokens=512,
83
84
  reference="https://huggingface.co/intfloat/multilingual-e5-small",
84
85
  similarity_fn_name=ScoringFunction.COSINE,
85
- framework=["Sentence Transformers", "PyTorch"],
86
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
86
87
  use_instructions=True,
87
88
  public_training_code=None, # couldn't find
88
89
  public_training_data=None,
@@ -103,13 +104,14 @@ e5_mult_base = ModelMeta(
103
104
  revision="d13f1b27baf31030b7fd040960d60d909913633f",
104
105
  release_date=E5_PAPER_RELEASE_DATE,
105
106
  n_parameters=278_000_000,
107
+ n_embedding_parameters=192_001_536,
106
108
  memory_usage_mb=1061,
107
109
  embed_dim=768,
108
110
  license="mit",
109
111
  max_tokens=514,
110
112
  reference="https://huggingface.co/intfloat/multilingual-e5-base",
111
113
  similarity_fn_name=ScoringFunction.COSINE,
112
- framework=["Sentence Transformers", "PyTorch"],
114
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
113
115
  use_instructions=True,
114
116
  public_training_code=None,
115
117
  public_training_data=None,
@@ -130,13 +132,14 @@ e5_mult_large = ModelMeta(
130
132
  revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
131
133
  release_date=E5_PAPER_RELEASE_DATE,
132
134
  n_parameters=560_000_000,
135
+ n_embedding_parameters=256_002_048,
133
136
  memory_usage_mb=2136,
134
137
  embed_dim=1024,
135
138
  license="mit",
136
139
  max_tokens=514,
137
140
  reference="https://huggingface.co/intfloat/multilingual-e5-large",
138
141
  similarity_fn_name=ScoringFunction.COSINE,
139
- framework=["Sentence Transformers", "PyTorch"],
142
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
140
143
  use_instructions=True,
141
144
  public_training_code=None,
142
145
  public_training_data=None,
@@ -157,13 +160,14 @@ e5_eng_small_v2 = ModelMeta(
157
160
  revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
158
161
  release_date=E5_PAPER_RELEASE_DATE,
159
162
  n_parameters=33_000_000,
163
+ n_embedding_parameters=11_720_448,
160
164
  memory_usage_mb=127,
161
165
  embed_dim=384,
162
166
  license="mit",
163
167
  max_tokens=512,
164
168
  reference="https://huggingface.co/intfloat/e5-small-v2",
165
169
  similarity_fn_name=ScoringFunction.COSINE,
166
- framework=["Sentence Transformers", "PyTorch"],
170
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
167
171
  use_instructions=True,
168
172
  public_training_code=None,
169
173
  public_training_data=None,
@@ -184,13 +188,14 @@ e5_eng_small = ModelMeta(
184
188
  revision="e272f3049e853b47cb5ca3952268c6662abda68f",
185
189
  release_date=E5_PAPER_RELEASE_DATE,
186
190
  n_parameters=33_000_000,
191
+ n_embedding_parameters=11_720_448,
187
192
  memory_usage_mb=127,
188
193
  embed_dim=384,
189
194
  license="mit",
190
195
  max_tokens=512,
191
196
  reference="https://huggingface.co/intfloat/e5-small",
192
197
  similarity_fn_name=ScoringFunction.COSINE,
193
- framework=["Sentence Transformers", "PyTorch"],
198
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
194
199
  use_instructions=True,
195
200
  public_training_code=None,
196
201
  public_training_data=None,
@@ -211,13 +216,14 @@ e5_eng_base_v2 = ModelMeta(
211
216
  revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
212
217
  release_date=E5_PAPER_RELEASE_DATE,
213
218
  n_parameters=109_000_000,
219
+ n_embedding_parameters=23_440_896,
214
220
  memory_usage_mb=418,
215
221
  embed_dim=768,
216
222
  license="mit",
217
223
  max_tokens=512,
218
224
  reference="https://huggingface.co/intfloat/e5-base-v2",
219
225
  similarity_fn_name=ScoringFunction.COSINE,
220
- framework=["Sentence Transformers", "PyTorch"],
226
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
221
227
  use_instructions=True,
222
228
  superseded_by=None,
223
229
  adapted_from="intfloat/e5-base",
@@ -239,13 +245,14 @@ e5_eng_large_v2 = ModelMeta(
239
245
  revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
240
246
  release_date=E5_PAPER_RELEASE_DATE,
241
247
  n_parameters=335_000_000,
248
+ n_embedding_parameters=31_254_528,
242
249
  memory_usage_mb=1278,
243
250
  embed_dim=1024,
244
251
  license="mit",
245
252
  max_tokens=514,
246
253
  reference="https://huggingface.co/intfloat/e5-large-v2",
247
254
  similarity_fn_name=ScoringFunction.COSINE,
248
- framework=["Sentence Transformers", "PyTorch"],
255
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
249
256
  use_instructions=True,
250
257
  superseded_by=None,
251
258
  adapted_from="intfloat/e5-large",
@@ -267,13 +274,14 @@ e5_large = ModelMeta(
267
274
  revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
268
275
  release_date="2022-12-26",
269
276
  n_parameters=335_000_000,
277
+ n_embedding_parameters=31_254_528,
270
278
  memory_usage_mb=1278,
271
279
  embed_dim=1024,
272
280
  license="apache-2.0",
273
281
  max_tokens=512,
274
282
  reference="https://huggingface.co/intfloat/e5-large",
275
283
  similarity_fn_name=ScoringFunction.COSINE,
276
- framework=["Sentence Transformers", "PyTorch"],
284
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
277
285
  use_instructions=True,
278
286
  superseded_by="intfloat/e5-large-v2",
279
287
  adapted_from="google-bert/bert-large-uncased-whole-word-masking",
@@ -295,13 +303,14 @@ e5_base = ModelMeta(
295
303
  revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
296
304
  release_date="2022-12-26",
297
305
  n_parameters=109_000_000,
306
+ n_embedding_parameters=23_440_896,
298
307
  memory_usage_mb=418,
299
308
  embed_dim=768,
300
309
  license="apache-2.0",
301
310
  max_tokens=512,
302
311
  reference="https://huggingface.co/intfloat/e5-base",
303
312
  similarity_fn_name=ScoringFunction.COSINE,
304
- framework=["Sentence Transformers", "PyTorch"],
313
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
305
314
  use_instructions=True,
306
315
  superseded_by="intfloat/e5-base-v2",
307
316
  adapted_from="google-bert/bert-base-uncased",
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
6
  from packaging import version
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  E5_V_TRANSFORMERS_VERSION = (
14
19
  "4.44.2" # Issue 1647: Only works with transformers==4.44.2.
@@ -30,6 +35,7 @@ class E5VModel(AbsEncoder):
30
35
  self,
31
36
  model_name: str,
32
37
  revision: str,
38
+ device: str | None = None,
33
39
  composed_prompt=None,
34
40
  **kwargs: Any,
35
41
  ):
@@ -47,8 +53,7 @@ class E5VModel(AbsEncoder):
47
53
  self.processor = LlavaNextProcessor.from_pretrained(
48
54
  model_name, revision=revision
49
55
  )
50
- if "device" in kwargs:
51
- self.device = kwargs.pop("device")
56
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
52
57
  self.model = LlavaNextForConditionalGeneration.from_pretrained(
53
58
  model_name, revision=revision, **kwargs
54
59
  )
@@ -87,7 +92,7 @@ class E5VModel(AbsEncoder):
87
92
  ],
88
93
  return_tensors="pt",
89
94
  padding=True,
90
- ).to("cuda")
95
+ ).to(self.device)
91
96
  text_outputs = self.model(
92
97
  **text_inputs, output_hidden_states=True, return_dict=True
93
98
  ).hidden_states[-1][:, -1, :]
@@ -111,7 +116,7 @@ class E5VModel(AbsEncoder):
111
116
  batch["image"],
112
117
  return_tensors="pt",
113
118
  padding=True,
114
- ).to("cuda")
119
+ ).to(self.device)
115
120
  image_outputs = self.model(
116
121
  **img_inputs, output_hidden_states=True, return_dict=True
117
122
  ).hidden_states[-1][:, -1, :]
@@ -141,7 +146,7 @@ class E5VModel(AbsEncoder):
141
146
  ]
142
147
  inputs = self.processor(
143
148
  prompts, batch["image"], return_tensors="pt", padding=True
144
- ).to("cuda")
149
+ ).to(self.device)
145
150
  outputs = self.model(
146
151
  **inputs, output_hidden_states=True, return_dict=True
147
152
  ).hidden_states[-1][:, -1, :]
@@ -166,6 +171,7 @@ e5_v = ModelMeta(
166
171
  release_date="2024-07-17",
167
172
  modalities=["image", "text"],
168
173
  n_parameters=8_360_000_000,
174
+ n_embedding_parameters=None,
169
175
  memory_usage_mb=15936,
170
176
  max_tokens=8192,
171
177
  embed_dim=4096,
@@ -173,7 +179,7 @@ e5_v = ModelMeta(
173
179
  open_weights=True,
174
180
  public_training_code="https://github.com/kongds/E5-V",
175
181
  public_training_data="https://huggingface.co/datasets/princeton-nlp/datasets-for-simcse",
176
- framework=["PyTorch"],
182
+ framework=["PyTorch", "Transformers", "safetensors"],
177
183
  reference="https://huggingface.co/royokong/e5-v",
178
184
  similarity_fn_name=ScoringFunction.COSINE,
179
185
  use_instructions=True,
@@ -1,17 +1,23 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import (
8
9
  requires_image_dependencies,
9
10
  requires_package,
10
11
  )
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
12
  from mteb.models.abs_encoder import AbsEncoder
13
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
14
+ from mteb.types import PromptType
15
+
16
+ if TYPE_CHECKING:
17
+ from torch.utils.data import DataLoader
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import Array, BatchedInput
15
21
 
16
22
 
17
23
  class EagerEmbedV1Wrapper(AbsEncoder):
@@ -147,12 +153,13 @@ Eager_Embed_V1 = ModelMeta(
147
153
  release_date="2025-11-20",
148
154
  modalities=["image", "text"],
149
155
  n_parameters=4_000_000_000,
156
+ n_embedding_parameters=None,
150
157
  memory_usage_mb=16929,
151
158
  max_tokens=262144,
152
159
  embed_dim=2560,
153
160
  license="apache-2.0",
154
161
  open_weights=True,
155
- framework=["Tevatron"],
162
+ framework=["Tevatron", "safetensors"],
156
163
  reference="https://huggingface.co/eagerworks/eager-embed-v1",
157
164
  similarity_fn_name=ScoringFunction.COSINE,
158
165
  use_instructions=True,
@@ -2,7 +2,7 @@ from mteb.models.model_meta import ModelMeta
2
2
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
3
 
4
4
  embedding_gemma_300m_scandi = ModelMeta(
5
- loader=sentence_transformers_loader, # type: ignore
5
+ loader=sentence_transformers_loader,
6
6
  name="emillykkejensen/EmbeddingGemma-Scandi-300m",
7
7
  model_type=["dense"],
8
8
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
@@ -10,11 +10,12 @@ embedding_gemma_300m_scandi = ModelMeta(
10
10
  revision="9f3307b9f601db564a9190cb475324d128dcfe86",
11
11
  release_date="2025-10-17",
12
12
  n_parameters=307_581_696,
13
+ n_embedding_parameters=None,
13
14
  embed_dim=768,
14
15
  max_tokens=2048,
15
16
  license="apache-2.0",
16
17
  reference="https://huggingface.co/emillykkejensen/EmbeddingGemma-Scandi-300m",
17
- framework=["Sentence Transformers", "PyTorch"],
18
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
18
19
  use_instructions=True,
19
20
  public_training_code=None,
20
21
  public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
@@ -35,7 +36,7 @@ embedding_gemma_300m_scandi = ModelMeta(
35
36
 
36
37
 
37
38
  qwen_scandi = ModelMeta(
38
- loader=sentence_transformers_loader, # type: ignore
39
+ loader=sentence_transformers_loader,
39
40
  name="emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
40
41
  model_type=["dense"],
41
42
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
@@ -43,12 +44,13 @@ qwen_scandi = ModelMeta(
43
44
  revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
44
45
  release_date="2025-10-17",
45
46
  n_parameters=595776512,
47
+ n_embedding_parameters=None,
46
48
  memory_usage_mb=2272,
47
49
  embed_dim=1024,
48
50
  max_tokens=32768,
49
51
  license="apache-2.0",
50
52
  reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
51
- framework=["Sentence Transformers", "PyTorch"],
53
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
52
54
  use_instructions=True,
53
55
  public_training_code=None,
54
56
  public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
@@ -59,7 +61,7 @@ qwen_scandi = ModelMeta(
59
61
 
60
62
 
61
63
  mmbert_scandi = ModelMeta(
62
- loader=sentence_transformers_loader, # type: ignore
64
+ loader=sentence_transformers_loader,
63
65
  name="emillykkejensen/mmBERTscandi-base-embedding",
64
66
  model_type=["dense"],
65
67
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
@@ -67,12 +69,13 @@ mmbert_scandi = ModelMeta(
67
69
  revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
68
70
  release_date="2025-10-17",
69
71
  n_parameters=306939648,
72
+ n_embedding_parameters=None,
70
73
  memory_usage_mb=1171,
71
74
  embed_dim=768,
72
75
  max_tokens=8192,
73
76
  license="apache-2.0",
74
77
  reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
75
- framework=["Sentence Transformers", "PyTorch"],
78
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
76
79
  use_instructions=True,
77
80
  public_training_code=None,
78
81
  public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
@@ -18,13 +18,14 @@ english_code_retriever = ModelMeta(
18
18
  revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c",
19
19
  release_date="2025-07-10",
20
20
  n_parameters=149_000_000,
21
+ n_embedding_parameters=None,
21
22
  memory_usage_mb=568,
22
23
  embed_dim=768,
23
24
  license="mit",
24
25
  max_tokens=8192,
25
26
  reference="https://huggingface.co/fyaronskiy/english_code_retriever",
26
27
  similarity_fn_name="cosine",
27
- framework=["Sentence Transformers", "PyTorch"],
28
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
28
29
  use_instructions=True,
29
30
  public_training_code=None,
30
31
  public_training_data="https://huggingface.co/datasets/code-search-net/code_search_net",
@@ -9,6 +9,7 @@ Euler_Legal_Embedding_V1 = ModelMeta(
9
9
  release_date="2025-11-06",
10
10
  languages=["eng-Latn"],
11
11
  n_parameters=8000000000,
12
+ n_embedding_parameters=None,
12
13
  memory_usage_mb=15618,
13
14
  max_tokens=1536,
14
15
  embed_dim=4096,
@@ -16,7 +17,7 @@ Euler_Legal_Embedding_V1 = ModelMeta(
16
17
  open_weights=True,
17
18
  public_training_code=None,
18
19
  public_training_data=None,
19
- framework=["PyTorch", "Sentence Transformers"],
20
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
20
21
  reference="https://huggingface.co/Mira190/Euler-Legal-Embedding-V1",
21
22
  similarity_fn_name="cosine",
22
23
  use_instructions=False,
@@ -24,7 +25,7 @@ Euler_Legal_Embedding_V1 = ModelMeta(
24
25
  adapted_from="Qwen/Qwen3-Embedding-8B",
25
26
  superseded_by=None,
26
27
  citation="""@misc{euler2025legal,
27
- title={Euler-Legal-Embedding: Advanced Legal Representation Learning},
28
+ title={Euler-Legal-Embedding: Advanced Legal Representation Learning},
28
29
  author={LawRank Team},
29
30
  year={2025},
30
31
  publisher={Hugging Face}
@@ -1,15 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import requires_image_dependencies
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
10
  from mteb.models.abs_encoder import AbsEncoder
11
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
12
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
13
18
 
14
19
  EVA_CLIP_CITATION = """@article{EVA-CLIP,
15
20
  title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
@@ -144,6 +149,7 @@ EVA02_CLIP_B_16 = ModelMeta(
144
149
  release_date="2023-04-26",
145
150
  modalities=["image", "text"],
146
151
  n_parameters=149_000_000,
152
+ n_embedding_parameters=None,
147
153
  memory_usage_mb=568,
148
154
  max_tokens=77,
149
155
  embed_dim=512,
@@ -168,6 +174,7 @@ EVA02_CLIP_L_14 = ModelMeta(
168
174
  release_date="2023-04-26",
169
175
  modalities=["image", "text"],
170
176
  n_parameters=428_000_000,
177
+ n_embedding_parameters=None,
171
178
  memory_usage_mb=1633,
172
179
  max_tokens=77,
173
180
  embed_dim=768,
@@ -192,6 +199,7 @@ EVA02_CLIP_bigE_14 = ModelMeta(
192
199
  release_date="2023-04-26",
193
200
  modalities=["image", "text"],
194
201
  n_parameters=4_700_000_000,
202
+ n_embedding_parameters=None,
195
203
  memory_usage_mb=17929,
196
204
  max_tokens=77,
197
205
  embed_dim=1024,
@@ -217,6 +225,7 @@ EVA02_CLIP_bigE_14_plus = ModelMeta(
217
225
  release_date="2023-04-26",
218
226
  modalities=["image", "text"],
219
227
  n_parameters=5_000_000_000,
228
+ n_embedding_parameters=None,
220
229
  memory_usage_mb=19073,
221
230
  max_tokens=77,
222
231
  embed_dim=1024,
@@ -12,13 +12,14 @@ parsbert = ModelMeta(
12
12
  revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
13
13
  release_date="2021-05-19",
14
14
  n_parameters=162_841_344,
15
+ n_embedding_parameters=76_800_000,
15
16
  memory_usage_mb=621,
16
17
  embed_dim=768,
17
18
  license="not specified",
18
19
  max_tokens=512,
19
20
  reference="https://huggingface.co/HooshvareLab/bert-base-parsbert-uncased",
20
21
  similarity_fn_name=ScoringFunction.COSINE,
21
- framework=["Sentence Transformers", "PyTorch"],
22
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
22
23
  use_instructions=False,
23
24
  public_training_code=None,
24
25
  public_training_data=None,
@@ -48,13 +49,14 @@ bert_zwnj = ModelMeta(
48
49
  revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
49
50
  release_date="2021-06-28",
50
51
  n_parameters=118_297_344,
52
+ n_embedding_parameters=32_256_000,
51
53
  memory_usage_mb=451,
52
54
  embed_dim=768,
53
55
  license="not specified",
54
56
  max_tokens=512,
55
57
  reference="https://huggingface.co/m3hrdadfi/bert-zwnj-wnli-mean-tokens",
56
58
  similarity_fn_name=ScoringFunction.COSINE,
57
- framework=["Sentence Transformers", "PyTorch"],
59
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
58
60
  use_instructions=False,
59
61
  public_training_code=None,
60
62
  public_training_data=None,
@@ -74,13 +76,14 @@ roberta_zwnj = ModelMeta(
74
76
  revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
75
77
  release_date="2021-06-28",
76
78
  n_parameters=118_298_112,
79
+ n_embedding_parameters=32_256_000,
77
80
  memory_usage_mb=451,
78
81
  embed_dim=768,
79
82
  license="not specified",
80
83
  max_tokens=514,
81
84
  reference="https://huggingface.co/m3hrdadfi/roberta-zwnj-wnli-mean-tokens",
82
85
  similarity_fn_name=ScoringFunction.COSINE,
83
- framework=["Sentence Transformers", "PyTorch"],
86
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
84
87
  use_instructions=False,
85
88
  public_training_code=None,
86
89
  public_training_data=None,
@@ -99,13 +102,14 @@ sentence_transformer_parsbert = ModelMeta(
99
102
  revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
100
103
  release_date="2024-12-10",
101
104
  n_parameters=162_841_344,
105
+ n_embedding_parameters=76_800_000,
102
106
  memory_usage_mb=621,
103
107
  embed_dim=768,
104
108
  license="apache-2.0",
105
109
  max_tokens=512,
106
110
  reference="https://huggingface.co/myrkur/sentence-transformer-parsbert-fa",
107
111
  similarity_fn_name=ScoringFunction.COSINE,
108
- framework=["Sentence Transformers", "PyTorch"],
112
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
109
113
  use_instructions=False,
110
114
  public_training_code=None,
111
115
  public_training_data=None,
@@ -123,13 +127,14 @@ tooka_bert_base = ModelMeta(
123
127
  revision="fa5ca89df5670700d9325b8872ac65c17cb24582",
124
128
  release_date="2024-12-08",
125
129
  n_parameters=122_905_344,
130
+ n_embedding_parameters=36_864_000,
126
131
  memory_usage_mb=469,
127
132
  embed_dim=768,
128
133
  license="apache-2.0",
129
134
  max_tokens=512,
130
135
  reference="https://huggingface.co/PartAI/TookaBERT-Base",
131
136
  similarity_fn_name=ScoringFunction.COSINE,
132
- framework=["Sentence Transformers", "PyTorch"],
137
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
133
138
  use_instructions=False,
134
139
  public_training_code=None,
135
140
  public_training_data=None,
@@ -150,13 +155,14 @@ tooka_sbert = ModelMeta(
150
155
  revision="5d07f0c543aca654373b931ae07cd197769110fd",
151
156
  release_date="2024-12-07",
152
157
  n_parameters=353_039_360,
158
+ n_embedding_parameters=49_152_000,
153
159
  memory_usage_mb=1347,
154
160
  embed_dim=1024,
155
161
  license="apache-2.0",
156
162
  max_tokens=512,
157
163
  reference="https://huggingface.co/PartAI/Tooka-SBERT",
158
164
  similarity_fn_name=ScoringFunction.COSINE,
159
- framework=["Sentence Transformers", "PyTorch"],
165
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
160
166
  use_instructions=False,
161
167
  public_training_code=None,
162
168
  public_training_data=None,
@@ -181,13 +187,14 @@ fa_bert = ModelMeta(
181
187
  revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
182
188
  release_date="2024-10-07",
183
189
  n_parameters=124_441_344,
190
+ n_embedding_parameters=38_400_000,
184
191
  memory_usage_mb=475,
185
192
  embed_dim=768,
186
193
  license="not specified",
187
194
  max_tokens=512,
188
195
  reference="https://huggingface.co/sbunlp/fabert",
189
196
  similarity_fn_name=ScoringFunction.COSINE,
190
- framework=["Sentence Transformers", "PyTorch"],
197
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
191
198
  use_instructions=False,
192
199
  public_training_code=None,
193
200
  public_training_data=None,
@@ -229,13 +236,14 @@ tooka_sbert_v2_small = ModelMeta(
229
236
  revision="8bbed87e36669387f71437c061430ba56d1b496f",
230
237
  release_date="2025-05-01",
231
238
  n_parameters=122_905_344,
239
+ n_embedding_parameters=36_864_000,
232
240
  memory_usage_mb=496,
233
241
  embed_dim=768,
234
242
  license="not specified",
235
243
  max_tokens=512,
236
244
  reference="https://huggingface.co/PartAI/Tooka-SBERT-V2-Small",
237
245
  similarity_fn_name="cosine",
238
- framework=["Sentence Transformers", "PyTorch"],
246
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
239
247
  use_instructions=False,
240
248
  public_training_code=None,
241
249
  public_training_data=None,
@@ -260,13 +268,14 @@ tooka_sbert_v2_large = ModelMeta(
260
268
  revision="b59682efa961122cc0e4408296d5852870c82eae",
261
269
  release_date="2025-05-01",
262
270
  n_parameters=353_039_360,
271
+ n_embedding_parameters=49_152_000,
263
272
  memory_usage_mb=1347,
264
273
  embed_dim=1024,
265
274
  license="not specified",
266
275
  max_tokens=512,
267
276
  reference="https://huggingface.co/PartAI/Tooka-SBERT-V2-Large",
268
277
  similarity_fn_name="cosine",
269
- framework=["Sentence Transformers", "PyTorch"],
278
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
270
279
  use_instructions=False,
271
280
  public_training_code=None,
272
281
  public_training_data=None,
@@ -113,13 +113,20 @@ xlmr_base = ModelMeta(
113
113
  revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
114
114
  release_date="2019-11-05", # arxiv paper release
115
115
  n_parameters=278043648,
116
+ n_embedding_parameters=192_001_536,
116
117
  memory_usage_mb=1064,
117
118
  embed_dim=768,
118
119
  license="mit",
119
120
  max_tokens=512,
120
121
  reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
121
122
  similarity_fn_name=ScoringFunction.COSINE,
122
- framework=["Sentence Transformers", "PyTorch"],
123
+ framework=[
124
+ "Sentence Transformers",
125
+ "PyTorch",
126
+ "Transformers",
127
+ "ONNX",
128
+ "safetensors",
129
+ ],
123
130
  use_instructions=False,
124
131
  public_training_code=None,
125
132
  public_training_data=None,
@@ -157,13 +164,20 @@ xlmr_large = ModelMeta(
157
164
  revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
158
165
  release_date="2019-11-05", # arxiv paper release
159
166
  n_parameters=559890432,
167
+ n_embedding_parameters=256_002_048,
160
168
  memory_usage_mb=2141,
161
169
  embed_dim=1024,
162
170
  license="mit",
163
171
  max_tokens=512,
164
172
  reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
165
173
  similarity_fn_name=ScoringFunction.COSINE,
166
- framework=["Sentence Transformers", "PyTorch"],
174
+ framework=[
175
+ "Sentence Transformers",
176
+ "PyTorch",
177
+ "Transformers",
178
+ "ONNX",
179
+ "safetensors",
180
+ ],
167
181
  use_instructions=False,
168
182
  public_training_code=None,
169
183
  public_training_data=None,
@@ -20,13 +20,14 @@ geoembedding = ModelMeta(
20
20
  ),
21
21
  release_date="2025-04-22",
22
22
  n_parameters=7241732096,
23
+ n_embedding_parameters=131_072_000,
23
24
  memory_usage_mb=27625,
24
25
  embed_dim=4096,
25
26
  license="apache-2.0",
26
27
  max_tokens=32768,
27
28
  reference="https://huggingface.co/GeoGPT-Research-Project/GeoEmbedding",
28
29
  similarity_fn_name="cosine",
29
- framework=["Sentence Transformers", "PyTorch"],
30
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
30
31
  use_instructions=True,
31
32
  public_training_code=None,
32
33
  public_training_data=None,