mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
18
18
  release_date="2024-10-31",
19
19
  languages=["eng-Latn"],
20
20
  n_parameters=7110660096,
21
+ n_embedding_parameters=None,
21
22
  memory_usage_mb=13563,
22
23
  max_tokens=32768.0,
23
24
  embed_dim=None,
@@ -25,7 +26,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
25
26
  open_weights=True,
26
27
  public_training_code=None,
27
28
  public_training_data=None,
28
- framework=["PyTorch"],
29
+ framework=["PyTorch", "Transformers", "safetensors"],
29
30
  reference="https://huggingface.co/Haon-Chen/speed-embedding-7b-instruct",
30
31
  similarity_fn_name=ScoringFunction.COSINE,
31
32
  use_instructions=None,
@@ -47,6 +48,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
47
48
  languages=[],
48
49
  loader=sentence_transformers_loader,
49
50
  n_parameters=278043648,
51
+ n_embedding_parameters=192_001_536,
50
52
  memory_usage_mb=1061,
51
53
  max_tokens=514.0,
52
54
  embed_dim=768,
@@ -54,7 +56,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
54
56
  open_weights=True,
55
57
  public_training_code=None,
56
58
  public_training_data=None,
57
- framework=["PyTorch", "Sentence Transformers"],
59
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
58
60
  reference="https://huggingface.co/Gameselo/STS-multilingual-mpnet-base-v2",
59
61
  similarity_fn_name=ScoringFunction.COSINE,
60
62
  use_instructions=None,
@@ -148,6 +150,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
148
150
  languages=["eng-Latn"],
149
151
  loader=sentence_transformers_loader,
150
152
  n_parameters=None,
153
+ n_embedding_parameters=None,
151
154
  memory_usage_mb=None,
152
155
  max_tokens=None,
153
156
  embed_dim=768,
@@ -155,7 +158,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
155
158
  open_weights=True,
156
159
  public_training_code=None,
157
160
  public_training_data=None,
158
- framework=["PyTorch"],
161
+ framework=["PyTorch", "Sentence Transformers"],
159
162
  reference="https://huggingface.co/Hum-Works/lodestone-base-4096-v1",
160
163
  similarity_fn_name=ScoringFunction.COSINE,
161
164
  use_instructions=None,
@@ -215,6 +218,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
215
218
  languages=[],
216
219
  loader=sentence_transformers_loader,
217
220
  n_parameters=2506172416,
221
+ n_embedding_parameters=None,
218
222
  memory_usage_mb=9560,
219
223
  max_tokens=8192.0,
220
224
  embed_dim=2048,
@@ -222,7 +226,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
222
226
  open_weights=True,
223
227
  public_training_code=None,
224
228
  public_training_data=None,
225
- framework=["PyTorch", "Sentence Transformers"],
229
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
226
230
  reference="https://huggingface.co/Jaume/gemma-2b-embeddings",
227
231
  similarity_fn_name=ScoringFunction.COSINE,
228
232
  use_instructions=None,
@@ -250,6 +254,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
250
254
  trust_remote_code=True,
251
255
  ),
252
256
  n_parameters=278043648,
257
+ n_embedding_parameters=192_001_536,
253
258
  memory_usage_mb=1061,
254
259
  max_tokens=514.0,
255
260
  embed_dim=768,
@@ -257,7 +262,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
257
262
  open_weights=True,
258
263
  public_training_code=None,
259
264
  public_training_data=None,
260
- framework=["PyTorch", "Sentence Transformers"],
265
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
261
266
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-base",
262
267
  similarity_fn_name=ScoringFunction.COSINE,
263
268
  use_instructions=None,
@@ -299,6 +304,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
299
304
  trust_remote_code=True,
300
305
  ),
301
306
  n_parameters=559890432,
307
+ n_embedding_parameters=256_002_048,
302
308
  memory_usage_mb=2136,
303
309
  max_tokens=514.0,
304
310
  embed_dim=1024,
@@ -306,7 +312,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
306
312
  open_weights=True,
307
313
  public_training_code=None,
308
314
  public_training_data=None,
309
- framework=["PyTorch", "Sentence Transformers"],
315
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
310
316
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-large",
311
317
  similarity_fn_name=ScoringFunction.COSINE,
312
318
  use_instructions=None,
@@ -348,6 +354,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
348
354
  trust_remote_code=True,
349
355
  ),
350
356
  n_parameters=117653760,
357
+ n_embedding_parameters=96_014_208,
351
358
  memory_usage_mb=449,
352
359
  max_tokens=512.0,
353
360
  embed_dim=384,
@@ -355,7 +362,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
355
362
  open_weights=True,
356
363
  public_training_code=None,
357
364
  public_training_data=None,
358
- framework=["PyTorch", "Sentence Transformers"],
365
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
359
366
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-small",
360
367
  similarity_fn_name=ScoringFunction.COSINE,
361
368
  use_instructions=None,
@@ -394,6 +401,7 @@ Mihaiii__Bulbasaur = ModelMeta(
394
401
  languages=None,
395
402
  loader=sentence_transformers_loader,
396
403
  n_parameters=17389824,
404
+ n_embedding_parameters=11_720_448,
397
405
  memory_usage_mb=66,
398
406
  max_tokens=512.0,
399
407
  embed_dim=384,
@@ -401,7 +409,7 @@ Mihaiii__Bulbasaur = ModelMeta(
401
409
  open_weights=True,
402
410
  public_training_code=None,
403
411
  public_training_data=None,
404
- framework=["PyTorch", "Sentence Transformers"],
412
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
405
413
  reference="https://huggingface.co/Mihaiii/Bulbasaur",
406
414
  similarity_fn_name=ScoringFunction.COSINE,
407
415
  use_instructions=None,
@@ -418,6 +426,7 @@ Mihaiii__Ivysaur = ModelMeta(
418
426
  languages=None,
419
427
  loader=sentence_transformers_loader,
420
428
  n_parameters=22713216,
429
+ n_embedding_parameters=11_720_448,
421
430
  memory_usage_mb=87,
422
431
  max_tokens=512.0,
423
432
  embed_dim=384,
@@ -425,7 +434,7 @@ Mihaiii__Ivysaur = ModelMeta(
425
434
  open_weights=True,
426
435
  public_training_code=None,
427
436
  public_training_data=None,
428
- framework=["PyTorch", "Sentence Transformers"],
437
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
429
438
  reference="https://huggingface.co/Mihaiii/Ivysaur",
430
439
  similarity_fn_name=ScoringFunction.COSINE,
431
440
  use_instructions=None,
@@ -442,6 +451,7 @@ Mihaiii__Squirtle = ModelMeta(
442
451
  languages=None,
443
452
  loader=sentence_transformers_loader,
444
453
  n_parameters=15615360,
454
+ n_embedding_parameters=11_720_448,
445
455
  memory_usage_mb=60,
446
456
  max_tokens=512.0,
447
457
  embed_dim=384,
@@ -449,7 +459,7 @@ Mihaiii__Squirtle = ModelMeta(
449
459
  open_weights=True,
450
460
  public_training_code=None,
451
461
  public_training_data=None,
452
- framework=["PyTorch", "Sentence Transformers"],
462
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
453
463
  reference="https://huggingface.co/Mihaiii/Squirtle",
454
464
  similarity_fn_name=ScoringFunction.COSINE,
455
465
  use_instructions=None,
@@ -466,6 +476,7 @@ Mihaiii__Venusaur = ModelMeta(
466
476
  languages=None,
467
477
  loader=sentence_transformers_loader,
468
478
  n_parameters=15615360,
479
+ n_embedding_parameters=11_720_448,
469
480
  memory_usage_mb=60,
470
481
  max_tokens=512.0,
471
482
  embed_dim=384,
@@ -473,7 +484,7 @@ Mihaiii__Venusaur = ModelMeta(
473
484
  open_weights=True,
474
485
  public_training_code=None,
475
486
  public_training_data=None,
476
- framework=["PyTorch", "Sentence Transformers"],
487
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
477
488
  reference="https://huggingface.co/Mihaiii/Venusaur",
478
489
  similarity_fn_name=ScoringFunction.COSINE,
479
490
  use_instructions=None,
@@ -490,6 +501,7 @@ Mihaiii__Wartortle = ModelMeta(
490
501
  languages=None,
491
502
  loader=sentence_transformers_loader,
492
503
  n_parameters=17389824,
504
+ n_embedding_parameters=11_720_448,
493
505
  memory_usage_mb=66,
494
506
  max_tokens=512.0,
495
507
  embed_dim=384,
@@ -497,7 +509,7 @@ Mihaiii__Wartortle = ModelMeta(
497
509
  open_weights=True,
498
510
  public_training_code=None,
499
511
  public_training_data=None,
500
- framework=["PyTorch", "Sentence Transformers"],
512
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
501
513
  reference="https://huggingface.co/Mihaiii/Wartortle",
502
514
  similarity_fn_name=ScoringFunction.COSINE,
503
515
  use_instructions=None,
@@ -514,6 +526,7 @@ Mihaiii__gte_micro = ModelMeta(
514
526
  languages=None,
515
527
  loader=sentence_transformers_loader,
516
528
  n_parameters=17389824,
529
+ n_embedding_parameters=11_720_448,
517
530
  memory_usage_mb=66,
518
531
  max_tokens=512.0,
519
532
  embed_dim=384,
@@ -521,7 +534,7 @@ Mihaiii__gte_micro = ModelMeta(
521
534
  open_weights=True,
522
535
  public_training_code=None,
523
536
  public_training_data=None,
524
- framework=["PyTorch", "Sentence Transformers"],
537
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
525
538
  reference="https://huggingface.co/Mihaiii/gte-micro",
526
539
  similarity_fn_name=ScoringFunction.COSINE,
527
540
  use_instructions=None,
@@ -537,6 +550,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
537
550
  languages=None,
538
551
  loader=sentence_transformers_loader,
539
552
  n_parameters=19164288,
553
+ n_embedding_parameters=11_720_448,
540
554
  memory_usage_mb=73,
541
555
  max_tokens=512.0,
542
556
  embed_dim=384,
@@ -544,7 +558,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
544
558
  open_weights=True,
545
559
  public_training_code=None,
546
560
  public_training_data=None,
547
- framework=["PyTorch", "Sentence Transformers"],
561
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
548
562
  reference="https://huggingface.co/Mihaiii/gte-micro-v4",
549
563
  similarity_fn_name=ScoringFunction.COSINE,
550
564
  use_instructions=None,
@@ -560,6 +574,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
560
574
  languages=["fra-Latn"],
561
575
  loader=sentence_transformers_loader,
562
576
  n_parameters=559890432,
577
+ n_embedding_parameters=256_002_048,
563
578
  memory_usage_mb=2136,
564
579
  max_tokens=514.0,
565
580
  embed_dim=1024,
@@ -567,7 +582,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
567
582
  open_weights=True,
568
583
  public_training_code=None,
569
584
  public_training_data=None,
570
- framework=["PyTorch"],
585
+ framework=["PyTorch", "Transformers", "safetensors"],
571
586
  reference="https://huggingface.co/OrdalieTech/Solon-embeddings-large-0.1",
572
587
  similarity_fn_name=ScoringFunction.COSINE,
573
588
  use_instructions=None,
@@ -583,6 +598,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
583
598
  languages=["ara-Arab"],
584
599
  loader=sentence_transformers_loader,
585
600
  n_parameters=135193344,
601
+ n_embedding_parameters=49_152_000,
586
602
  memory_usage_mb=516,
587
603
  max_tokens=512.0,
588
604
  embed_dim=768,
@@ -590,7 +606,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
590
606
  open_weights=True,
591
607
  public_training_code=None,
592
608
  public_training_data=None,
593
- framework=["PyTorch", "Sentence Transformers"],
609
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
594
610
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
595
611
  similarity_fn_name=ScoringFunction.COSINE,
596
612
  use_instructions=None,
@@ -615,6 +631,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
615
631
  languages=["ara-Arab"],
616
632
  loader=sentence_transformers_loader,
617
633
  n_parameters=117653760,
634
+ n_embedding_parameters=96_014_208,
618
635
  memory_usage_mb=449,
619
636
  max_tokens=512.0,
620
637
  embed_dim=384,
@@ -622,7 +639,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
622
639
  open_weights=True,
623
640
  public_training_code=None,
624
641
  public_training_data=None,
625
- framework=["PyTorch", "Sentence Transformers"],
642
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
626
643
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet",
627
644
  similarity_fn_name=ScoringFunction.COSINE,
628
645
  use_instructions=None,
@@ -640,6 +657,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
640
657
  languages=["ara-Arab"],
641
658
  loader=sentence_transformers_loader,
642
659
  n_parameters=278043648,
660
+ n_embedding_parameters=192_001_536,
643
661
  memory_usage_mb=1061,
644
662
  max_tokens=514.0,
645
663
  embed_dim=768,
@@ -647,7 +665,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
647
665
  open_weights=True,
648
666
  public_training_code=None,
649
667
  public_training_data=None,
650
- framework=["PyTorch", "Sentence Transformers"],
668
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
651
669
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
652
670
  similarity_fn_name=ScoringFunction.COSINE,
653
671
  use_instructions=None,
@@ -674,6 +692,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
674
692
  languages=["ara-Arab"],
675
693
  loader=sentence_transformers_loader,
676
694
  n_parameters=470926848,
695
+ n_embedding_parameters=384_885_504,
677
696
  memory_usage_mb=1796,
678
697
  max_tokens=512.0,
679
698
  embed_dim=768,
@@ -681,7 +700,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
681
700
  open_weights=True,
682
701
  public_training_code=None,
683
702
  public_training_data=None,
684
- framework=["PyTorch", "Sentence Transformers"],
703
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
685
704
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
686
705
  similarity_fn_name=ScoringFunction.COSINE,
687
706
  use_instructions=None,
@@ -708,6 +727,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
708
727
  languages=["ara-Arab"],
709
728
  loader=sentence_transformers_loader,
710
729
  n_parameters=109486464,
730
+ n_embedding_parameters=23_444_736,
711
731
  memory_usage_mb=418,
712
732
  max_tokens=514.0,
713
733
  embed_dim=768,
@@ -715,7 +735,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
715
735
  open_weights=True,
716
736
  public_training_code=None,
717
737
  public_training_data=None,
718
- framework=["PyTorch", "Sentence Transformers"],
738
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
719
739
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
720
740
  similarity_fn_name=ScoringFunction.COSINE,
721
741
  use_instructions=None,
@@ -742,6 +762,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
742
762
  languages=["ara-Arab"],
743
763
  loader=sentence_transformers_loader,
744
764
  n_parameters=162841344,
765
+ n_embedding_parameters=76_800_000,
745
766
  memory_usage_mb=621,
746
767
  max_tokens=512.0,
747
768
  embed_dim=768,
@@ -749,7 +770,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
749
770
  open_weights=True,
750
771
  public_training_code=None,
751
772
  public_training_data=None,
752
- framework=["PyTorch", "Sentence Transformers"],
773
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
753
774
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka",
754
775
  similarity_fn_name=ScoringFunction.COSINE,
755
776
  use_instructions=None,
@@ -774,6 +795,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
774
795
  languages=None,
775
796
  loader=sentence_transformers_loader,
776
797
  n_parameters=None,
798
+ n_embedding_parameters=31_254_528,
777
799
  memory_usage_mb=None,
778
800
  max_tokens=512.0,
779
801
  embed_dim=1024,
@@ -781,7 +803,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
781
803
  open_weights=True,
782
804
  public_training_code=None,
783
805
  public_training_data=None,
784
- framework=["PyTorch"],
806
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
785
807
  reference="https://huggingface.co/consciousAI/cai-lunaris-text-embeddings",
786
808
  similarity_fn_name=ScoringFunction.COSINE,
787
809
  use_instructions=None,
@@ -797,6 +819,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
797
819
  languages=None,
798
820
  loader=sentence_transformers_loader,
799
821
  n_parameters=None,
822
+ n_embedding_parameters=None,
800
823
  memory_usage_mb=None,
801
824
  max_tokens=514.0,
802
825
  embed_dim=768,
@@ -804,7 +827,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
804
827
  open_weights=True,
805
828
  public_training_code=None,
806
829
  public_training_data=None,
807
- framework=["PyTorch"],
830
+ framework=["PyTorch", "Sentence Transformers"],
808
831
  reference="https://huggingface.co/consciousAI/cai-stellaris-text-embeddings",
809
832
  similarity_fn_name=ScoringFunction.COSINE,
810
833
  use_instructions=None,
@@ -829,6 +852,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
829
852
  languages=None,
830
853
  loader=sentence_transformers_loader,
831
854
  n_parameters=1279887360,
855
+ n_embedding_parameters=65_536_000,
832
856
  memory_usage_mb=2441,
833
857
  max_tokens=2048.0,
834
858
  embed_dim=2048,
@@ -836,7 +860,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
836
860
  open_weights=True,
837
861
  public_training_code=None,
838
862
  public_training_data=None,
839
- framework=["PyTorch", "Sentence Transformers"],
863
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
840
864
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.2",
841
865
  similarity_fn_name=ScoringFunction.COSINE,
842
866
  use_instructions=None,
@@ -852,6 +876,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
852
876
  languages=None,
853
877
  loader=sentence_transformers_loader,
854
878
  n_parameters=1279887360,
879
+ n_embedding_parameters=65_536_000,
855
880
  memory_usage_mb=2441,
856
881
  max_tokens=2048.0,
857
882
  embed_dim=2048,
@@ -859,7 +884,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
859
884
  open_weights=True,
860
885
  public_training_code=None,
861
886
  public_training_data=None,
862
- framework=["PyTorch", "Sentence Transformers"],
887
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
863
888
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.3",
864
889
  similarity_fn_name=ScoringFunction.COSINE,
865
890
  use_instructions=None,
@@ -875,6 +900,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
875
900
  languages=["fra-Latn", "eng-Latn"],
876
901
  loader=sentence_transformers_loader,
877
902
  n_parameters=1279887360,
903
+ n_embedding_parameters=65_536_000,
878
904
  memory_usage_mb=2441,
879
905
  max_tokens=2048.0,
880
906
  embed_dim=2048,
@@ -882,7 +908,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
882
908
  open_weights=True,
883
909
  public_training_code=None,
884
910
  public_training_data=None,
885
- framework=["PyTorch", "Sentence Transformers"],
911
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
886
912
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.4",
887
913
  similarity_fn_name=ScoringFunction.COSINE,
888
914
  use_instructions=None,
@@ -899,6 +925,7 @@ thenlper__gte_base = ModelMeta(
899
925
  languages=["eng-Latn"],
900
926
  loader=sentence_transformers_loader,
901
927
  n_parameters=109482752,
928
+ n_embedding_parameters=23_440_896,
902
929
  memory_usage_mb=209,
903
930
  max_tokens=512.0,
904
931
  embed_dim=768,
@@ -906,7 +933,7 @@ thenlper__gte_base = ModelMeta(
906
933
  open_weights=True,
907
934
  public_training_code=None,
908
935
  public_training_data=None,
909
- framework=["PyTorch"],
936
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
910
937
  reference="https://huggingface.co/thenlper/gte-base",
911
938
  similarity_fn_name=ScoringFunction.COSINE,
912
939
  use_instructions=None,
@@ -928,6 +955,7 @@ thenlper__gte_large = ModelMeta(
928
955
  languages=["eng-Latn"],
929
956
  loader=sentence_transformers_loader,
930
957
  n_parameters=335142400,
958
+ n_embedding_parameters=31_254_528,
931
959
  memory_usage_mb=639,
932
960
  max_tokens=512.0,
933
961
  embed_dim=1024,
@@ -935,7 +963,7 @@ thenlper__gte_large = ModelMeta(
935
963
  open_weights=True,
936
964
  public_training_code=None,
937
965
  public_training_data=None,
938
- framework=["PyTorch"],
966
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
939
967
  reference="https://huggingface.co/thenlper/gte-large",
940
968
  similarity_fn_name=ScoringFunction.COSINE,
941
969
  use_instructions=None,
@@ -957,6 +985,7 @@ thenlper__gte_small = ModelMeta(
957
985
  languages=["eng-Latn"],
958
986
  loader=sentence_transformers_loader,
959
987
  n_parameters=33360512,
988
+ n_embedding_parameters=11_720_448,
960
989
  memory_usage_mb=64,
961
990
  max_tokens=512.0,
962
991
  embed_dim=384,
@@ -964,7 +993,7 @@ thenlper__gte_small = ModelMeta(
964
993
  open_weights=True,
965
994
  public_training_code=None,
966
995
  public_training_data=None,
967
- framework=["PyTorch"],
996
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
968
997
  reference="https://huggingface.co/thenlper/gte-small",
969
998
  similarity_fn_name=ScoringFunction.COSINE,
970
999
  use_instructions=None,
@@ -986,6 +1015,7 @@ OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
986
1015
  languages=["pol-Latn"],
987
1016
  loader=sentence_transformers_loader,
988
1017
  n_parameters=103705344,
1018
+ n_embedding_parameters=None,
989
1019
  memory_usage_mb=396,
990
1020
  max_tokens=512.0,
991
1021
  embed_dim=768,
@@ -1009,6 +1039,7 @@ OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
1009
1039
  languages=["pol-Latn"],
1010
1040
  loader=sentence_transformers_loader,
1011
1041
  n_parameters=None,
1042
+ n_embedding_parameters=None,
1012
1043
  memory_usage_mb=None,
1013
1044
  max_tokens=514.0,
1014
1045
  embed_dim=768,
@@ -1032,6 +1063,7 @@ sdadas__mmlw_e5_base = ModelMeta(
1032
1063
  languages=["pol-Latn"],
1033
1064
  loader=sentence_transformers_loader,
1034
1065
  n_parameters=278043648,
1066
+ n_embedding_parameters=192_001_536,
1035
1067
  memory_usage_mb=1061,
1036
1068
  max_tokens=514.0,
1037
1069
  embed_dim=768,
@@ -1039,7 +1071,7 @@ sdadas__mmlw_e5_base = ModelMeta(
1039
1071
  open_weights=True,
1040
1072
  public_training_code=None,
1041
1073
  public_training_data=None,
1042
- framework=["PyTorch"],
1074
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1043
1075
  reference="https://huggingface.co/sdadas/mmlw-e5-base",
1044
1076
  similarity_fn_name=ScoringFunction.COSINE,
1045
1077
  use_instructions=None,
@@ -1047,7 +1079,7 @@ sdadas__mmlw_e5_base = ModelMeta(
1047
1079
  adapted_from="intfloat/multilingual-e5-base",
1048
1080
  superseded_by=None,
1049
1081
  citation="""@article{dadas2024pirb,
1050
- title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1082
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1051
1083
  author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1052
1084
  year={2024},
1053
1085
  eprint={2402.13350},
@@ -1063,6 +1095,7 @@ dwzhu__e5_base_4k = ModelMeta(
1063
1095
  languages=["eng-Latn"],
1064
1096
  loader=sentence_transformers_loader,
1065
1097
  n_parameters=None,
1098
+ n_embedding_parameters=23_440_896,
1066
1099
  memory_usage_mb=None,
1067
1100
  max_tokens=4096.0,
1068
1101
  embed_dim=None,
@@ -1070,7 +1103,7 @@ dwzhu__e5_base_4k = ModelMeta(
1070
1103
  open_weights=True,
1071
1104
  public_training_code=None,
1072
1105
  public_training_data=None,
1073
- framework=["PyTorch"],
1106
+ framework=["PyTorch", "Transformers"],
1074
1107
  reference="https://huggingface.co/dwzhu/e5-base-4k",
1075
1108
  similarity_fn_name=ScoringFunction.COSINE,
1076
1109
  use_instructions=None,
@@ -1092,6 +1125,7 @@ sdadas__mmlw_e5_large = ModelMeta(
1092
1125
  languages=["pol-Latn"],
1093
1126
  loader=sentence_transformers_loader,
1094
1127
  n_parameters=559890432,
1128
+ n_embedding_parameters=256_002_048,
1095
1129
  memory_usage_mb=2136,
1096
1130
  max_tokens=514.0,
1097
1131
  embed_dim=1024,
@@ -1099,7 +1133,7 @@ sdadas__mmlw_e5_large = ModelMeta(
1099
1133
  open_weights=True,
1100
1134
  public_training_code=None,
1101
1135
  public_training_data=None,
1102
- framework=["PyTorch"],
1136
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1103
1137
  reference="https://huggingface.co/sdadas/mmlw-e5-large",
1104
1138
  similarity_fn_name=ScoringFunction.COSINE,
1105
1139
  use_instructions=None,
@@ -1107,7 +1141,7 @@ sdadas__mmlw_e5_large = ModelMeta(
1107
1141
  adapted_from="intfloat/multilingual-e5-large",
1108
1142
  superseded_by=None,
1109
1143
  citation="""@article{dadas2024pirb,
1110
- title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1144
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1111
1145
  author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1112
1146
  year={2024},
1113
1147
  eprint={2402.13350},
@@ -1123,6 +1157,7 @@ sdadas__mmlw_e5_small = ModelMeta(
1123
1157
  languages=["pol-Latn"],
1124
1158
  loader=sentence_transformers_loader,
1125
1159
  n_parameters=117653760,
1160
+ n_embedding_parameters=96_014_208,
1126
1161
  memory_usage_mb=449,
1127
1162
  max_tokens=512.0,
1128
1163
  embed_dim=384,
@@ -1130,7 +1165,7 @@ sdadas__mmlw_e5_small = ModelMeta(
1130
1165
  open_weights=True,
1131
1166
  public_training_code=None,
1132
1167
  public_training_data=None,
1133
- framework=["PyTorch"],
1168
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1134
1169
  reference="https://huggingface.co/sdadas/mmlw-e5-small",
1135
1170
  similarity_fn_name=ScoringFunction.COSINE,
1136
1171
  use_instructions=None,
@@ -1138,7 +1173,7 @@ sdadas__mmlw_e5_small = ModelMeta(
1138
1173
  adapted_from="intfloat/multilingual-e5-small",
1139
1174
  superseded_by=None,
1140
1175
  citation="""@article{dadas2024pirb,
1141
- title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1176
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1142
1177
  author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1143
1178
  year={2024},
1144
1179
  eprint={2402.13350},
@@ -1154,6 +1189,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
1154
1189
  languages=["pol-Latn"],
1155
1190
  loader=sentence_transformers_loader,
1156
1191
  n_parameters=124442880,
1192
+ n_embedding_parameters=38_400_768,
1157
1193
  memory_usage_mb=475,
1158
1194
  max_tokens=514.0,
1159
1195
  embed_dim=768,
@@ -1161,7 +1197,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
1161
1197
  open_weights=True,
1162
1198
  public_training_code=None,
1163
1199
  public_training_data=None,
1164
- framework=["PyTorch"],
1200
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1165
1201
  reference="https://huggingface.co/sdadas/mmlw-roberta-base",
1166
1202
  similarity_fn_name=ScoringFunction.COSINE,
1167
1203
  use_instructions=None,
@@ -1169,7 +1205,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
1169
1205
  adapted_from="sdadas/polish-roberta-base-v2",
1170
1206
  superseded_by=None,
1171
1207
  citation="""@article{dadas2024pirb,
1172
- title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1208
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1173
1209
  author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1174
1210
  year={2024},
1175
1211
  eprint={2402.13350},
@@ -1185,6 +1221,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
1185
1221
  languages=["pol-Latn"],
1186
1222
  loader=sentence_transformers_loader,
1187
1223
  n_parameters=434961408,
1224
+ n_embedding_parameters=131_073_024,
1188
1225
  memory_usage_mb=1659,
1189
1226
  max_tokens=514.0,
1190
1227
  embed_dim=1024,
@@ -1192,7 +1229,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
1192
1229
  open_weights=True,
1193
1230
  public_training_code=None,
1194
1231
  public_training_data=None,
1195
- framework=["PyTorch"],
1232
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1196
1233
  reference="https://huggingface.co/sdadas/mmlw-roberta-large",
1197
1234
  similarity_fn_name=ScoringFunction.COSINE,
1198
1235
  use_instructions=None,
@@ -1200,7 +1237,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
1200
1237
  adapted_from="sdadas/polish-roberta-large-v2",
1201
1238
  superseded_by=None,
1202
1239
  citation="""@article{dadas2024pirb,
1203
- title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1240
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1204
1241
  author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1205
1242
  year={2024},
1206
1243
  eprint={2402.13350},
@@ -1271,6 +1308,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
1271
1308
  languages=udever_languages,
1272
1309
  loader=sentence_transformers_loader,
1273
1310
  n_parameters=None,
1311
+ n_embedding_parameters=385_351_680,
1274
1312
  memory_usage_mb=None,
1275
1313
  max_tokens=None,
1276
1314
  embed_dim=None,
@@ -1278,7 +1316,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
1278
1316
  open_weights=True,
1279
1317
  public_training_code=None,
1280
1318
  public_training_data=None,
1281
- framework=["PyTorch"],
1319
+ framework=["PyTorch", "Transformers"],
1282
1320
  reference="https://huggingface.co/izhx/udever-bloom-1b1",
1283
1321
  similarity_fn_name=ScoringFunction.COSINE,
1284
1322
  use_instructions=None,
@@ -1300,6 +1338,7 @@ izhx__udever_bloom_3b = ModelMeta(
1300
1338
  languages=udever_languages,
1301
1339
  loader=sentence_transformers_loader,
1302
1340
  n_parameters=None,
1341
+ n_embedding_parameters=642_252_800,
1303
1342
  memory_usage_mb=None,
1304
1343
  max_tokens=None,
1305
1344
  embed_dim=None,
@@ -1307,7 +1346,7 @@ izhx__udever_bloom_3b = ModelMeta(
1307
1346
  open_weights=True,
1308
1347
  public_training_code=None,
1309
1348
  public_training_data=None,
1310
- framework=["PyTorch"],
1349
+ framework=["PyTorch", "Transformers"],
1311
1350
  reference="https://huggingface.co/izhx/udever-bloom-3b",
1312
1351
  similarity_fn_name=ScoringFunction.COSINE,
1313
1352
  use_instructions=None,
@@ -1329,6 +1368,7 @@ izhx__udever_bloom_560m = ModelMeta(
1329
1368
  languages=udever_languages,
1330
1369
  loader=sentence_transformers_loader,
1331
1370
  n_parameters=None,
1371
+ n_embedding_parameters=256_901_120,
1332
1372
  memory_usage_mb=None,
1333
1373
  max_tokens=None,
1334
1374
  embed_dim=None,
@@ -1336,7 +1376,7 @@ izhx__udever_bloom_560m = ModelMeta(
1336
1376
  open_weights=True,
1337
1377
  public_training_code=None,
1338
1378
  public_training_data=None,
1339
- framework=["PyTorch"],
1379
+ framework=["PyTorch", "Transformers"],
1340
1380
  reference="https://huggingface.co/izhx/udever-bloom-560m",
1341
1381
  similarity_fn_name=ScoringFunction.COSINE,
1342
1382
  use_instructions=None,
@@ -1358,6 +1398,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
1358
1398
  languages=udever_languages,
1359
1399
  loader=sentence_transformers_loader,
1360
1400
  n_parameters=None,
1401
+ n_embedding_parameters=1_027_604_480,
1361
1402
  memory_usage_mb=None,
1362
1403
  max_tokens=None,
1363
1404
  embed_dim=None,
@@ -1365,7 +1406,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
1365
1406
  open_weights=True,
1366
1407
  public_training_code=None,
1367
1408
  public_training_data=None,
1368
- framework=["PyTorch"],
1409
+ framework=["PyTorch", "Transformers"],
1369
1410
  reference="https://huggingface.co/izhx/udever-bloom-7b1",
1370
1411
  similarity_fn_name=ScoringFunction.COSINE,
1371
1412
  use_instructions=None,
@@ -1387,6 +1428,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1387
1428
  languages=["eng-Latn"],
1388
1429
  loader=sentence_transformers_loader,
1389
1430
  n_parameters=109482240,
1431
+ n_embedding_parameters=23_440_896,
1390
1432
  memory_usage_mb=418,
1391
1433
  max_tokens=512.0,
1392
1434
  embed_dim=768,
@@ -1394,7 +1436,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1394
1436
  open_weights=True,
1395
1437
  public_training_code=None,
1396
1438
  public_training_data=None,
1397
- framework=["PyTorch", "Sentence Transformers"],
1439
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1398
1440
  reference="https://huggingface.co/avsolatorio/GIST-Embedding-v0",
1399
1441
  similarity_fn_name=ScoringFunction.COSINE,
1400
1442
  use_instructions=None,
@@ -1437,6 +1479,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1437
1479
  languages=["eng-Latn"],
1438
1480
  loader=sentence_transformers_loader,
1439
1481
  n_parameters=22713216,
1482
+ n_embedding_parameters=11_720_448,
1440
1483
  memory_usage_mb=87,
1441
1484
  max_tokens=512.0,
1442
1485
  embed_dim=384,
@@ -1444,7 +1487,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1444
1487
  open_weights=True,
1445
1488
  public_training_code=None,
1446
1489
  public_training_data=None,
1447
- framework=["PyTorch", "Sentence Transformers"],
1490
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1448
1491
  reference="https://huggingface.co/avsolatorio/GIST-all-MiniLM-L6-v2",
1449
1492
  similarity_fn_name=ScoringFunction.COSINE,
1450
1493
  use_instructions=None,
@@ -1487,6 +1530,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1487
1530
  languages=["eng-Latn"],
1488
1531
  loader=sentence_transformers_loader,
1489
1532
  n_parameters=335141888,
1533
+ n_embedding_parameters=31_254_528,
1490
1534
  memory_usage_mb=1278,
1491
1535
  max_tokens=512.0,
1492
1536
  embed_dim=1024,
@@ -1494,7 +1538,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1494
1538
  open_weights=True,
1495
1539
  public_training_code=None,
1496
1540
  public_training_data=None,
1497
- framework=["PyTorch", "Sentence Transformers"],
1541
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1498
1542
  reference="https://huggingface.co/avsolatorio/GIST-large-Embedding-v0",
1499
1543
  similarity_fn_name=ScoringFunction.COSINE,
1500
1544
  use_instructions=None,
@@ -1537,6 +1581,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1537
1581
  languages=["eng-Latn"],
1538
1582
  loader=sentence_transformers_loader,
1539
1583
  n_parameters=33360000,
1584
+ n_embedding_parameters=11_720_448,
1540
1585
  memory_usage_mb=127,
1541
1586
  max_tokens=512.0,
1542
1587
  embed_dim=384,
@@ -1544,7 +1589,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1544
1589
  open_weights=True,
1545
1590
  public_training_code=None,
1546
1591
  public_training_data=None,
1547
- framework=["PyTorch", "Sentence Transformers"],
1592
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1548
1593
  reference="https://huggingface.co/avsolatorio/GIST-small-Embedding-v0",
1549
1594
  similarity_fn_name=ScoringFunction.COSINE,
1550
1595
  use_instructions=None,
@@ -1587,6 +1632,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1587
1632
  languages=None,
1588
1633
  loader=sentence_transformers_loader,
1589
1634
  n_parameters=None,
1635
+ n_embedding_parameters=1_026_793_472,
1590
1636
  memory_usage_mb=None,
1591
1637
  max_tokens=None,
1592
1638
  embed_dim=4096,
@@ -1594,7 +1640,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1594
1640
  open_weights=True,
1595
1641
  public_training_code=None,
1596
1642
  public_training_data=None,
1597
- framework=["PyTorch"],
1643
+ framework=["PyTorch", "Sentence Transformers"],
1598
1644
  reference="https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco",
1599
1645
  similarity_fn_name=ScoringFunction.COSINE,
1600
1646
  use_instructions=None,
@@ -1616,6 +1662,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1616
1662
  languages=["deu-Latn"],
1617
1663
  loader=sentence_transformers_loader,
1618
1664
  n_parameters=335736320,
1665
+ n_embedding_parameters=31_848_448,
1619
1666
  memory_usage_mb=1281,
1620
1667
  max_tokens=512.0,
1621
1668
  embed_dim=1024,
@@ -1623,7 +1670,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1623
1670
  open_weights=True,
1624
1671
  public_training_code=None,
1625
1672
  public_training_data=None,
1626
- framework=["PyTorch"],
1673
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1627
1674
  reference="https://huggingface.co/aari1995/German_Semantic_STS_V2",
1628
1675
  similarity_fn_name=ScoringFunction.COSINE,
1629
1676
  use_instructions=None,
@@ -1640,6 +1687,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1640
1687
  languages=["eng-Latn"],
1641
1688
  loader=sentence_transformers_loader,
1642
1689
  n_parameters=33360000,
1690
+ n_embedding_parameters=11_720_448,
1643
1691
  memory_usage_mb=127,
1644
1692
  max_tokens=512.0,
1645
1693
  embed_dim=384,
@@ -1647,7 +1695,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1647
1695
  open_weights=True,
1648
1696
  public_training_code=None,
1649
1697
  public_training_data=None,
1650
- framework=["PyTorch"],
1698
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1651
1699
  reference="https://huggingface.co/abhinand/MedEmbed-small-v0.1",
1652
1700
  similarity_fn_name=ScoringFunction.COSINE,
1653
1701
  use_instructions=None,
@@ -1678,6 +1726,7 @@ avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
1678
1726
  languages=["eng-Latn"],
1679
1727
  loader=sentence_transformers_loader,
1680
1728
  n_parameters=33360000,
1729
+ n_embedding_parameters=11720448,
1681
1730
  memory_usage_mb=127,
1682
1731
  max_tokens=512.0,
1683
1732
  embed_dim=384,
@@ -1701,6 +1750,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1701
1750
  languages=["eng-Latn"],
1702
1751
  loader=sentence_transformers_loader,
1703
1752
  n_parameters=22713216,
1753
+ n_embedding_parameters=11_720_448,
1704
1754
  memory_usage_mb=87,
1705
1755
  max_tokens=512.0,
1706
1756
  embed_dim=384,
@@ -1708,7 +1758,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1708
1758
  open_weights=True,
1709
1759
  public_training_code=None,
1710
1760
  public_training_data=None,
1711
- framework=["PyTorch", "Sentence Transformers"],
1761
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1712
1762
  reference="https://huggingface.co/brahmairesearch/slx-v0.1",
1713
1763
  similarity_fn_name=ScoringFunction.COSINE,
1714
1764
  use_instructions=None,
@@ -1724,6 +1774,7 @@ deepfile__embedder_100p = ModelMeta(
1724
1774
  languages=None,
1725
1775
  loader=sentence_transformers_loader,
1726
1776
  n_parameters=None,
1777
+ n_embedding_parameters=192_001_536,
1727
1778
  memory_usage_mb=1061,
1728
1779
  max_tokens=514.0,
1729
1780
  embed_dim=768,
@@ -1731,7 +1782,7 @@ deepfile__embedder_100p = ModelMeta(
1731
1782
  open_weights=True,
1732
1783
  public_training_code=None,
1733
1784
  public_training_data=None,
1734
- framework=["PyTorch"],
1785
+ framework=["PyTorch", "Transformers", "safetensors"],
1735
1786
  reference="https://huggingface.co/deepfile/embedder-100p",
1736
1787
  similarity_fn_name=ScoringFunction.COSINE,
1737
1788
  use_instructions=None,
@@ -1747,6 +1798,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1747
1798
  languages=["eng-Latn"],
1748
1799
  loader=sentence_transformers_loader,
1749
1800
  n_parameters=None,
1801
+ n_embedding_parameters=23_440_896,
1750
1802
  memory_usage_mb=None,
1751
1803
  max_tokens=512.0,
1752
1804
  embed_dim=None,
@@ -1754,7 +1806,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1754
1806
  open_weights=True,
1755
1807
  public_training_code=None,
1756
1808
  public_training_data=None,
1757
- framework=["PyTorch"],
1809
+ framework=["PyTorch", "Sentence Transformers"],
1758
1810
  reference="https://huggingface.co/infgrad/stella-base-en-v2",
1759
1811
  similarity_fn_name=ScoringFunction.COSINE,
1760
1812
  use_instructions=None,
@@ -1770,6 +1822,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1770
1822
  languages=None,
1771
1823
  loader=sentence_transformers_loader,
1772
1824
  n_parameters=98688000,
1825
+ n_embedding_parameters=None,
1773
1826
  memory_usage_mb=158,
1774
1827
  max_tokens=512.0,
1775
1828
  embed_dim=1024,
@@ -1777,7 +1830,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1777
1830
  open_weights=True,
1778
1831
  public_training_code=None,
1779
1832
  public_training_data=None,
1780
- framework=["PyTorch"],
1833
+ framework=["PyTorch", "safetensors"],
1781
1834
  reference="https://huggingface.co/malenia1/ternary-weight-embedding",
1782
1835
  similarity_fn_name=ScoringFunction.COSINE,
1783
1836
  use_instructions=None,
@@ -1793,6 +1846,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1793
1846
  languages=["ara-Arab", "eng-Latn"],
1794
1847
  loader=sentence_transformers_loader,
1795
1848
  n_parameters=559890432,
1849
+ n_embedding_parameters=256_002_048,
1796
1850
  memory_usage_mb=2136,
1797
1851
  max_tokens=514.0,
1798
1852
  embed_dim=1024,
@@ -1800,7 +1854,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1800
1854
  open_weights=True,
1801
1855
  public_training_code=None,
1802
1856
  public_training_data=None,
1803
- framework=["PyTorch", "Sentence Transformers"],
1857
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1804
1858
  reference="https://huggingface.co/omarelshehy/arabic-english-sts-matryoshka",
1805
1859
  similarity_fn_name=ScoringFunction.COSINE,
1806
1860
  use_instructions=None,
@@ -1833,6 +1887,7 @@ openbmb__minicpm_embedding = ModelMeta(
1833
1887
  release_date="2024-09-04",
1834
1888
  languages=["zho-Hans", "eng-Latn"],
1835
1889
  n_parameters=2724880896,
1890
+ n_embedding_parameters=282_822_912,
1836
1891
  memory_usage_mb=5197,
1837
1892
  max_tokens=512.0,
1838
1893
  embed_dim=2304,
@@ -1840,7 +1895,7 @@ openbmb__minicpm_embedding = ModelMeta(
1840
1895
  open_weights=True,
1841
1896
  public_training_code=None,
1842
1897
  public_training_data=None,
1843
- framework=["PyTorch", "Sentence Transformers"],
1898
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
1844
1899
  reference="https://huggingface.co/openbmb/MiniCPM-Embedding",
1845
1900
  similarity_fn_name=ScoringFunction.COSINE,
1846
1901
  use_instructions=None,
@@ -1857,6 +1912,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1857
1912
  languages=["ara-Arab", "eng-Latn"],
1858
1913
  loader=sentence_transformers_loader,
1859
1914
  n_parameters=135193344,
1915
+ n_embedding_parameters=49_152_000,
1860
1916
  memory_usage_mb=516,
1861
1917
  max_tokens=512.0,
1862
1918
  embed_dim=768,
@@ -1864,7 +1920,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1864
1920
  open_weights=True,
1865
1921
  public_training_code=None,
1866
1922
  public_training_data=None,
1867
- framework=["PyTorch", "Sentence Transformers"],
1923
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1868
1924
  reference="https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1",
1869
1925
  similarity_fn_name=ScoringFunction.COSINE,
1870
1926
  use_instructions=None,
@@ -1888,6 +1944,7 @@ sbert_chinese_general_v1 = ModelMeta(
1888
1944
  languages=["zho-Hans"],
1889
1945
  loader=sentence_transformers_loader,
1890
1946
  n_parameters=None,
1947
+ n_embedding_parameters=16_226_304,
1891
1948
  memory_usage_mb=None, # Not visible on repo
1892
1949
  max_tokens=512,
1893
1950
  embed_dim=128,
@@ -1895,7 +1952,7 @@ sbert_chinese_general_v1 = ModelMeta(
1895
1952
  open_weights=True,
1896
1953
  public_training_code=None,
1897
1954
  public_training_data=None,
1898
- framework=["PyTorch", "Sentence Transformers"],
1955
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1899
1956
  reference="https://huggingface.co/DMetaSoul/sbert-chinese-general-v1",
1900
1957
  similarity_fn_name=ScoringFunction.COSINE,
1901
1958
  use_instructions=None,
@@ -1916,6 +1973,7 @@ dmeta_embedding_zh_small = ModelMeta(
1916
1973
  languages=["zho-Hans"],
1917
1974
  loader=sentence_transformers_loader,
1918
1975
  n_parameters=int(74.2 * 1e6),
1976
+ n_embedding_parameters=16_226_304,
1919
1977
  memory_usage_mb=283,
1920
1978
  max_tokens=1024,
1921
1979
  embed_dim=768,
@@ -1923,7 +1981,7 @@ dmeta_embedding_zh_small = ModelMeta(
1923
1981
  open_weights=True,
1924
1982
  public_training_code=None,
1925
1983
  public_training_data=None,
1926
- framework=["PyTorch", "Sentence Transformers"],
1984
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
1927
1985
  reference="https://huggingface.co/DMetaSoul/Dmeta-embedding-zh-small/",
1928
1986
  similarity_fn_name=ScoringFunction.COSINE,
1929
1987
  use_instructions=None,
@@ -1939,6 +1997,7 @@ xiaobu_embedding = ModelMeta(
1939
1997
  languages=["zho-Hans"],
1940
1998
  loader=sentence_transformers_loader,
1941
1999
  n_parameters=int(326 * 1e6),
2000
+ n_embedding_parameters=21_635_072,
1942
2001
  memory_usage_mb=1244,
1943
2002
  max_tokens=512,
1944
2003
  embed_dim=1024,
@@ -1946,7 +2005,7 @@ xiaobu_embedding = ModelMeta(
1946
2005
  open_weights=True,
1947
2006
  public_training_code=None,
1948
2007
  public_training_data=None,
1949
- framework=["PyTorch", "Sentence Transformers"],
2008
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1950
2009
  reference="https://huggingface.co/lier007/xiaobu-embedding",
1951
2010
  similarity_fn_name=ScoringFunction.COSINE,
1952
2011
  use_instructions=None,
@@ -1963,6 +2022,7 @@ xiaobu_embedding_v2 = ModelMeta(
1963
2022
  languages=["zho-Hans"],
1964
2023
  loader=sentence_transformers_loader,
1965
2024
  n_parameters=int(326 * 1e6),
2025
+ n_embedding_parameters=21_635_072,
1966
2026
  memory_usage_mb=1242,
1967
2027
  max_tokens=512,
1968
2028
  embed_dim=768,
@@ -1970,7 +2030,7 @@ xiaobu_embedding_v2 = ModelMeta(
1970
2030
  open_weights=True,
1971
2031
  public_training_code=None,
1972
2032
  public_training_data=None,
1973
- framework=["PyTorch", "Sentence Transformers"],
2033
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1974
2034
  reference="https://huggingface.co/lier007/xiaobu-embedding-v2",
1975
2035
  similarity_fn_name=ScoringFunction.COSINE,
1976
2036
  use_instructions=None,
@@ -1987,6 +2047,7 @@ yinka_embedding = ModelMeta(
1987
2047
  languages=["zho-Hans"],
1988
2048
  loader=sentence_transformers_loader,
1989
2049
  n_parameters=int(326 * 1e6),
2050
+ n_embedding_parameters=21_635_072,
1990
2051
  memory_usage_mb=1244,
1991
2052
  max_tokens=512,
1992
2053
  embed_dim=1024,
@@ -1994,7 +2055,7 @@ yinka_embedding = ModelMeta(
1994
2055
  open_weights=True,
1995
2056
  public_training_code=None,
1996
2057
  public_training_data=None,
1997
- framework=["PyTorch", "Sentence Transformers"],
2058
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1998
2059
  reference="https://huggingface.co/Classical/Yinka",
1999
2060
  similarity_fn_name=ScoringFunction.COSINE,
2000
2061
  use_instructions=None,
@@ -2010,6 +2071,7 @@ conan_embedding = ModelMeta(
2010
2071
  languages=["zho-Hans"],
2011
2072
  loader=sentence_transformers_loader,
2012
2073
  n_parameters=int(326 * 1e6),
2074
+ n_embedding_parameters=21_635_072,
2013
2075
  memory_usage_mb=1242,
2014
2076
  max_tokens=512,
2015
2077
  embed_dim=768,
@@ -2017,7 +2079,7 @@ conan_embedding = ModelMeta(
2017
2079
  open_weights=True,
2018
2080
  public_training_code=None,
2019
2081
  public_training_data=None,
2020
- framework=["PyTorch", "Sentence Transformers"],
2082
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
2021
2083
  reference="https://huggingface.co/Classical/Yinka",
2022
2084
  similarity_fn_name=ScoringFunction.COSINE,
2023
2085
  use_instructions=None,
@@ -2025,13 +2087,13 @@ conan_embedding = ModelMeta(
2025
2087
  training_datasets=None, # They "scraped" things from the internet, we don't know, could be leakage
2026
2088
  superseded_by=None,
2027
2089
  citation="""@misc{li2024conanembeddinggeneraltextembedding,
2028
- title={Conan-embedding: General Text Embedding with More and Better Negative Samples},
2090
+ title={Conan-embedding: General Text Embedding with More and Better Negative Samples},
2029
2091
  author={Shiyu Li and Yang Tang and Shizhe Chen and Xi Chen},
2030
2092
  year={2024},
2031
2093
  eprint={2408.15710},
2032
2094
  archivePrefix={arXiv},
2033
2095
  primaryClass={cs.CL},
2034
- url={https://arxiv.org/abs/2408.15710},
2096
+ url={https://arxiv.org/abs/2408.15710},
2035
2097
  }""",
2036
2098
  )
2037
2099
 
@@ -2043,6 +2105,7 @@ ember_v1 = ModelMeta(
2043
2105
  release_date="2023-10-10",
2044
2106
  languages=["eng-Latn"],
2045
2107
  n_parameters=int(335 * 1e6),
2108
+ n_embedding_parameters=31_254_528,
2046
2109
  memory_usage_mb=1278,
2047
2110
  max_tokens=512,
2048
2111
  embed_dim=1024,
@@ -2050,14 +2113,14 @@ ember_v1 = ModelMeta(
2050
2113
  open_weights=True,
2051
2114
  public_training_code=None,
2052
2115
  public_training_data=None,
2053
- framework=["PyTorch", "Sentence Transformers"],
2116
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
2054
2117
  reference="https://huggingface.co/llmrails/ember-v1",
2055
2118
  similarity_fn_name=ScoringFunction.COSINE,
2056
2119
  use_instructions=None,
2057
2120
  training_datasets=None,
2058
2121
  superseded_by=None,
2059
2122
  citation="""@misc{nur2024emberv1,
2060
- title={ember-v1: SOTA embedding model},
2123
+ title={ember-v1: SOTA embedding model},
2061
2124
  author={Enrike Nur and Anar Aliyev},
2062
2125
  year={2023},
2063
2126
  }""",