mteb 2.7.3__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. mteb/abstasks/retrieval.py +1 -1
  2. mteb/benchmarks/benchmarks/__init__.py +2 -0
  3. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  4. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  5. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  6. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  7. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  8. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  9. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  10. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  11. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  12. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  13. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  14. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  15. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  16. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  17. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  18. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  19. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  20. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  21. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  22. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  23. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  24. mteb/models/model_implementations/align_models.py +1 -0
  25. mteb/models/model_implementations/amazon_models.py +1 -0
  26. mteb/models/model_implementations/andersborges.py +2 -0
  27. mteb/models/model_implementations/ara_models.py +1 -0
  28. mteb/models/model_implementations/arctic_models.py +8 -0
  29. mteb/models/model_implementations/b1ade_models.py +1 -0
  30. mteb/models/model_implementations/bedrock_models.py +4 -0
  31. mteb/models/model_implementations/bge_models.py +40 -1
  32. mteb/models/model_implementations/bica_model.py +1 -0
  33. mteb/models/model_implementations/blip2_models.py +2 -0
  34. mteb/models/model_implementations/blip_models.py +8 -0
  35. mteb/models/model_implementations/bm25.py +8 -5
  36. mteb/models/model_implementations/bmretriever_models.py +4 -0
  37. mteb/models/model_implementations/cadet_models.py +1 -0
  38. mteb/models/model_implementations/cde_models.py +2 -0
  39. mteb/models/model_implementations/clip_models.py +3 -0
  40. mteb/models/model_implementations/clips_models.py +3 -0
  41. mteb/models/model_implementations/codefuse_models.py +5 -0
  42. mteb/models/model_implementations/codesage_models.py +3 -0
  43. mteb/models/model_implementations/cohere_models.py +4 -0
  44. mteb/models/model_implementations/cohere_v.py +5 -0
  45. mteb/models/model_implementations/colpali_models.py +3 -0
  46. mteb/models/model_implementations/colqwen_models.py +7 -0
  47. mteb/models/model_implementations/colsmol_models.py +2 -0
  48. mteb/models/model_implementations/conan_models.py +1 -0
  49. mteb/models/model_implementations/dino_models.py +19 -0
  50. mteb/models/model_implementations/e5_instruct.py +4 -0
  51. mteb/models/model_implementations/e5_models.py +9 -0
  52. mteb/models/model_implementations/e5_v.py +1 -0
  53. mteb/models/model_implementations/eagerworks_models.py +1 -0
  54. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  55. mteb/models/model_implementations/en_code_retriever.py +1 -0
  56. mteb/models/model_implementations/euler_models.py +1 -0
  57. mteb/models/model_implementations/evaclip_models.py +4 -0
  58. mteb/models/model_implementations/fa_models.py +9 -0
  59. mteb/models/model_implementations/facebookai.py +2 -0
  60. mteb/models/model_implementations/geogpt_models.py +1 -0
  61. mteb/models/model_implementations/gme_v_models.py +2 -0
  62. mteb/models/model_implementations/google_models.py +5 -0
  63. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  64. mteb/models/model_implementations/gritlm_models.py +2 -0
  65. mteb/models/model_implementations/gte_models.py +9 -0
  66. mteb/models/model_implementations/hinvec_models.py +1 -0
  67. mteb/models/model_implementations/human.py +1 -0
  68. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  69. mteb/models/model_implementations/inf_models.py +2 -0
  70. mteb/models/model_implementations/jasper_models.py +2 -0
  71. mteb/models/model_implementations/jina_clip.py +1 -0
  72. mteb/models/model_implementations/jina_models.py +7 -0
  73. mteb/models/model_implementations/kalm_models.py +6 -0
  74. mteb/models/model_implementations/kblab.py +1 -0
  75. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  76. mteb/models/model_implementations/kfst.py +1 -0
  77. mteb/models/model_implementations/kowshik24_models.py +1 -0
  78. mteb/models/model_implementations/lens_models.py +2 -0
  79. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  80. mteb/models/model_implementations/linq_models.py +1 -0
  81. mteb/models/model_implementations/listconranker.py +1 -0
  82. mteb/models/model_implementations/llm2clip_models.py +3 -0
  83. mteb/models/model_implementations/llm2vec_models.py +8 -0
  84. mteb/models/model_implementations/mcinext_models.py +3 -0
  85. mteb/models/model_implementations/mdbr_models.py +2 -0
  86. mteb/models/model_implementations/misc_models.py +63 -0
  87. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  88. mteb/models/model_implementations/mme5_models.py +2 -1
  89. mteb/models/model_implementations/moco_models.py +2 -0
  90. mteb/models/model_implementations/mod_models.py +1 -0
  91. mteb/models/model_implementations/model2vec_models.py +13 -0
  92. mteb/models/model_implementations/moka_models.py +3 -0
  93. mteb/models/model_implementations/nbailab.py +3 -0
  94. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  95. mteb/models/model_implementations/nomic_models.py +6 -0
  96. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  97. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  98. mteb/models/model_implementations/nvidia_models.py +3 -0
  99. mteb/models/model_implementations/octen_models.py +2 -0
  100. mteb/models/model_implementations/openai_models.py +5 -0
  101. mteb/models/model_implementations/openclip_models.py +8 -0
  102. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  103. mteb/models/model_implementations/ops_moa_models.py +2 -0
  104. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  105. mteb/models/model_implementations/pawan_models.py +1 -0
  106. mteb/models/model_implementations/piccolo_models.py +2 -0
  107. mteb/models/model_implementations/promptriever_models.py +4 -0
  108. mteb/models/model_implementations/pylate_models.py +3 -0
  109. mteb/models/model_implementations/qodo_models.py +2 -0
  110. mteb/models/model_implementations/qtack_models.py +1 -0
  111. mteb/models/model_implementations/qwen3_models.py +3 -0
  112. mteb/models/model_implementations/qzhou_models.py +2 -0
  113. mteb/models/model_implementations/rasgaard_models.py +1 -0
  114. mteb/models/model_implementations/reasonir_model.py +65 -0
  115. mteb/models/model_implementations/repllama_models.py +2 -0
  116. mteb/models/model_implementations/rerankers_custom.py +3 -0
  117. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  118. mteb/models/model_implementations/richinfoai_models.py +1 -0
  119. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  120. mteb/models/model_implementations/ruri_models.py +10 -0
  121. mteb/models/model_implementations/salesforce_models.py +3 -0
  122. mteb/models/model_implementations/samilpwc_models.py +1 -0
  123. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  124. mteb/models/model_implementations/searchmap_models.py +1 -0
  125. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  126. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  127. mteb/models/model_implementations/seed_models.py +1 -0
  128. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  129. mteb/models/model_implementations/shuu_model.py +1 -0
  130. mteb/models/model_implementations/siglip_models.py +10 -0
  131. mteb/models/model_implementations/sonar_models.py +2 -1
  132. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  133. mteb/models/model_implementations/stella_models.py +6 -0
  134. mteb/models/model_implementations/tarka_models.py +2 -0
  135. mteb/models/model_implementations/text2vec_models.py +3 -0
  136. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  137. mteb/models/model_implementations/uae_models.py +1 -0
  138. mteb/models/model_implementations/vdr_models.py +1 -0
  139. mteb/models/model_implementations/vi_vn_models.py +6 -0
  140. mteb/models/model_implementations/vista_models.py +2 -0
  141. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  142. mteb/models/model_implementations/voyage_models.py +15 -0
  143. mteb/models/model_implementations/voyage_v.py +1 -0
  144. mteb/models/model_implementations/xyz_models.py +1 -0
  145. mteb/models/model_implementations/youtu_models.py +1 -0
  146. mteb/models/model_implementations/yuan_models.py +1 -0
  147. mteb/models/model_implementations/yuan_models_en.py +1 -0
  148. mteb/models/model_meta.py +35 -2
  149. mteb/tasks/retrieval/eng/__init__.py +42 -0
  150. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  151. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  152. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  153. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/RECORD +157 -136
  154. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  155. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  156. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  157. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@ ritrieve_zh_v1 = ModelMeta(
15
15
  revision="f8d5a707656c55705027678e311f9202c8ced12c",
16
16
  release_date="2025-03-25",
17
17
  n_parameters=int(326 * 1e6),
18
+ n_embedding_parameters=21_635_072,
18
19
  memory_usage_mb=1242,
19
20
  embed_dim=1792,
20
21
  license="mit",
@@ -244,6 +244,7 @@ rubert_tiny = ModelMeta(
244
244
  revision="5441c5ea8026d4f6d7505ec004845409f1259fb1",
245
245
  release_date="2021-05-24",
246
246
  n_parameters=11_900_000,
247
+ n_embedding_parameters=9_223_968,
247
248
  memory_usage_mb=45,
248
249
  embed_dim=312,
249
250
  license="mit",
@@ -270,6 +271,7 @@ rubert_tiny2 = ModelMeta(
270
271
  revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3",
271
272
  release_date="2021-10-28",
272
273
  n_parameters=29_400_000,
274
+ n_embedding_parameters=26_154_336,
273
275
  memory_usage_mb=112,
274
276
  embed_dim=312,
275
277
  license="mit",
@@ -297,6 +299,7 @@ sbert_large_nlu_ru = ModelMeta(
297
299
  revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a",
298
300
  release_date="2020-11-20",
299
301
  n_parameters=427_000_000,
302
+ n_embedding_parameters=123_021_312,
300
303
  memory_usage_mb=1629,
301
304
  embed_dim=1024,
302
305
  license="mit",
@@ -323,6 +326,7 @@ sbert_large_mt_nlu_ru = ModelMeta(
323
326
  revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0",
324
327
  release_date="2021-05-18",
325
328
  n_parameters=427_000_000,
329
+ n_embedding_parameters=123_021_312,
326
330
  memory_usage_mb=1629,
327
331
  embed_dim=1024,
328
332
  license="not specified",
@@ -351,6 +355,7 @@ user_base_ru = ModelMeta(
351
355
  revision="436a489a2087d61aa670b3496a9915f84e46c861",
352
356
  release_date="2024-06-10",
353
357
  n_parameters=427_000_000,
358
+ n_embedding_parameters=38_603_520,
354
359
  memory_usage_mb=473,
355
360
  embed_dim=768,
356
361
  license="apache-2.0",
@@ -412,6 +417,7 @@ user_bge_m3 = ModelMeta(
412
417
  revision="0cc6cfe48e260fb0474c753087a69369e88709ae",
413
418
  release_date="2024-07-05",
414
419
  n_parameters=359_026_688,
420
+ n_embedding_parameters=47_273_984,
415
421
  memory_usage_mb=1370,
416
422
  embed_dim=1024,
417
423
  license="apache-2.0",
@@ -463,6 +469,7 @@ deberta_v1_ru = ModelMeta(
463
469
  revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4",
464
470
  release_date="2023-02-07",
465
471
  n_parameters=124_000_000,
472
+ n_embedding_parameters=38_603_520,
466
473
  memory_usage_mb=473,
467
474
  embed_dim=768,
468
475
  license="apache-2.0",
@@ -494,6 +501,7 @@ rubert_base_cased = ModelMeta(
494
501
  revision="4036cab694767a299f2b9e6492909664d9414229",
495
502
  release_date="2020-03-04",
496
503
  n_parameters=1280_000_000,
504
+ n_embedding_parameters=91_812_096,
497
505
  memory_usage_mb=4883,
498
506
  embed_dim=768,
499
507
  license="not specified",
@@ -530,6 +538,7 @@ distilrubert_small_cased_conversational = ModelMeta(
530
538
  revision="e348066b4a7279b97138038299bddc6580a9169a",
531
539
  release_date="2022-06-28",
532
540
  n_parameters=107_000_000,
541
+ n_embedding_parameters=91_812_096,
533
542
  memory_usage_mb=408,
534
543
  embed_dim=768,
535
544
  license="not specified",
@@ -565,6 +574,7 @@ rubert_base_cased_sentence = ModelMeta(
565
574
  revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8",
566
575
  release_date="2020-03-04",
567
576
  n_parameters=107_000_000,
577
+ n_embedding_parameters=91_812_096,
568
578
  memory_usage_mb=408,
569
579
  embed_dim=768,
570
580
  license="not specified",
@@ -590,6 +600,7 @@ labse_en_ru = ModelMeta(
590
600
  revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e",
591
601
  release_date="2021-06-10",
592
602
  n_parameters=129_000_000,
603
+ n_embedding_parameters=42_303_744,
593
604
  memory_usage_mb=492,
594
605
  embed_dim=768,
595
606
  license="not specified",
@@ -618,6 +629,7 @@ rubert_tiny_turbo = ModelMeta(
618
629
  revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054",
619
630
  release_date="2024-06-21",
620
631
  n_parameters=29_200_000,
632
+ n_embedding_parameters=26_154_336,
621
633
  memory_usage_mb=111,
622
634
  embed_dim=312,
623
635
  license="mit",
@@ -641,6 +653,7 @@ rubert_mini_frida = ModelMeta(
641
653
  revision="19b279b78afd945b5ccae78f63e284909814adc2",
642
654
  release_date="2025-03-02",
643
655
  n_parameters=32_300_000,
656
+ n_embedding_parameters=26_154_336,
644
657
  memory_usage_mb=123,
645
658
  embed_dim=312,
646
659
  license="mit",
@@ -669,6 +682,7 @@ labse_ru_turbo = ModelMeta(
669
682
  revision="1940b046c6b5e125df11722b899130329d0a46da",
670
683
  release_date="2024-06-27",
671
684
  n_parameters=129_000_000,
685
+ n_embedding_parameters=42_303_744,
672
686
  memory_usage_mb=490,
673
687
  embed_dim=768,
674
688
  license="mit",
@@ -720,6 +734,7 @@ rosberta_ru_en = ModelMeta(
720
734
  use_instructions=True,
721
735
  reference="https://huggingface.co/ai-forever/ru-en-RoSBERTa",
722
736
  n_parameters=404_000_000,
737
+ n_embedding_parameters=100_869_120,
723
738
  memory_usage_mb=1540,
724
739
  max_tokens=512,
725
740
  embed_dim=1024,
@@ -886,6 +901,7 @@ frida = ModelMeta(
886
901
  use_instructions=True,
887
902
  reference="https://huggingface.co/ai-forever/FRIDA",
888
903
  n_parameters=823_000_000,
904
+ n_embedding_parameters=143_847_936,
889
905
  memory_usage_mb=3141,
890
906
  max_tokens=512,
891
907
  embed_dim=1536,
@@ -918,6 +934,7 @@ giga_embeddings = ModelMeta(
918
934
  revision="0ad5b29bfecd806cecc9d66b927d828a736594dc",
919
935
  release_date="2025-09-23",
920
936
  n_parameters=3_227_176_961,
937
+ n_embedding_parameters=None,
921
938
  memory_usage_mb=12865,
922
939
  embed_dim=2048,
923
940
  license="mit",
@@ -950,6 +967,7 @@ berta = ModelMeta(
950
967
  revision="914c8c8aed14042ed890fc2c662d5e9e66b2faa7",
951
968
  release_date="2025-03-10",
952
969
  n_parameters=128_000_000,
970
+ n_embedding_parameters=42_303_744,
953
971
  memory_usage_mb=489,
954
972
  embed_dim=768,
955
973
  license="mit",
@@ -1025,6 +1043,7 @@ user2_small = ModelMeta(
1025
1043
  use_instructions=True,
1026
1044
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1027
1045
  n_parameters=34_400_000,
1046
+ n_embedding_parameters=None,
1028
1047
  memory_usage_mb=131,
1029
1048
  max_tokens=8192,
1030
1049
  embed_dim=384,
@@ -1058,6 +1077,7 @@ user2_base = ModelMeta(
1058
1077
  use_instructions=True,
1059
1078
  reference="https://huggingface.co/collections/deepvk/user2-6802650d7210f222ec60e05f",
1060
1079
  n_parameters=149_000_000,
1080
+ n_embedding_parameters=None,
1061
1081
  memory_usage_mb=568,
1062
1082
  max_tokens=8192,
1063
1083
  embed_dim=768,
@@ -38,6 +38,7 @@ cl_nagoya_ruri_v3_30m = ModelMeta(
38
38
  revision="24899e5de370b56d179604a007c0d727bf144504",
39
39
  release_date="2025-04-07",
40
40
  n_parameters=36_705_536,
41
+ n_embedding_parameters=None,
41
42
  memory_usage_mb=140,
42
43
  embed_dim=256,
43
44
  license="apache-2.0",
@@ -69,6 +70,7 @@ cl_nagoya_ruri_v3_70m = ModelMeta(
69
70
  revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
70
71
  release_date="2025-04-09",
71
72
  n_parameters=36_705_536,
73
+ n_embedding_parameters=None,
72
74
  memory_usage_mb=140,
73
75
  embed_dim=256,
74
76
  license="apache-2.0",
@@ -98,6 +100,7 @@ cl_nagoya_ruri_v3_130m = ModelMeta(
98
100
  revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
99
101
  release_date="2025-04-09",
100
102
  n_parameters=132_140_544,
103
+ n_embedding_parameters=None,
101
104
  memory_usage_mb=504,
102
105
  embed_dim=512,
103
106
  license="apache-2.0",
@@ -127,6 +130,7 @@ cl_nagoya_ruri_v3_310m = ModelMeta(
127
130
  revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
128
131
  release_date="2025-04-09",
129
132
  n_parameters=314_611_968,
133
+ n_embedding_parameters=None,
130
134
  memory_usage_mb=1200,
131
135
  embed_dim=768,
132
136
  license="apache-2.0",
@@ -157,6 +161,7 @@ cl_nagoya_ruri_small_v2 = ModelMeta(
157
161
  revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
158
162
  release_date="2024-12-05",
159
163
  n_parameters=68_087_808,
164
+ n_embedding_parameters=25_165_824,
160
165
  memory_usage_mb=260,
161
166
  embed_dim=768,
162
167
  license="apache-2.0",
@@ -186,6 +191,7 @@ cl_nagoya_ruri_base_v2 = ModelMeta(
186
191
  revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
187
192
  release_date="2024-12-05",
188
193
  n_parameters=111_207_168,
194
+ n_embedding_parameters=25_165_824,
189
195
  memory_usage_mb=424,
190
196
  embed_dim=768,
191
197
  license="apache-2.0",
@@ -215,6 +221,7 @@ cl_nagoya_ruri_large_v2 = ModelMeta(
215
221
  revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
216
222
  release_date="2024-12-06",
217
223
  n_parameters=337_441_792,
224
+ n_embedding_parameters=33_554_432,
218
225
  memory_usage_mb=1287,
219
226
  embed_dim=1024,
220
227
  license="apache-2.0",
@@ -245,6 +252,7 @@ cl_nagoya_ruri_small_v1 = ModelMeta(
245
252
  revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
246
253
  release_date="2024-08-28",
247
254
  n_parameters=68_087_808,
255
+ n_embedding_parameters=25_165_824,
248
256
  memory_usage_mb=130,
249
257
  embed_dim=768,
250
258
  license="apache-2.0",
@@ -274,6 +282,7 @@ cl_nagoya_ruri_base_v1 = ModelMeta(
274
282
  revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
275
283
  release_date="2024-08-28",
276
284
  n_parameters=111_207_168,
285
+ n_embedding_parameters=25_165_824,
277
286
  memory_usage_mb=212,
278
287
  embed_dim=768,
279
288
  license="apache-2.0",
@@ -304,6 +313,7 @@ cl_nagoya_ruri_large_v1 = ModelMeta(
304
313
  revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
305
314
  release_date="2024-08-28",
306
315
  n_parameters=337_441_792,
316
+ n_embedding_parameters=33_554_432,
307
317
  memory_usage_mb=644,
308
318
  embed_dim=1024,
309
319
  license="apache-2.0",
@@ -58,6 +58,7 @@ SFR_Embedding_2_R = ModelMeta(
58
58
  revision="91762139d94ed4371a9fa31db5551272e0b83818",
59
59
  release_date="2024-06-14", # initial commit of hf model.
60
60
  n_parameters=7_110_000_000,
61
+ n_embedding_parameters=None,
61
62
  memory_usage_mb=13563,
62
63
  embed_dim=4096,
63
64
  license="cc-by-nc-4.0",
@@ -96,6 +97,7 @@ SFR_Embedding_Code_2B_R = ModelMeta(
96
97
  revision="c73d8631a005876ed5abde34db514b1fb6566973",
97
98
  release_date="2025-01-17", # initial commit of hf model.
98
99
  n_parameters=2_610_000_000,
100
+ n_embedding_parameters=None,
99
101
  memory_usage_mb=4986,
100
102
  embed_dim=2304,
101
103
  license="cc-by-nc-4.0",
@@ -134,6 +136,7 @@ SFR_Embedding_Mistral = ModelMeta(
134
136
  revision="938c560d1c236aa563b2dbdf084f28ab28bccb11",
135
137
  release_date="2024-01-24", # initial commit of hf model.
136
138
  n_parameters=7_110_000_000,
139
+ n_embedding_parameters=None,
137
140
  memory_usage_mb=13563,
138
141
  embed_dim=4096,
139
142
  license="cc-by-nc-4.0",
@@ -51,6 +51,7 @@ samilpwc_expr = ModelMeta(
51
51
  revision="33358978be40f36491045f9c2a359d38c3f50047",
52
52
  release_date="2025-08-12",
53
53
  n_parameters=560_000_000,
54
+ n_embedding_parameters=256_002_048,
54
55
  memory_usage_mb=2136,
55
56
  embed_dim=1024,
56
57
  license="apache-2.0",
@@ -124,6 +124,7 @@ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
124
124
  revision="1f3408afaa7b617e3445d891310a9c26dd0c68a5",
125
125
  release_date="2025-07-30",
126
126
  n_parameters=1_224_038_144,
127
+ n_embedding_parameters=183_500_800,
127
128
  memory_usage_mb=4669,
128
129
  embed_dim=1792,
129
130
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b/blob/main/LICENSE",
@@ -150,6 +151,7 @@ sbintuitions_sarashina_embedding_v1_1b = ModelMeta(
150
151
  revision="d060fcd8984075071e7fad81baff035cbb3b6c7e",
151
152
  release_date="2024-11-22",
152
153
  n_parameters=1_224_038_144,
154
+ n_embedding_parameters=183_500_800,
153
155
  memory_usage_mb=4669,
154
156
  embed_dim=1792,
155
157
  license="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b/blob/main/LICENSE",
@@ -27,6 +27,7 @@ searchmap_preview = ModelMeta(
27
27
  use_instructions=True,
28
28
  release_date="2025-03-05",
29
29
  n_parameters=435_000_000,
30
+ n_embedding_parameters=None,
30
31
  memory_usage_mb=1660,
31
32
  embed_dim=4096,
32
33
  license="mit",
@@ -431,6 +431,7 @@ seed_embedding = ModelMeta(
431
431
  embed_dim=2048,
432
432
  open_weights=False,
433
433
  n_parameters=None,
434
+ n_embedding_parameters=None,
434
435
  memory_usage_mb=None,
435
436
  license=None,
436
437
  reference="https://seed1-6-embedding.github.io/",
@@ -616,6 +616,7 @@ seed_embedding = ModelMeta(
616
616
  embed_dim=2048,
617
617
  open_weights=False,
618
618
  n_parameters=None,
619
+ n_embedding_parameters=None,
619
620
  memory_usage_mb=None,
620
621
  license=None,
621
622
  reference="https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-embedding-vision",
@@ -253,6 +253,7 @@ seed_embedding = ModelMeta(
253
253
  embed_dim=2048,
254
254
  open_weights=False,
255
255
  n_parameters=None,
256
+ n_embedding_parameters=None,
256
257
  memory_usage_mb=None,
257
258
  license=None,
258
259
  reference="https://seed1-5-embedding.github.io/",
@@ -121,6 +121,7 @@ all_minilm_l6_v2 = ModelMeta(
121
121
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
122
122
  release_date="2021-08-30",
123
123
  n_parameters=22_700_000,
124
+ n_embedding_parameters=11_720_448,
124
125
  memory_usage_mb=87,
125
126
  embed_dim=384,
126
127
  license="apache-2.0",
@@ -152,6 +153,7 @@ all_minilm_l12_v2 = ModelMeta(
152
153
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
153
154
  release_date="2021-08-30",
154
155
  n_parameters=33_400_000,
156
+ n_embedding_parameters=11_720_448,
155
157
  memory_usage_mb=127,
156
158
  embed_dim=384,
157
159
  license="apache-2.0",
@@ -183,6 +185,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
183
185
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
184
186
  release_date="2019-11-01", # release date of paper
185
187
  n_parameters=118_000_000,
188
+ n_embedding_parameters=96_014_208,
186
189
  memory_usage_mb=449,
187
190
  embed_dim=768,
188
191
  license="apache-2.0",
@@ -214,6 +217,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
214
217
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
215
218
  release_date="2019-11-01", # release date of paper
216
219
  n_parameters=278_000_000,
220
+ n_embedding_parameters=192_001_536,
217
221
  memory_usage_mb=1061,
218
222
  embed_dim=768,
219
223
  license="apache-2.0",
@@ -256,6 +260,7 @@ labse = ModelMeta(
256
260
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
257
261
  release_date="2019-11-01", # release date of paper
258
262
  n_parameters=471_000_000,
263
+ n_embedding_parameters=384_885_504,
259
264
  memory_usage_mb=1796,
260
265
  embed_dim=768,
261
266
  license="apache-2.0",
@@ -294,6 +299,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
294
299
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
295
300
  release_date="2021-08-30",
296
301
  n_parameters=22_700_000,
302
+ n_embedding_parameters=11_720_448,
297
303
  memory_usage_mb=87,
298
304
  embed_dim=384,
299
305
  license="apache-2.0",
@@ -325,6 +331,7 @@ all_mpnet_base_v2 = ModelMeta(
325
331
  revision="9a3225965996d404b775526de6dbfe85d3368642",
326
332
  release_date="2021-08-30",
327
333
  n_parameters=109_000_000,
334
+ n_embedding_parameters=23_444_736,
328
335
  memory_usage_mb=418,
329
336
  embed_dim=768,
330
337
  license="apache-2.0",
@@ -435,6 +442,7 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
435
442
  revision="7264ea07c5365a11d7e6d87dbb6195889a13054f",
436
443
  release_date="2025-01-15",
437
444
  n_parameters=108_420_096,
445
+ n_embedding_parameters=None,
438
446
  memory_usage_mb=413,
439
447
  embed_dim=1024,
440
448
  license="apache-2.0",
@@ -468,6 +476,7 @@ contriever = ModelMeta(
468
476
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
469
477
  release_date="2022-06-25", # release date of model on HF
470
478
  n_parameters=150_000_000,
479
+ n_embedding_parameters=23_440_896,
471
480
  memory_usage_mb=572,
472
481
  embed_dim=768,
473
482
  license=None,
@@ -498,6 +507,7 @@ microllama_text_embedding = ModelMeta(
498
507
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
499
508
  release_date="2024-11-10",
500
509
  n_parameters=272_000_000,
510
+ n_embedding_parameters=32_769_024,
501
511
  memory_usage_mb=1037,
502
512
  embed_dim=1024,
503
513
  license="apache-2.0",
@@ -544,6 +554,7 @@ sentence_t5_base = ModelMeta(
544
554
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
545
555
  release_date="2022-02-09",
546
556
  n_parameters=110_000_000,
557
+ n_embedding_parameters=24_674_304,
547
558
  memory_usage_mb=209,
548
559
  embed_dim=768,
549
560
  license="apache-2.0",
@@ -567,6 +578,7 @@ sentence_t5_large = ModelMeta(
567
578
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
568
579
  release_date="2022-02-09",
569
580
  n_parameters=335_000_000,
581
+ n_embedding_parameters=32_899_072,
570
582
  memory_usage_mb=639,
571
583
  embed_dim=768,
572
584
  license="apache-2.0",
@@ -590,6 +602,7 @@ sentence_t5_xl = ModelMeta(
590
602
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
591
603
  release_date="2024-03-27",
592
604
  n_parameters=3_000_000_000,
605
+ n_embedding_parameters=32_899_072,
593
606
  memory_usage_mb=2367,
594
607
  embed_dim=768,
595
608
  license="apache-2.0",
@@ -613,6 +626,7 @@ sentence_t5_xxl = ModelMeta(
613
626
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
614
627
  release_date="2024-03-27",
615
628
  n_parameters=11_000_000_000,
629
+ n_embedding_parameters=None,
616
630
  memory_usage_mb=9279,
617
631
  embed_dim=768,
618
632
  license="apache-2.0",
@@ -646,6 +660,7 @@ gtr_t5_large = ModelMeta(
646
660
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
647
661
  release_date="2022-02-09",
648
662
  n_parameters=335_000_000,
663
+ n_embedding_parameters=32_899_072,
649
664
  memory_usage_mb=639,
650
665
  embed_dim=768,
651
666
  license="apache-2.0",
@@ -681,6 +696,7 @@ gtr_t5_xl = ModelMeta(
681
696
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
682
697
  release_date="2022-02-09",
683
698
  n_parameters=1_240_000_000,
699
+ n_embedding_parameters=32_899_072,
684
700
  memory_usage_mb=2367,
685
701
  embed_dim=768,
686
702
  license="apache-2.0",
@@ -715,6 +731,7 @@ gtr_t5_xxl = ModelMeta(
715
731
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
716
732
  release_date="2022-02-09",
717
733
  n_parameters=4_860_000_000,
734
+ n_embedding_parameters=None,
718
735
  memory_usage_mb=9279,
719
736
  embed_dim=768,
720
737
  license="apache-2.0",
@@ -750,6 +767,7 @@ gtr_t5_base = ModelMeta(
750
767
  revision="7027e9594267928589816394bdd295273ddc0739",
751
768
  release_date="2022-02-09",
752
769
  n_parameters=110_000_000,
770
+ n_embedding_parameters=24_674_304,
753
771
  memory_usage_mb=209,
754
772
  embed_dim=768,
755
773
  license="apache-2.0",
@@ -10,6 +10,7 @@ codemodernbert_crow_meta = ModelMeta(
10
10
  revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
11
  release_date="2025-04-21",
12
12
  n_parameters=151668480,
13
+ n_embedding_parameters=None,
13
14
  memory_usage_mb=607,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
@@ -136,6 +136,7 @@ siglip_so400m_patch14_224 = ModelMeta(
136
136
  release_date="2024-01-08",
137
137
  modalities=["image", "text"],
138
138
  n_parameters=877_000_000,
139
+ n_embedding_parameters=None,
139
140
  memory_usage_mb=3347,
140
141
  max_tokens=16,
141
142
  embed_dim=1152,
@@ -160,6 +161,7 @@ siglip_so400m_patch14_384 = ModelMeta(
160
161
  release_date="2024-01-08",
161
162
  modalities=["image", "text"],
162
163
  n_parameters=878_000_000,
164
+ n_embedding_parameters=None,
163
165
  memory_usage_mb=3349,
164
166
  max_tokens=64,
165
167
  embed_dim=1152,
@@ -184,6 +186,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
184
186
  release_date="2024-01-08",
185
187
  modalities=["image", "text"],
186
188
  n_parameters=1_130_000_000,
189
+ n_embedding_parameters=None,
187
190
  memory_usage_mb=4306,
188
191
  max_tokens=64,
189
192
  embed_dim=1152,
@@ -208,6 +211,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
208
211
  release_date="2024-01-08",
209
212
  modalities=["image", "text"],
210
213
  n_parameters=371_000_000,
214
+ n_embedding_parameters=None,
211
215
  memory_usage_mb=1414,
212
216
  max_tokens=64,
213
217
  embed_dim=768,
@@ -232,6 +236,7 @@ siglip_base_patch16_256 = ModelMeta(
232
236
  release_date="2024-01-08",
233
237
  modalities=["image", "text"],
234
238
  n_parameters=203_000_000,
239
+ n_embedding_parameters=None,
235
240
  memory_usage_mb=775,
236
241
  max_tokens=64,
237
242
  embed_dim=768,
@@ -256,6 +261,7 @@ siglip_base_patch16_512 = ModelMeta(
256
261
  release_date="2024-01-08",
257
262
  modalities=["image", "text"],
258
263
  n_parameters=204_000_000,
264
+ n_embedding_parameters=None,
259
265
  memory_usage_mb=777,
260
266
  max_tokens=64,
261
267
  embed_dim=768,
@@ -280,6 +286,7 @@ siglip_base_patch16_384 = ModelMeta(
280
286
  release_date="2024-01-08",
281
287
  modalities=["image", "text"],
282
288
  n_parameters=203_000_000,
289
+ n_embedding_parameters=None,
283
290
  memory_usage_mb=776,
284
291
  max_tokens=64,
285
292
  embed_dim=768,
@@ -304,6 +311,7 @@ siglip_base_patch16_224 = ModelMeta(
304
311
  release_date="2024-01-08",
305
312
  modalities=["image", "text"],
306
313
  n_parameters=203_000_000,
314
+ n_embedding_parameters=None,
307
315
  memory_usage_mb=775,
308
316
  max_tokens=64,
309
317
  embed_dim=768,
@@ -328,6 +336,7 @@ siglip_large_patch16_256 = ModelMeta(
328
336
  release_date="2024-01-08",
329
337
  modalities=["image", "text"],
330
338
  n_parameters=652_000_000,
339
+ n_embedding_parameters=None,
331
340
  memory_usage_mb=2488,
332
341
  max_tokens=64,
333
342
  embed_dim=1024,
@@ -352,6 +361,7 @@ siglip_large_patch16_384 = ModelMeta(
352
361
  release_date="2024-01-08",
353
362
  modalities=["image", "text"],
354
363
  n_parameters=652_000_000,
364
+ n_embedding_parameters=None,
355
365
  memory_usage_mb=2489,
356
366
  max_tokens=64,
357
367
  embed_dim=1024,
@@ -224,7 +224,8 @@ sonar = ModelMeta(
224
224
  use_instructions=False, # it does take a language code as input
225
225
  revision="a551c586dcf4a49c8fd847de369412d556a7f2f2",
226
226
  release_date="2021-05-21",
227
- n_parameters=None, # it is really multiple models so not sure how to calculate this
227
+ n_parameters=None,
228
+ n_embedding_parameters=None, # it is really multiple models so not sure how to calculate this
228
229
  max_tokens=512, # https://github.com/facebookresearch/SONAR/blob/549d287466443bd8720f938047882630c1c5c3f7/sonar/models/sonar_text/builder.py#L139
229
230
  embed_dim=1024,
230
231
  license="mit",
@@ -12,6 +12,7 @@ spartan8806_atles_champion_embedding = ModelMeta(
12
12
  revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
13
13
  release_date="2025-11-15",
14
14
  n_parameters=110_000_000,
15
+ n_embedding_parameters=23_444_736,
15
16
  memory_usage_mb=420,
16
17
  max_tokens=512,
17
18
  embed_dim=768,
@@ -66,6 +66,7 @@ stella_en_400m = ModelMeta(
66
66
  revision="1bb50bc7bb726810eac2140e62155b88b0df198f",
67
67
  release_date="2024-07-12",
68
68
  n_parameters=435_000_000,
69
+ n_embedding_parameters=None,
69
70
  memory_usage_mb=1660,
70
71
  max_tokens=8192,
71
72
  embed_dim=4096,
@@ -101,6 +102,7 @@ stella_en_1_5b = ModelMeta(
101
102
  revision="d03be74b361d4eb24f42a2fe5bd2e29917df4604",
102
103
  release_date="2024-07-12",
103
104
  n_parameters=1_540_000_000,
105
+ n_embedding_parameters=232_928_256,
104
106
  memory_usage_mb=5887,
105
107
  max_tokens=131072,
106
108
  embed_dim=8960,
@@ -130,6 +132,7 @@ stella_large_zh_v3_1792d = ModelMeta(
130
132
  revision="d5d39eb8cd11c80a63df53314e59997074469f09",
131
133
  release_date="2024-02-17",
132
134
  n_parameters=None,
135
+ n_embedding_parameters=21_635_072,
133
136
  memory_usage_mb=None, # can't see on model card
134
137
  embed_dim=1792,
135
138
  license="not specified",
@@ -157,6 +160,7 @@ stella_base_zh_v3_1792d = ModelMeta(
157
160
  revision="82254892a0fba125aa2abf3a4800d2dd12821343",
158
161
  release_date="2024-02-17",
159
162
  n_parameters=None,
163
+ n_embedding_parameters=16_226_304,
160
164
  memory_usage_mb=None, # can't see on model card
161
165
  embed_dim=1792,
162
166
  license="mit",
@@ -185,6 +189,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
185
189
  revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
186
190
  release_date="2024-02-27",
187
191
  n_parameters=int(326 * 1e6),
192
+ n_embedding_parameters=21_635_072,
188
193
  memory_usage_mb=1242,
189
194
  embed_dim=1792,
190
195
  license="mit",
@@ -209,6 +214,7 @@ zpoint_large_embedding_zh = ModelMeta(
209
214
  revision="b1075144f440ab4409c05622c1179130ebd57d03",
210
215
  release_date="2024-06-04",
211
216
  n_parameters=int(326 * 1e6),
217
+ n_embedding_parameters=21_635_072,
212
218
  memory_usage_mb=1242,
213
219
  embed_dim=1792,
214
220
  license="mit",
@@ -327,6 +327,7 @@ tarka_embedding_150m_v1 = ModelMeta(
327
327
  revision="b0ffecc4ef0d873e517507ed080e43b88b2704b9",
328
328
  release_date="2025-11-04",
329
329
  n_parameters=155_714_304,
330
+ n_embedding_parameters=None,
330
331
  embed_dim=768,
331
332
  max_tokens=2048,
332
333
  license="gemma",
@@ -361,6 +362,7 @@ tarka_embedding_350m_v1 = ModelMeta(
361
362
  revision="a850d6a329145474727424fed6b12b62096b8ba3",
362
363
  release_date="2025-11-11",
363
364
  n_parameters=354_483_968,
365
+ n_embedding_parameters=None,
364
366
  memory_usage_mb=676,
365
367
  embed_dim=1024,
366
368
  max_tokens=128000,
@@ -22,6 +22,7 @@ text2vec_base_chinese = ModelMeta(
22
22
  revision="183bb99aa7af74355fb58d16edf8c13ae7c5433e",
23
23
  release_date="2022-01-23",
24
24
  n_parameters=int(102 * 1e6),
25
+ n_embedding_parameters=16_226_304,
25
26
  embed_dim=768,
26
27
  license="apache-2.0",
27
28
  max_tokens=512,
@@ -51,6 +52,7 @@ text2vec_base_chinese_paraphrase = ModelMeta(
51
52
  revision="e90c150a9c7fb55a67712a766d6820c55fb83cdd",
52
53
  release_date="2023-06-19",
53
54
  n_parameters=118 * 1e6,
55
+ n_embedding_parameters=30_720_000,
54
56
  memory_usage_mb=450,
55
57
  embed_dim=768,
56
58
  license="apache-2.0",
@@ -95,6 +97,7 @@ text2vec_base_multilingual = ModelMeta(
95
97
  # So probably best not to.
96
98
  loader=sentence_transformers_loader,
97
99
  n_parameters=117654272,
100
+ n_embedding_parameters=96_014_208,
98
101
  memory_usage_mb=449,
99
102
  embed_dim=384,
100
103
  license="apache-2.0",
@@ -8,6 +8,7 @@ xlm_roberta_ua_distilled = ModelMeta(
8
8
  model_type=["dense"],
9
9
  loader=sentence_transformers_loader,
10
10
  n_parameters=278_000_000,
11
+ n_embedding_parameters=192_001_536,
11
12
  memory_usage_mb=1061,
12
13
  max_tokens=512,
13
14
  embed_dim=768,
@@ -72,6 +72,7 @@ uae_large_v1 = ModelMeta(
72
72
  revision="369c368f70f16a613f19f5598d4f12d9f44235d4",
73
73
  release_date="2023-12-04", # initial commit of hf model.
74
74
  n_parameters=int(335 * 1e6),
75
+ n_embedding_parameters=31_254_528,
75
76
  memory_usage_mb=1278,
76
77
  max_tokens=512,
77
78
  embed_dim=1024,