mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +28 -17
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/__init__.py +2 -0
  37. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  38. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  39. mteb/cache.py +10 -5
  40. mteb/cli/_display_tasks.py +9 -3
  41. mteb/cli/build_cli.py +5 -2
  42. mteb/cli/generate_model_card.py +9 -2
  43. mteb/deprecated_evaluator.py +16 -12
  44. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  64. mteb/evaluate.py +20 -18
  65. mteb/filter_tasks.py +12 -7
  66. mteb/get_tasks.py +9 -4
  67. mteb/languages/language_scripts.py +8 -3
  68. mteb/leaderboard/app.py +7 -3
  69. mteb/leaderboard/table.py +7 -2
  70. mteb/load_results.py +9 -3
  71. mteb/models/abs_encoder.py +22 -12
  72. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  73. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  74. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  75. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  76. mteb/models/get_model_meta.py +11 -4
  77. mteb/models/instruct_wrapper.py +13 -5
  78. mteb/models/model_implementations/align_models.py +10 -4
  79. mteb/models/model_implementations/amazon_models.py +1 -0
  80. mteb/models/model_implementations/andersborges.py +2 -0
  81. mteb/models/model_implementations/ara_models.py +1 -0
  82. mteb/models/model_implementations/arctic_models.py +8 -0
  83. mteb/models/model_implementations/b1ade_models.py +1 -0
  84. mteb/models/model_implementations/bedrock_models.py +20 -6
  85. mteb/models/model_implementations/bge_models.py +40 -1
  86. mteb/models/model_implementations/bica_model.py +1 -0
  87. mteb/models/model_implementations/blip2_models.py +11 -4
  88. mteb/models/model_implementations/blip_models.py +17 -4
  89. mteb/models/model_implementations/bm25.py +22 -14
  90. mteb/models/model_implementations/bmretriever_models.py +10 -2
  91. mteb/models/model_implementations/cadet_models.py +1 -0
  92. mteb/models/model_implementations/cde_models.py +11 -5
  93. mteb/models/model_implementations/clip_models.py +12 -4
  94. mteb/models/model_implementations/clips_models.py +3 -0
  95. mteb/models/model_implementations/codefuse_models.py +5 -0
  96. mteb/models/model_implementations/codesage_models.py +3 -0
  97. mteb/models/model_implementations/cohere_models.py +14 -4
  98. mteb/models/model_implementations/cohere_v.py +14 -4
  99. mteb/models/model_implementations/colpali_models.py +7 -3
  100. mteb/models/model_implementations/colqwen_models.py +17 -31
  101. mteb/models/model_implementations/colsmol_models.py +3 -1
  102. mteb/models/model_implementations/conan_models.py +11 -4
  103. mteb/models/model_implementations/dino_models.py +28 -4
  104. mteb/models/model_implementations/e5_instruct.py +4 -0
  105. mteb/models/model_implementations/e5_models.py +9 -0
  106. mteb/models/model_implementations/e5_v.py +10 -4
  107. mteb/models/model_implementations/eagerworks_models.py +11 -4
  108. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  109. mteb/models/model_implementations/en_code_retriever.py +1 -0
  110. mteb/models/model_implementations/euler_models.py +1 -0
  111. mteb/models/model_implementations/evaclip_models.py +13 -4
  112. mteb/models/model_implementations/fa_models.py +9 -0
  113. mteb/models/model_implementations/facebookai.py +2 -0
  114. mteb/models/model_implementations/geogpt_models.py +1 -0
  115. mteb/models/model_implementations/gme_v_models.py +7 -3
  116. mteb/models/model_implementations/google_models.py +15 -4
  117. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  118. mteb/models/model_implementations/gritlm_models.py +2 -0
  119. mteb/models/model_implementations/gte_models.py +9 -0
  120. mteb/models/model_implementations/hinvec_models.py +6 -1
  121. mteb/models/model_implementations/human.py +1 -0
  122. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  123. mteb/models/model_implementations/inf_models.py +2 -0
  124. mteb/models/model_implementations/jasper_models.py +14 -5
  125. mteb/models/model_implementations/jina_clip.py +10 -4
  126. mteb/models/model_implementations/jina_models.py +17 -5
  127. mteb/models/model_implementations/kalm_models.py +24 -12
  128. mteb/models/model_implementations/kblab.py +1 -0
  129. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  130. mteb/models/model_implementations/kfst.py +1 -0
  131. mteb/models/model_implementations/kowshik24_models.py +1 -0
  132. mteb/models/model_implementations/lens_models.py +2 -0
  133. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  134. mteb/models/model_implementations/linq_models.py +7 -1
  135. mteb/models/model_implementations/listconranker.py +10 -4
  136. mteb/models/model_implementations/llm2clip_models.py +12 -4
  137. mteb/models/model_implementations/llm2vec_models.py +20 -6
  138. mteb/models/model_implementations/mcinext_models.py +8 -2
  139. mteb/models/model_implementations/mdbr_models.py +2 -0
  140. mteb/models/model_implementations/misc_models.py +63 -0
  141. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  142. mteb/models/model_implementations/mme5_models.py +2 -1
  143. mteb/models/model_implementations/moco_models.py +11 -4
  144. mteb/models/model_implementations/mod_models.py +2 -1
  145. mteb/models/model_implementations/model2vec_models.py +23 -4
  146. mteb/models/model_implementations/moka_models.py +3 -0
  147. mteb/models/model_implementations/nbailab.py +3 -0
  148. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  149. mteb/models/model_implementations/nomic_models.py +16 -4
  150. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  151. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  152. mteb/models/model_implementations/nvidia_models.py +15 -4
  153. mteb/models/model_implementations/octen_models.py +3 -1
  154. mteb/models/model_implementations/openai_models.py +14 -4
  155. mteb/models/model_implementations/openclip_models.py +17 -4
  156. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  157. mteb/models/model_implementations/ops_moa_models.py +9 -2
  158. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  159. mteb/models/model_implementations/pawan_models.py +1 -0
  160. mteb/models/model_implementations/piccolo_models.py +2 -0
  161. mteb/models/model_implementations/promptriever_models.py +16 -6
  162. mteb/models/model_implementations/pylate_models.py +22 -13
  163. mteb/models/model_implementations/qodo_models.py +2 -0
  164. mteb/models/model_implementations/qtack_models.py +1 -0
  165. mteb/models/model_implementations/qwen3_models.py +11 -1
  166. mteb/models/model_implementations/qzhou_models.py +2 -0
  167. mteb/models/model_implementations/random_baseline.py +4 -3
  168. mteb/models/model_implementations/rasgaard_models.py +1 -0
  169. mteb/models/model_implementations/reasonir_model.py +65 -0
  170. mteb/models/model_implementations/repllama_models.py +15 -6
  171. mteb/models/model_implementations/rerankers_custom.py +13 -4
  172. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  173. mteb/models/model_implementations/richinfoai_models.py +1 -0
  174. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  175. mteb/models/model_implementations/ruri_models.py +10 -0
  176. mteb/models/model_implementations/salesforce_models.py +10 -1
  177. mteb/models/model_implementations/samilpwc_models.py +1 -0
  178. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  179. mteb/models/model_implementations/searchmap_models.py +1 -0
  180. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  181. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  182. mteb/models/model_implementations/seed_models.py +2 -1
  183. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  184. mteb/models/model_implementations/shuu_model.py +1 -0
  185. mteb/models/model_implementations/siglip_models.py +19 -4
  186. mteb/models/model_implementations/slm_models.py +7 -4
  187. mteb/models/model_implementations/sonar_models.py +2 -1
  188. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  189. mteb/models/model_implementations/stella_models.py +6 -0
  190. mteb/models/model_implementations/tarka_models.py +2 -0
  191. mteb/models/model_implementations/text2vec_models.py +3 -0
  192. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  193. mteb/models/model_implementations/uae_models.py +10 -4
  194. mteb/models/model_implementations/vdr_models.py +8 -1
  195. mteb/models/model_implementations/vi_vn_models.py +6 -0
  196. mteb/models/model_implementations/vista_models.py +11 -4
  197. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  198. mteb/models/model_implementations/voyage_models.py +25 -4
  199. mteb/models/model_implementations/voyage_v.py +11 -6
  200. mteb/models/model_implementations/xyz_models.py +1 -0
  201. mteb/models/model_implementations/youtu_models.py +1 -0
  202. mteb/models/model_implementations/yuan_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models_en.py +2 -1
  204. mteb/models/model_meta.py +47 -9
  205. mteb/models/models_protocols.py +19 -18
  206. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  207. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  208. mteb/models/search_wrappers.py +19 -12
  209. mteb/models/sentence_transformer_wrapper.py +4 -3
  210. mteb/models/vllm_wrapper.py +8 -6
  211. mteb/results/benchmark_results.py +22 -17
  212. mteb/results/model_result.py +21 -15
  213. mteb/results/task_result.py +15 -9
  214. mteb/similarity_functions.py +8 -2
  215. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  216. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  220. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  223. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  224. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  225. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  226. mteb/tasks/retrieval/eng/__init__.py +42 -0
  227. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  228. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  229. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  230. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  231. mteb/types/_encoder_io.py +1 -1
  232. mteb/types/statistics.py +9 -2
  233. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  234. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
  235. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  236. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  237. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  238. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
18
18
  release_date="2024-10-31",
19
19
  languages=["eng-Latn"],
20
20
  n_parameters=7110660096,
21
+ n_embedding_parameters=None,
21
22
  memory_usage_mb=13563,
22
23
  max_tokens=32768.0,
23
24
  embed_dim=None,
@@ -47,6 +48,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
47
48
  languages=[],
48
49
  loader=sentence_transformers_loader,
49
50
  n_parameters=278043648,
51
+ n_embedding_parameters=192_001_536,
50
52
  memory_usage_mb=1061,
51
53
  max_tokens=514.0,
52
54
  embed_dim=768,
@@ -148,6 +150,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
148
150
  languages=["eng-Latn"],
149
151
  loader=sentence_transformers_loader,
150
152
  n_parameters=None,
153
+ n_embedding_parameters=None,
151
154
  memory_usage_mb=None,
152
155
  max_tokens=None,
153
156
  embed_dim=768,
@@ -215,6 +218,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
215
218
  languages=[],
216
219
  loader=sentence_transformers_loader,
217
220
  n_parameters=2506172416,
221
+ n_embedding_parameters=None,
218
222
  memory_usage_mb=9560,
219
223
  max_tokens=8192.0,
220
224
  embed_dim=2048,
@@ -250,6 +254,7 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
250
254
  trust_remote_code=True,
251
255
  ),
252
256
  n_parameters=278043648,
257
+ n_embedding_parameters=192_001_536,
253
258
  memory_usage_mb=1061,
254
259
  max_tokens=514.0,
255
260
  embed_dim=768,
@@ -299,6 +304,7 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
299
304
  trust_remote_code=True,
300
305
  ),
301
306
  n_parameters=559890432,
307
+ n_embedding_parameters=256_002_048,
302
308
  memory_usage_mb=2136,
303
309
  max_tokens=514.0,
304
310
  embed_dim=1024,
@@ -348,6 +354,7 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
348
354
  trust_remote_code=True,
349
355
  ),
350
356
  n_parameters=117653760,
357
+ n_embedding_parameters=96_014_208,
351
358
  memory_usage_mb=449,
352
359
  max_tokens=512.0,
353
360
  embed_dim=384,
@@ -394,6 +401,7 @@ Mihaiii__Bulbasaur = ModelMeta(
394
401
  languages=None,
395
402
  loader=sentence_transformers_loader,
396
403
  n_parameters=17389824,
404
+ n_embedding_parameters=11_720_448,
397
405
  memory_usage_mb=66,
398
406
  max_tokens=512.0,
399
407
  embed_dim=384,
@@ -418,6 +426,7 @@ Mihaiii__Ivysaur = ModelMeta(
418
426
  languages=None,
419
427
  loader=sentence_transformers_loader,
420
428
  n_parameters=22713216,
429
+ n_embedding_parameters=11_720_448,
421
430
  memory_usage_mb=87,
422
431
  max_tokens=512.0,
423
432
  embed_dim=384,
@@ -442,6 +451,7 @@ Mihaiii__Squirtle = ModelMeta(
442
451
  languages=None,
443
452
  loader=sentence_transformers_loader,
444
453
  n_parameters=15615360,
454
+ n_embedding_parameters=11_720_448,
445
455
  memory_usage_mb=60,
446
456
  max_tokens=512.0,
447
457
  embed_dim=384,
@@ -466,6 +476,7 @@ Mihaiii__Venusaur = ModelMeta(
466
476
  languages=None,
467
477
  loader=sentence_transformers_loader,
468
478
  n_parameters=15615360,
479
+ n_embedding_parameters=11_720_448,
469
480
  memory_usage_mb=60,
470
481
  max_tokens=512.0,
471
482
  embed_dim=384,
@@ -490,6 +501,7 @@ Mihaiii__Wartortle = ModelMeta(
490
501
  languages=None,
491
502
  loader=sentence_transformers_loader,
492
503
  n_parameters=17389824,
504
+ n_embedding_parameters=11_720_448,
493
505
  memory_usage_mb=66,
494
506
  max_tokens=512.0,
495
507
  embed_dim=384,
@@ -514,6 +526,7 @@ Mihaiii__gte_micro = ModelMeta(
514
526
  languages=None,
515
527
  loader=sentence_transformers_loader,
516
528
  n_parameters=17389824,
529
+ n_embedding_parameters=11_720_448,
517
530
  memory_usage_mb=66,
518
531
  max_tokens=512.0,
519
532
  embed_dim=384,
@@ -537,6 +550,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
537
550
  languages=None,
538
551
  loader=sentence_transformers_loader,
539
552
  n_parameters=19164288,
553
+ n_embedding_parameters=11_720_448,
540
554
  memory_usage_mb=73,
541
555
  max_tokens=512.0,
542
556
  embed_dim=384,
@@ -560,6 +574,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
560
574
  languages=["fra-Latn"],
561
575
  loader=sentence_transformers_loader,
562
576
  n_parameters=559890432,
577
+ n_embedding_parameters=256_002_048,
563
578
  memory_usage_mb=2136,
564
579
  max_tokens=514.0,
565
580
  embed_dim=1024,
@@ -583,6 +598,7 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
583
598
  languages=["ara-Arab"],
584
599
  loader=sentence_transformers_loader,
585
600
  n_parameters=135193344,
601
+ n_embedding_parameters=49_152_000,
586
602
  memory_usage_mb=516,
587
603
  max_tokens=512.0,
588
604
  embed_dim=768,
@@ -615,6 +631,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
615
631
  languages=["ara-Arab"],
616
632
  loader=sentence_transformers_loader,
617
633
  n_parameters=117653760,
634
+ n_embedding_parameters=96_014_208,
618
635
  memory_usage_mb=449,
619
636
  max_tokens=512.0,
620
637
  embed_dim=384,
@@ -640,6 +657,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
640
657
  languages=["ara-Arab"],
641
658
  loader=sentence_transformers_loader,
642
659
  n_parameters=278043648,
660
+ n_embedding_parameters=192_001_536,
643
661
  memory_usage_mb=1061,
644
662
  max_tokens=514.0,
645
663
  embed_dim=768,
@@ -674,6 +692,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
674
692
  languages=["ara-Arab"],
675
693
  loader=sentence_transformers_loader,
676
694
  n_parameters=470926848,
695
+ n_embedding_parameters=384_885_504,
677
696
  memory_usage_mb=1796,
678
697
  max_tokens=512.0,
679
698
  embed_dim=768,
@@ -708,6 +727,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
708
727
  languages=["ara-Arab"],
709
728
  loader=sentence_transformers_loader,
710
729
  n_parameters=109486464,
730
+ n_embedding_parameters=23_444_736,
711
731
  memory_usage_mb=418,
712
732
  max_tokens=514.0,
713
733
  embed_dim=768,
@@ -742,6 +762,7 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
742
762
  languages=["ara-Arab"],
743
763
  loader=sentence_transformers_loader,
744
764
  n_parameters=162841344,
765
+ n_embedding_parameters=76_800_000,
745
766
  memory_usage_mb=621,
746
767
  max_tokens=512.0,
747
768
  embed_dim=768,
@@ -774,6 +795,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
774
795
  languages=None,
775
796
  loader=sentence_transformers_loader,
776
797
  n_parameters=None,
798
+ n_embedding_parameters=31_254_528,
777
799
  memory_usage_mb=None,
778
800
  max_tokens=512.0,
779
801
  embed_dim=1024,
@@ -797,6 +819,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
797
819
  languages=None,
798
820
  loader=sentence_transformers_loader,
799
821
  n_parameters=None,
822
+ n_embedding_parameters=None,
800
823
  memory_usage_mb=None,
801
824
  max_tokens=514.0,
802
825
  embed_dim=768,
@@ -829,6 +852,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
829
852
  languages=None,
830
853
  loader=sentence_transformers_loader,
831
854
  n_parameters=1279887360,
855
+ n_embedding_parameters=65_536_000,
832
856
  memory_usage_mb=2441,
833
857
  max_tokens=2048.0,
834
858
  embed_dim=2048,
@@ -852,6 +876,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
852
876
  languages=None,
853
877
  loader=sentence_transformers_loader,
854
878
  n_parameters=1279887360,
879
+ n_embedding_parameters=65_536_000,
855
880
  memory_usage_mb=2441,
856
881
  max_tokens=2048.0,
857
882
  embed_dim=2048,
@@ -875,6 +900,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
875
900
  languages=["fra-Latn", "eng-Latn"],
876
901
  loader=sentence_transformers_loader,
877
902
  n_parameters=1279887360,
903
+ n_embedding_parameters=65_536_000,
878
904
  memory_usage_mb=2441,
879
905
  max_tokens=2048.0,
880
906
  embed_dim=2048,
@@ -899,6 +925,7 @@ thenlper__gte_base = ModelMeta(
899
925
  languages=["eng-Latn"],
900
926
  loader=sentence_transformers_loader,
901
927
  n_parameters=109482752,
928
+ n_embedding_parameters=23_440_896,
902
929
  memory_usage_mb=209,
903
930
  max_tokens=512.0,
904
931
  embed_dim=768,
@@ -928,6 +955,7 @@ thenlper__gte_large = ModelMeta(
928
955
  languages=["eng-Latn"],
929
956
  loader=sentence_transformers_loader,
930
957
  n_parameters=335142400,
958
+ n_embedding_parameters=31_254_528,
931
959
  memory_usage_mb=639,
932
960
  max_tokens=512.0,
933
961
  embed_dim=1024,
@@ -957,6 +985,7 @@ thenlper__gte_small = ModelMeta(
957
985
  languages=["eng-Latn"],
958
986
  loader=sentence_transformers_loader,
959
987
  n_parameters=33360512,
988
+ n_embedding_parameters=11_720_448,
960
989
  memory_usage_mb=64,
961
990
  max_tokens=512.0,
962
991
  embed_dim=384,
@@ -986,6 +1015,7 @@ OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
986
1015
  languages=["pol-Latn"],
987
1016
  loader=sentence_transformers_loader,
988
1017
  n_parameters=103705344,
1018
+ n_embedding_parameters=None,
989
1019
  memory_usage_mb=396,
990
1020
  max_tokens=512.0,
991
1021
  embed_dim=768,
@@ -1009,6 +1039,7 @@ OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
1009
1039
  languages=["pol-Latn"],
1010
1040
  loader=sentence_transformers_loader,
1011
1041
  n_parameters=None,
1042
+ n_embedding_parameters=None,
1012
1043
  memory_usage_mb=None,
1013
1044
  max_tokens=514.0,
1014
1045
  embed_dim=768,
@@ -1032,6 +1063,7 @@ sdadas__mmlw_e5_base = ModelMeta(
1032
1063
  languages=["pol-Latn"],
1033
1064
  loader=sentence_transformers_loader,
1034
1065
  n_parameters=278043648,
1066
+ n_embedding_parameters=192_001_536,
1035
1067
  memory_usage_mb=1061,
1036
1068
  max_tokens=514.0,
1037
1069
  embed_dim=768,
@@ -1063,6 +1095,7 @@ dwzhu__e5_base_4k = ModelMeta(
1063
1095
  languages=["eng-Latn"],
1064
1096
  loader=sentence_transformers_loader,
1065
1097
  n_parameters=None,
1098
+ n_embedding_parameters=23_440_896,
1066
1099
  memory_usage_mb=None,
1067
1100
  max_tokens=4096.0,
1068
1101
  embed_dim=None,
@@ -1092,6 +1125,7 @@ sdadas__mmlw_e5_large = ModelMeta(
1092
1125
  languages=["pol-Latn"],
1093
1126
  loader=sentence_transformers_loader,
1094
1127
  n_parameters=559890432,
1128
+ n_embedding_parameters=256_002_048,
1095
1129
  memory_usage_mb=2136,
1096
1130
  max_tokens=514.0,
1097
1131
  embed_dim=1024,
@@ -1123,6 +1157,7 @@ sdadas__mmlw_e5_small = ModelMeta(
1123
1157
  languages=["pol-Latn"],
1124
1158
  loader=sentence_transformers_loader,
1125
1159
  n_parameters=117653760,
1160
+ n_embedding_parameters=96_014_208,
1126
1161
  memory_usage_mb=449,
1127
1162
  max_tokens=512.0,
1128
1163
  embed_dim=384,
@@ -1154,6 +1189,7 @@ sdadas__mmlw_roberta_base = ModelMeta(
1154
1189
  languages=["pol-Latn"],
1155
1190
  loader=sentence_transformers_loader,
1156
1191
  n_parameters=124442880,
1192
+ n_embedding_parameters=38_400_768,
1157
1193
  memory_usage_mb=475,
1158
1194
  max_tokens=514.0,
1159
1195
  embed_dim=768,
@@ -1185,6 +1221,7 @@ sdadas__mmlw_roberta_large = ModelMeta(
1185
1221
  languages=["pol-Latn"],
1186
1222
  loader=sentence_transformers_loader,
1187
1223
  n_parameters=434961408,
1224
+ n_embedding_parameters=131_073_024,
1188
1225
  memory_usage_mb=1659,
1189
1226
  max_tokens=514.0,
1190
1227
  embed_dim=1024,
@@ -1271,6 +1308,7 @@ izhx__udever_bloom_1b1 = ModelMeta(
1271
1308
  languages=udever_languages,
1272
1309
  loader=sentence_transformers_loader,
1273
1310
  n_parameters=None,
1311
+ n_embedding_parameters=385_351_680,
1274
1312
  memory_usage_mb=None,
1275
1313
  max_tokens=None,
1276
1314
  embed_dim=None,
@@ -1300,6 +1338,7 @@ izhx__udever_bloom_3b = ModelMeta(
1300
1338
  languages=udever_languages,
1301
1339
  loader=sentence_transformers_loader,
1302
1340
  n_parameters=None,
1341
+ n_embedding_parameters=642_252_800,
1303
1342
  memory_usage_mb=None,
1304
1343
  max_tokens=None,
1305
1344
  embed_dim=None,
@@ -1329,6 +1368,7 @@ izhx__udever_bloom_560m = ModelMeta(
1329
1368
  languages=udever_languages,
1330
1369
  loader=sentence_transformers_loader,
1331
1370
  n_parameters=None,
1371
+ n_embedding_parameters=256_901_120,
1332
1372
  memory_usage_mb=None,
1333
1373
  max_tokens=None,
1334
1374
  embed_dim=None,
@@ -1358,6 +1398,7 @@ izhx__udever_bloom_7b1 = ModelMeta(
1358
1398
  languages=udever_languages,
1359
1399
  loader=sentence_transformers_loader,
1360
1400
  n_parameters=None,
1401
+ n_embedding_parameters=1_027_604_480,
1361
1402
  memory_usage_mb=None,
1362
1403
  max_tokens=None,
1363
1404
  embed_dim=None,
@@ -1387,6 +1428,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1387
1428
  languages=["eng-Latn"],
1388
1429
  loader=sentence_transformers_loader,
1389
1430
  n_parameters=109482240,
1431
+ n_embedding_parameters=23_440_896,
1390
1432
  memory_usage_mb=418,
1391
1433
  max_tokens=512.0,
1392
1434
  embed_dim=768,
@@ -1437,6 +1479,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1437
1479
  languages=["eng-Latn"],
1438
1480
  loader=sentence_transformers_loader,
1439
1481
  n_parameters=22713216,
1482
+ n_embedding_parameters=11_720_448,
1440
1483
  memory_usage_mb=87,
1441
1484
  max_tokens=512.0,
1442
1485
  embed_dim=384,
@@ -1487,6 +1530,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1487
1530
  languages=["eng-Latn"],
1488
1531
  loader=sentence_transformers_loader,
1489
1532
  n_parameters=335141888,
1533
+ n_embedding_parameters=31_254_528,
1490
1534
  memory_usage_mb=1278,
1491
1535
  max_tokens=512.0,
1492
1536
  embed_dim=1024,
@@ -1537,6 +1581,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1537
1581
  languages=["eng-Latn"],
1538
1582
  loader=sentence_transformers_loader,
1539
1583
  n_parameters=33360000,
1584
+ n_embedding_parameters=11_720_448,
1540
1585
  memory_usage_mb=127,
1541
1586
  max_tokens=512.0,
1542
1587
  embed_dim=384,
@@ -1587,6 +1632,7 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1587
1632
  languages=None,
1588
1633
  loader=sentence_transformers_loader,
1589
1634
  n_parameters=None,
1635
+ n_embedding_parameters=1_026_793_472,
1590
1636
  memory_usage_mb=None,
1591
1637
  max_tokens=None,
1592
1638
  embed_dim=4096,
@@ -1616,6 +1662,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1616
1662
  languages=["deu-Latn"],
1617
1663
  loader=sentence_transformers_loader,
1618
1664
  n_parameters=335736320,
1665
+ n_embedding_parameters=31_848_448,
1619
1666
  memory_usage_mb=1281,
1620
1667
  max_tokens=512.0,
1621
1668
  embed_dim=1024,
@@ -1640,6 +1687,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1640
1687
  languages=["eng-Latn"],
1641
1688
  loader=sentence_transformers_loader,
1642
1689
  n_parameters=33360000,
1690
+ n_embedding_parameters=11_720_448,
1643
1691
  memory_usage_mb=127,
1644
1692
  max_tokens=512.0,
1645
1693
  embed_dim=384,
@@ -1678,6 +1726,7 @@ avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
1678
1726
  languages=["eng-Latn"],
1679
1727
  loader=sentence_transformers_loader,
1680
1728
  n_parameters=33360000,
1729
+ n_embedding_parameters=11720448,
1681
1730
  memory_usage_mb=127,
1682
1731
  max_tokens=512.0,
1683
1732
  embed_dim=384,
@@ -1701,6 +1750,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1701
1750
  languages=["eng-Latn"],
1702
1751
  loader=sentence_transformers_loader,
1703
1752
  n_parameters=22713216,
1753
+ n_embedding_parameters=11_720_448,
1704
1754
  memory_usage_mb=87,
1705
1755
  max_tokens=512.0,
1706
1756
  embed_dim=384,
@@ -1724,6 +1774,7 @@ deepfile__embedder_100p = ModelMeta(
1724
1774
  languages=None,
1725
1775
  loader=sentence_transformers_loader,
1726
1776
  n_parameters=None,
1777
+ n_embedding_parameters=192_001_536,
1727
1778
  memory_usage_mb=1061,
1728
1779
  max_tokens=514.0,
1729
1780
  embed_dim=768,
@@ -1747,6 +1798,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1747
1798
  languages=["eng-Latn"],
1748
1799
  loader=sentence_transformers_loader,
1749
1800
  n_parameters=None,
1801
+ n_embedding_parameters=23_440_896,
1750
1802
  memory_usage_mb=None,
1751
1803
  max_tokens=512.0,
1752
1804
  embed_dim=None,
@@ -1770,6 +1822,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1770
1822
  languages=None,
1771
1823
  loader=sentence_transformers_loader,
1772
1824
  n_parameters=98688000,
1825
+ n_embedding_parameters=None,
1773
1826
  memory_usage_mb=158,
1774
1827
  max_tokens=512.0,
1775
1828
  embed_dim=1024,
@@ -1793,6 +1846,7 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1793
1846
  languages=["ara-Arab", "eng-Latn"],
1794
1847
  loader=sentence_transformers_loader,
1795
1848
  n_parameters=559890432,
1849
+ n_embedding_parameters=256_002_048,
1796
1850
  memory_usage_mb=2136,
1797
1851
  max_tokens=514.0,
1798
1852
  embed_dim=1024,
@@ -1833,6 +1887,7 @@ openbmb__minicpm_embedding = ModelMeta(
1833
1887
  release_date="2024-09-04",
1834
1888
  languages=["zho-Hans", "eng-Latn"],
1835
1889
  n_parameters=2724880896,
1890
+ n_embedding_parameters=282_822_912,
1836
1891
  memory_usage_mb=5197,
1837
1892
  max_tokens=512.0,
1838
1893
  embed_dim=2304,
@@ -1857,6 +1912,7 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1857
1912
  languages=["ara-Arab", "eng-Latn"],
1858
1913
  loader=sentence_transformers_loader,
1859
1914
  n_parameters=135193344,
1915
+ n_embedding_parameters=49_152_000,
1860
1916
  memory_usage_mb=516,
1861
1917
  max_tokens=512.0,
1862
1918
  embed_dim=768,
@@ -1888,6 +1944,7 @@ sbert_chinese_general_v1 = ModelMeta(
1888
1944
  languages=["zho-Hans"],
1889
1945
  loader=sentence_transformers_loader,
1890
1946
  n_parameters=None,
1947
+ n_embedding_parameters=16_226_304,
1891
1948
  memory_usage_mb=None, # Not visible on repo
1892
1949
  max_tokens=512,
1893
1950
  embed_dim=128,
@@ -1916,6 +1973,7 @@ dmeta_embedding_zh_small = ModelMeta(
1916
1973
  languages=["zho-Hans"],
1917
1974
  loader=sentence_transformers_loader,
1918
1975
  n_parameters=int(74.2 * 1e6),
1976
+ n_embedding_parameters=16_226_304,
1919
1977
  memory_usage_mb=283,
1920
1978
  max_tokens=1024,
1921
1979
  embed_dim=768,
@@ -1939,6 +1997,7 @@ xiaobu_embedding = ModelMeta(
1939
1997
  languages=["zho-Hans"],
1940
1998
  loader=sentence_transformers_loader,
1941
1999
  n_parameters=int(326 * 1e6),
2000
+ n_embedding_parameters=21_635_072,
1942
2001
  memory_usage_mb=1244,
1943
2002
  max_tokens=512,
1944
2003
  embed_dim=1024,
@@ -1963,6 +2022,7 @@ xiaobu_embedding_v2 = ModelMeta(
1963
2022
  languages=["zho-Hans"],
1964
2023
  loader=sentence_transformers_loader,
1965
2024
  n_parameters=int(326 * 1e6),
2025
+ n_embedding_parameters=21_635_072,
1966
2026
  memory_usage_mb=1242,
1967
2027
  max_tokens=512,
1968
2028
  embed_dim=768,
@@ -1987,6 +2047,7 @@ yinka_embedding = ModelMeta(
1987
2047
  languages=["zho-Hans"],
1988
2048
  loader=sentence_transformers_loader,
1989
2049
  n_parameters=int(326 * 1e6),
2050
+ n_embedding_parameters=21_635_072,
1990
2051
  memory_usage_mb=1244,
1991
2052
  max_tokens=512,
1992
2053
  embed_dim=1024,
@@ -2010,6 +2071,7 @@ conan_embedding = ModelMeta(
2010
2071
  languages=["zho-Hans"],
2011
2072
  loader=sentence_transformers_loader,
2012
2073
  n_parameters=int(326 * 1e6),
2074
+ n_embedding_parameters=21_635_072,
2013
2075
  memory_usage_mb=1242,
2014
2076
  max_tokens=512,
2015
2077
  embed_dim=768,
@@ -2043,6 +2105,7 @@ ember_v1 = ModelMeta(
2043
2105
  release_date="2023-10-10",
2044
2106
  languages=["eng-Latn"],
2045
2107
  n_parameters=int(335 * 1e6),
2108
+ n_embedding_parameters=31_254_528,
2046
2109
  memory_usage_mb=1278,
2047
2110
  max_tokens=512,
2048
2111
  embed_dim=1024,
@@ -31,6 +31,7 @@ mxbai_embed_large_v1 = ModelMeta(
31
31
  revision="990580e27d329c7408b3741ecff85876e128e203",
32
32
  release_date="2024-03-07", # initial commit of hf model.
33
33
  n_parameters=335_000_000,
34
+ n_embedding_parameters=31_254_528,
34
35
  memory_usage_mb=639,
35
36
  max_tokens=512,
36
37
  embed_dim=1024,
@@ -75,6 +76,7 @@ mxbai_embed_2d_large_v1 = ModelMeta(
75
76
  revision="7e639ca8e344af398876ead3b19ec3c0b9068f49",
76
77
  release_date="2024-03-04", # initial commit of hf model.
77
78
  n_parameters=335_000_000,
79
+ n_embedding_parameters=31_254_528,
78
80
  memory_usage_mb=None,
79
81
  max_tokens=512,
80
82
  embed_dim=768,
@@ -106,6 +108,7 @@ mxbai_embed_xsmall_v1 = ModelMeta(
106
108
  revision="2f741ec33328bb57e4704e1238fc59a4a5745705",
107
109
  release_date="2024-08-13", # initial commit of hf model.
108
110
  n_parameters=24_100_000,
111
+ n_embedding_parameters=11_720_448,
109
112
  memory_usage_mb=None,
110
113
  max_tokens=512,
111
114
  embed_dim=384,
@@ -16,7 +16,8 @@ mme5_mllama = ModelMeta(
16
16
  revision="cbb328b9bf9ff5362c852c3166931903226d46f1",
17
17
  release_date="2025-02-12",
18
18
  languages=["eng-Latn"],
19
- n_parameters=10_600_000_000, # 10.6B
19
+ n_parameters=10_600_000_000, # 10.6B
20
+ n_embedding_parameters=None,
20
21
  memory_usage_mb=20300,
21
22
  max_tokens=128_000,
22
23
  embed_dim=4096,
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies, requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  MOCOV3_CITATION = """@Article{chen2021mocov3,
14
19
  author = {Xinlei Chen* and Saining Xie* and Kaiming He},
@@ -125,6 +130,7 @@ mocov3_vit_base = ModelMeta(
125
130
  release_date="2024-06-03",
126
131
  modalities=["image"],
127
132
  n_parameters=86_600_000,
133
+ n_embedding_parameters=None,
128
134
  memory_usage_mb=330,
129
135
  max_tokens=None,
130
136
  embed_dim=768,
@@ -149,6 +155,7 @@ mocov3_vit_large = ModelMeta(
149
155
  release_date="2024-06-03",
150
156
  modalities=["image"],
151
157
  n_parameters=304_000_000,
158
+ n_embedding_parameters=None,
152
159
  memory_usage_mb=1161,
153
160
  max_tokens=None,
154
161
  embed_dim=1024,
@@ -1,6 +1,6 @@
1
1
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
2
  from mteb.models.model_meta import ModelMeta
3
- from mteb.models.models_protocols import PromptType
3
+ from mteb.types import PromptType
4
4
 
5
5
 
6
6
  def instruction_template(
@@ -175,6 +175,7 @@ MoD_Embedding = ModelMeta(
175
175
  revision="acbb5b70fdab262226a6af2bc62001de8021b05c",
176
176
  release_date="2025-12-14",
177
177
  n_parameters=4021774336,
178
+ n_embedding_parameters=None,
178
179
  memory_usage_mb=7671,
179
180
  embed_dim=2560,
180
181
  max_tokens=32768,
@@ -1,17 +1,23 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import numpy as np
5
- from torch.utils.data import DataLoader
6
7
 
7
8
  from mteb._requires_package import requires_package
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
12
11
 
13
12
  from .bge_models import bge_training_data
14
13
 
14
+ if TYPE_CHECKING:
15
+ from torch.utils.data import DataLoader
16
+
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.types import Array, BatchedInput, PromptType
19
+
20
+
15
21
  logger = logging.getLogger(__name__)
16
22
 
17
23
  MODEL2VEC_CITATION = """@software{minishlab2024model2vec,
@@ -167,6 +173,7 @@ m2v_base_glove_subword = ModelMeta(
167
173
  revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4",
168
174
  release_date="2024-09-21",
169
175
  n_parameters=int(103 * 1e6),
176
+ n_embedding_parameters=int(103 * 1e6),
170
177
  memory_usage_mb=391,
171
178
  max_tokens=np.inf, # Theoretically infinite
172
179
  embed_dim=256,
@@ -193,6 +200,7 @@ m2v_base_glove = ModelMeta(
193
200
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b",
194
201
  release_date="2024-09-21",
195
202
  n_parameters=int(102 * 1e6),
203
+ n_embedding_parameters=int(102 * 1e6),
196
204
  memory_usage_mb=391,
197
205
  max_tokens=np.inf,
198
206
  embed_dim=256,
@@ -218,6 +226,7 @@ m2v_base_output = ModelMeta(
218
226
  revision="02460ae401a22b09d2c6652e23371398329551e2",
219
227
  release_date="2024-09-21",
220
228
  n_parameters=int(7.56 * 1e6),
229
+ n_embedding_parameters=int(7.56 * 1e6),
221
230
  memory_usage_mb=29,
222
231
  max_tokens=np.inf,
223
232
  embed_dim=256,
@@ -243,6 +252,7 @@ m2v_multilingual_output = ModelMeta(
243
252
  revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305",
244
253
  release_date="2024-09-21",
245
254
  n_parameters=int(128 * 1e6),
255
+ n_embedding_parameters=int(128 * 1e6),
246
256
  memory_usage_mb=489,
247
257
  max_tokens=np.inf,
248
258
  embed_dim=256,
@@ -268,6 +278,7 @@ potion_base_2m = ModelMeta(
268
278
  revision="86db093558fbced2072b929eb1690bce5272bd4b",
269
279
  release_date="2024-10-29",
270
280
  n_parameters=int(2 * 1e6),
281
+ n_embedding_parameters=int(2 * 1e6),
271
282
  memory_usage_mb=7,
272
283
  max_tokens=np.inf,
273
284
  embed_dim=64,
@@ -293,6 +304,7 @@ potion_base_4m = ModelMeta(
293
304
  revision="81b1802ada41afcd0987a37dc15e569c9fa76f04",
294
305
  release_date="2024-10-29",
295
306
  n_parameters=int(3.78 * 1e6),
307
+ n_embedding_parameters=int(3.78 * 1e6),
296
308
  memory_usage_mb=14,
297
309
  max_tokens=np.inf,
298
310
  embed_dim=128,
@@ -318,6 +330,7 @@ potion_base_8m = ModelMeta(
318
330
  revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce",
319
331
  release_date="2024-10-29",
320
332
  n_parameters=int(7.56 * 1e6),
333
+ n_embedding_parameters=int(7.56 * 1e6),
321
334
  memory_usage_mb=29,
322
335
  max_tokens=np.inf,
323
336
  embed_dim=256,
@@ -343,6 +356,7 @@ potion_multilingual_128m = ModelMeta(
343
356
  revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2a",
344
357
  release_date="2025-05-23",
345
358
  n_parameters=128 * 1e6,
359
+ n_embedding_parameters=128 * 1e6,
346
360
  memory_usage_mb=489,
347
361
  max_tokens=np.inf,
348
362
  embed_dim=256,
@@ -368,6 +382,7 @@ pubmed_bert_100k = ModelMeta(
368
382
  revision="bac5e3b12fb8c650e92a19c41b436732c4f16e9e",
369
383
  release_date="2025-01-03",
370
384
  n_parameters=1 * 1e5,
385
+ n_embedding_parameters=1 * 1e5,
371
386
  memory_usage_mb=0,
372
387
  max_tokens=np.inf,
373
388
  embed_dim=64,
@@ -392,6 +407,7 @@ pubmed_bert_500k = ModelMeta(
392
407
  revision="34ba71e35c393fdad7ed695113f653feb407b16b",
393
408
  release_date="2025-01-03",
394
409
  n_parameters=5 * 1e5,
410
+ n_embedding_parameters=5 * 1e5,
395
411
  memory_usage_mb=2,
396
412
  max_tokens=np.inf,
397
413
  embed_dim=64,
@@ -416,6 +432,7 @@ pubmed_bert_1m = ModelMeta(
416
432
  revision="2b7fed222594708da6d88bcda92ae9b434b7ddd1",
417
433
  release_date="2025-01-03",
418
434
  n_parameters=1 * 1e6,
435
+ n_embedding_parameters=1 * 1e6,
419
436
  memory_usage_mb=2,
420
437
  max_tokens=np.inf,
421
438
  embed_dim=64,
@@ -440,6 +457,7 @@ pubmed_bert_2m = ModelMeta(
440
457
  revision="1d7bbe04d6713e425161146bfdc71473cbed498a",
441
458
  release_date="2025-01-03",
442
459
  n_parameters=1.95 * 1e6,
460
+ n_embedding_parameters=1.95 * 1e6,
443
461
  memory_usage_mb=7,
444
462
  max_tokens=np.inf,
445
463
  embed_dim=64,
@@ -464,6 +482,7 @@ pubmed_bert_8m = ModelMeta(
464
482
  revision="387d350015e963744f4fafe56a574b7cd48646c9",
465
483
  release_date="2025-01-03",
466
484
  n_parameters=7.81 * 1e6,
485
+ n_embedding_parameters=7.81 * 1e6,
467
486
  memory_usage_mb=30,
468
487
  max_tokens=np.inf,
469
488
  embed_dim=256,