mteb 2.7.3__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. mteb/abstasks/retrieval.py +1 -1
  2. mteb/benchmarks/benchmarks/__init__.py +2 -0
  3. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  4. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  5. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  6. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  7. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  8. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  9. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  10. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  11. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  12. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  13. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  14. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  15. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  16. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  17. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  18. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  19. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  20. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  21. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  22. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  23. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  24. mteb/models/model_implementations/align_models.py +1 -0
  25. mteb/models/model_implementations/amazon_models.py +1 -0
  26. mteb/models/model_implementations/andersborges.py +2 -0
  27. mteb/models/model_implementations/ara_models.py +1 -0
  28. mteb/models/model_implementations/arctic_models.py +8 -0
  29. mteb/models/model_implementations/b1ade_models.py +1 -0
  30. mteb/models/model_implementations/bedrock_models.py +4 -0
  31. mteb/models/model_implementations/bge_models.py +40 -1
  32. mteb/models/model_implementations/bica_model.py +1 -0
  33. mteb/models/model_implementations/blip2_models.py +2 -0
  34. mteb/models/model_implementations/blip_models.py +8 -0
  35. mteb/models/model_implementations/bm25.py +8 -5
  36. mteb/models/model_implementations/bmretriever_models.py +4 -0
  37. mteb/models/model_implementations/cadet_models.py +1 -0
  38. mteb/models/model_implementations/cde_models.py +2 -0
  39. mteb/models/model_implementations/clip_models.py +3 -0
  40. mteb/models/model_implementations/clips_models.py +3 -0
  41. mteb/models/model_implementations/codefuse_models.py +5 -0
  42. mteb/models/model_implementations/codesage_models.py +3 -0
  43. mteb/models/model_implementations/cohere_models.py +4 -0
  44. mteb/models/model_implementations/cohere_v.py +5 -0
  45. mteb/models/model_implementations/colpali_models.py +3 -0
  46. mteb/models/model_implementations/colqwen_models.py +7 -0
  47. mteb/models/model_implementations/colsmol_models.py +2 -0
  48. mteb/models/model_implementations/conan_models.py +1 -0
  49. mteb/models/model_implementations/dino_models.py +19 -0
  50. mteb/models/model_implementations/e5_instruct.py +4 -0
  51. mteb/models/model_implementations/e5_models.py +9 -0
  52. mteb/models/model_implementations/e5_v.py +1 -0
  53. mteb/models/model_implementations/eagerworks_models.py +1 -0
  54. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  55. mteb/models/model_implementations/en_code_retriever.py +1 -0
  56. mteb/models/model_implementations/euler_models.py +1 -0
  57. mteb/models/model_implementations/evaclip_models.py +4 -0
  58. mteb/models/model_implementations/fa_models.py +9 -0
  59. mteb/models/model_implementations/facebookai.py +2 -0
  60. mteb/models/model_implementations/geogpt_models.py +1 -0
  61. mteb/models/model_implementations/gme_v_models.py +2 -0
  62. mteb/models/model_implementations/google_models.py +5 -0
  63. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  64. mteb/models/model_implementations/gritlm_models.py +2 -0
  65. mteb/models/model_implementations/gte_models.py +9 -0
  66. mteb/models/model_implementations/hinvec_models.py +1 -0
  67. mteb/models/model_implementations/human.py +1 -0
  68. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  69. mteb/models/model_implementations/inf_models.py +2 -0
  70. mteb/models/model_implementations/jasper_models.py +2 -0
  71. mteb/models/model_implementations/jina_clip.py +1 -0
  72. mteb/models/model_implementations/jina_models.py +7 -0
  73. mteb/models/model_implementations/kalm_models.py +6 -0
  74. mteb/models/model_implementations/kblab.py +1 -0
  75. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  76. mteb/models/model_implementations/kfst.py +1 -0
  77. mteb/models/model_implementations/kowshik24_models.py +1 -0
  78. mteb/models/model_implementations/lens_models.py +2 -0
  79. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  80. mteb/models/model_implementations/linq_models.py +1 -0
  81. mteb/models/model_implementations/listconranker.py +1 -0
  82. mteb/models/model_implementations/llm2clip_models.py +3 -0
  83. mteb/models/model_implementations/llm2vec_models.py +8 -0
  84. mteb/models/model_implementations/mcinext_models.py +3 -0
  85. mteb/models/model_implementations/mdbr_models.py +2 -0
  86. mteb/models/model_implementations/misc_models.py +63 -0
  87. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  88. mteb/models/model_implementations/mme5_models.py +2 -1
  89. mteb/models/model_implementations/moco_models.py +2 -0
  90. mteb/models/model_implementations/mod_models.py +1 -0
  91. mteb/models/model_implementations/model2vec_models.py +13 -0
  92. mteb/models/model_implementations/moka_models.py +3 -0
  93. mteb/models/model_implementations/nbailab.py +3 -0
  94. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  95. mteb/models/model_implementations/nomic_models.py +6 -0
  96. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  97. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  98. mteb/models/model_implementations/nvidia_models.py +3 -0
  99. mteb/models/model_implementations/octen_models.py +2 -0
  100. mteb/models/model_implementations/openai_models.py +5 -0
  101. mteb/models/model_implementations/openclip_models.py +8 -0
  102. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  103. mteb/models/model_implementations/ops_moa_models.py +2 -0
  104. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  105. mteb/models/model_implementations/pawan_models.py +1 -0
  106. mteb/models/model_implementations/piccolo_models.py +2 -0
  107. mteb/models/model_implementations/promptriever_models.py +4 -0
  108. mteb/models/model_implementations/pylate_models.py +3 -0
  109. mteb/models/model_implementations/qodo_models.py +2 -0
  110. mteb/models/model_implementations/qtack_models.py +1 -0
  111. mteb/models/model_implementations/qwen3_models.py +3 -0
  112. mteb/models/model_implementations/qzhou_models.py +2 -0
  113. mteb/models/model_implementations/rasgaard_models.py +1 -0
  114. mteb/models/model_implementations/reasonir_model.py +65 -0
  115. mteb/models/model_implementations/repllama_models.py +2 -0
  116. mteb/models/model_implementations/rerankers_custom.py +3 -0
  117. mteb/models/model_implementations/rerankers_monot5_based.py +14 -0
  118. mteb/models/model_implementations/richinfoai_models.py +1 -0
  119. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  120. mteb/models/model_implementations/ruri_models.py +10 -0
  121. mteb/models/model_implementations/salesforce_models.py +3 -0
  122. mteb/models/model_implementations/samilpwc_models.py +1 -0
  123. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  124. mteb/models/model_implementations/searchmap_models.py +1 -0
  125. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  126. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +1 -0
  127. mteb/models/model_implementations/seed_models.py +1 -0
  128. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  129. mteb/models/model_implementations/shuu_model.py +1 -0
  130. mteb/models/model_implementations/siglip_models.py +10 -0
  131. mteb/models/model_implementations/sonar_models.py +2 -1
  132. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  133. mteb/models/model_implementations/stella_models.py +6 -0
  134. mteb/models/model_implementations/tarka_models.py +2 -0
  135. mteb/models/model_implementations/text2vec_models.py +3 -0
  136. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  137. mteb/models/model_implementations/uae_models.py +1 -0
  138. mteb/models/model_implementations/vdr_models.py +1 -0
  139. mteb/models/model_implementations/vi_vn_models.py +6 -0
  140. mteb/models/model_implementations/vista_models.py +2 -0
  141. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  142. mteb/models/model_implementations/voyage_models.py +15 -0
  143. mteb/models/model_implementations/voyage_v.py +1 -0
  144. mteb/models/model_implementations/xyz_models.py +1 -0
  145. mteb/models/model_implementations/youtu_models.py +1 -0
  146. mteb/models/model_implementations/yuan_models.py +1 -0
  147. mteb/models/model_implementations/yuan_models_en.py +1 -0
  148. mteb/models/model_meta.py +35 -2
  149. mteb/tasks/retrieval/eng/__init__.py +42 -0
  150. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  151. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  152. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  153. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/RECORD +157 -136
  154. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  155. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  156. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  157. {mteb-2.7.3.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -215,6 +215,7 @@ nomic_embed_v1_5 = ModelMeta(
215
215
  release_date="2024-02-10", # first commit
216
216
  citation=NOMIC_CITATION,
217
217
  n_parameters=137_000_000,
218
+ n_embedding_parameters=None,
218
219
  memory_usage_mb=522,
219
220
  max_tokens=8192,
220
221
  embed_dim=768,
@@ -249,6 +250,7 @@ nomic_embed_v1 = ModelMeta(
249
250
  revision="0759316f275aa0cb93a5b830973843ca66babcf5",
250
251
  release_date="2024-01-31", # first commit
251
252
  n_parameters=None,
253
+ n_embedding_parameters=None,
252
254
  memory_usage_mb=522,
253
255
  max_tokens=8192,
254
256
  embed_dim=768,
@@ -284,6 +286,7 @@ nomic_embed_v1_ablated = ModelMeta(
284
286
  revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f",
285
287
  release_date="2024-01-15", # first commit
286
288
  n_parameters=None,
289
+ n_embedding_parameters=None,
287
290
  memory_usage_mb=None,
288
291
  max_tokens=8192,
289
292
  embed_dim=768,
@@ -312,6 +315,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
312
315
  revision="b53d557b15ae63852847c222d336c1609eced93c",
313
316
  release_date="2024-01-15", # first commit
314
317
  n_parameters=None,
318
+ n_embedding_parameters=None,
315
319
  memory_usage_mb=None,
316
320
  max_tokens=8192,
317
321
  embed_dim=768,
@@ -340,6 +344,7 @@ nomic_modern_bert_embed = ModelMeta(
340
344
  revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
341
345
  release_date="2024-12-29",
342
346
  n_parameters=149_000_000,
347
+ n_embedding_parameters=None,
343
348
  memory_usage_mb=568,
344
349
  max_tokens=8192,
345
350
  embed_dim=768,
@@ -479,6 +484,7 @@ nomic_embed_text_v2_moe = ModelMeta(
479
484
  revision="1066b6599d099fbb93dfcb64f9c37a7c9e503e85",
480
485
  release_date="2025-02-07",
481
486
  n_parameters=475292928,
487
+ n_embedding_parameters=None,
482
488
  memory_usage_mb=1813,
483
489
  max_tokens=512,
484
490
  embed_dim=768,
@@ -175,6 +175,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
175
175
  release_date="2024-06-08",
176
176
  modalities=["image", "text"],
177
177
  n_parameters=92_900_000,
178
+ n_embedding_parameters=None,
178
179
  memory_usage_mb=355,
179
180
  max_tokens=2048,
180
181
  embed_dim=768,
@@ -162,6 +162,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
162
162
  release_date="2025-06-27",
163
163
  modalities=["image", "text"],
164
164
  n_parameters=2_418_000_000,
165
+ n_embedding_parameters=None,
165
166
  memory_usage_mb=4610,
166
167
  max_tokens=8192,
167
168
  embed_dim=2048,
@@ -189,6 +190,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
189
190
  release_date="2025-06-27",
190
191
  modalities=["image", "text"],
191
192
  n_parameters=4_407_000_000,
193
+ n_embedding_parameters=None,
192
194
  memory_usage_mb=8403,
193
195
  max_tokens=8192,
194
196
  embed_dim=3072,
@@ -204,6 +204,7 @@ NV_embed_v2 = ModelMeta(
204
204
  revision="7604d305b621f14095a1aa23d351674c2859553a",
205
205
  release_date="2024-09-09", # initial commit of hf model.
206
206
  n_parameters=7_850_000_000,
207
+ n_embedding_parameters=None,
207
208
  memory_usage_mb=14975,
208
209
  embed_dim=4096,
209
210
  license="cc-by-nc-4.0",
@@ -235,6 +236,7 @@ NV_embed_v1 = ModelMeta(
235
236
  revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
236
237
  release_date="2024-09-13", # initial commit of hf model.
237
238
  n_parameters=7_850_000_000,
239
+ n_embedding_parameters=None,
238
240
  memory_usage_mb=14975,
239
241
  embed_dim=4096,
240
242
  license="cc-by-nc-4.0",
@@ -624,6 +626,7 @@ llama_embed_nemotron_8b = ModelMeta(
624
626
  revision="84a375593d27d3528beb4e104822515659e093b4",
625
627
  release_date="2025-10-23",
626
628
  n_parameters=7_504_924_672,
629
+ n_embedding_parameters=None,
627
630
  memory_usage_mb=28629,
628
631
  embed_dim=4096,
629
632
  license="https://huggingface.co/nvidia/llama-embed-nemotron-8b/blob/main/LICENSE",
@@ -208,6 +208,7 @@ Octen_Embedding_4B = ModelMeta(
208
208
  revision="6e188e3b072c3e3678b235ad84e6e97bcbb71e8f",
209
209
  release_date="2025-12-30",
210
210
  n_parameters=4021774336,
211
+ n_embedding_parameters=None,
211
212
  memory_usage_mb=7671,
212
213
  embed_dim=2560,
213
214
  max_tokens=32768,
@@ -238,6 +239,7 @@ Octen_Embedding_8B = ModelMeta(
238
239
  revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
239
240
  release_date="2025-12-23",
240
241
  n_parameters=7567295488,
242
+ n_embedding_parameters=None,
241
243
  memory_usage_mb=14433,
242
244
  embed_dim=4096,
243
245
  max_tokens=32768,
@@ -185,6 +185,7 @@ text_embedding_3_small = ModelMeta(
185
185
  embed_dim=1536,
186
186
  open_weights=False,
187
187
  n_parameters=None,
188
+ n_embedding_parameters=None,
188
189
  memory_usage_mb=None,
189
190
  license=None,
190
191
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -213,6 +214,7 @@ text_embedding_3_large = ModelMeta(
213
214
  framework=["API"],
214
215
  use_instructions=False,
215
216
  n_parameters=None,
217
+ n_embedding_parameters=None,
216
218
  memory_usage_mb=None,
217
219
  public_training_code=None,
218
220
  public_training_data=None, # assumed
@@ -238,6 +240,7 @@ text_embedding_ada_002 = ModelMeta(
238
240
  framework=["API"],
239
241
  use_instructions=False,
240
242
  n_parameters=None,
243
+ n_embedding_parameters=None,
241
244
  memory_usage_mb=None,
242
245
  public_training_code=None,
243
246
  public_training_data=None, # assumed
@@ -262,6 +265,7 @@ text_embedding_3_small_512 = ModelMeta(
262
265
  embed_dim=512,
263
266
  open_weights=False,
264
267
  n_parameters=None,
268
+ n_embedding_parameters=None,
265
269
  memory_usage_mb=None,
266
270
  license=None,
267
271
  reference="https://openai.com/index/new-embedding-models-and-api-updates/",
@@ -292,6 +296,7 @@ text_embedding_3_large_512 = ModelMeta(
292
296
  framework=["API"],
293
297
  use_instructions=False,
294
298
  n_parameters=None,
299
+ n_embedding_parameters=None,
295
300
  memory_usage_mb=None,
296
301
  public_training_code=None,
297
302
  public_training_data=None, # assumed
@@ -133,6 +133,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
133
133
  release_date="2023-04-26",
134
134
  modalities=["image", "text"],
135
135
  n_parameters=428_000_000,
136
+ n_embedding_parameters=None,
136
137
  memory_usage_mb=1633,
137
138
  max_tokens=77,
138
139
  embed_dim=768,
@@ -159,6 +160,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
159
160
  release_date="2023-04-26",
160
161
  modalities=["image", "text"],
161
162
  n_parameters=151_000_000,
163
+ n_embedding_parameters=None,
162
164
  memory_usage_mb=576,
163
165
  max_tokens=77,
164
166
  embed_dim=512,
@@ -185,6 +187,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
185
187
  release_date="2023-04-26",
186
188
  modalities=["image", "text"],
187
189
  n_parameters=150_000_000,
190
+ n_embedding_parameters=None,
188
191
  memory_usage_mb=572,
189
192
  max_tokens=77,
190
193
  embed_dim=512,
@@ -211,6 +214,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
211
214
  release_date="2023-01-23",
212
215
  modalities=["image", "text"],
213
216
  n_parameters=2_540_000_000,
217
+ n_embedding_parameters=None,
214
218
  memory_usage_mb=9689,
215
219
  max_tokens=77,
216
220
  embed_dim=1280,
@@ -237,6 +241,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
237
241
  release_date="2023-03-06",
238
242
  modalities=["image", "text"],
239
243
  n_parameters=1_367_000_000,
244
+ n_embedding_parameters=None,
240
245
  memory_usage_mb=5215,
241
246
  max_tokens=77,
242
247
  embed_dim=1024,
@@ -263,6 +268,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
263
268
  release_date="2022-09-15",
264
269
  modalities=["image", "text"],
265
270
  n_parameters=986_000_000,
271
+ n_embedding_parameters=None,
266
272
  memory_usage_mb=3762,
267
273
  max_tokens=77,
268
274
  embed_dim=1024,
@@ -289,6 +295,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
289
295
  release_date="2022-09-15",
290
296
  modalities=["image", "text"],
291
297
  n_parameters=428_000_000,
298
+ n_embedding_parameters=None,
292
299
  memory_usage_mb=1631,
293
300
  max_tokens=77,
294
301
  embed_dim=768,
@@ -315,6 +322,7 @@ CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
315
322
  release_date="2022-09-15",
316
323
  modalities=["image", "text"],
317
324
  n_parameters=151_000_000,
325
+ n_embedding_parameters=None,
318
326
  memory_usage_mb=577,
319
327
  max_tokens=77,
320
328
  embed_dim=512,
@@ -140,6 +140,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
140
140
  revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
141
141
  release_date="2025-06-18",
142
142
  n_parameters=137_394_234,
143
+ n_embedding_parameters=23_440_896,
143
144
  memory_usage_mb=549,
144
145
  embed_dim=30522,
145
146
  license="apache-2.0",
@@ -166,6 +167,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
166
167
  revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
167
168
  release_date="2025-03-28",
168
169
  n_parameters=66_985_530,
170
+ n_embedding_parameters=23_440_896,
169
171
  memory_usage_mb=267,
170
172
  embed_dim=30522,
171
173
  license="apache-2.0",
@@ -188,6 +190,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
188
190
  revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
189
191
  release_date="2024-07-17",
190
192
  n_parameters=66_985_530,
193
+ n_embedding_parameters=23_440_896,
191
194
  memory_usage_mb=267,
192
195
  embed_dim=30522,
193
196
  license="apache-2.0",
@@ -211,6 +214,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
211
214
  revision="4af867a426867dfdd744097531046f4289a32fdd",
212
215
  release_date="2024-07-18",
213
216
  n_parameters=22_744_506,
217
+ n_embedding_parameters=11_720_448,
214
218
  memory_usage_mb=86,
215
219
  embed_dim=30522,
216
220
  license="apache-2.0",
@@ -233,6 +237,7 @@ opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
233
237
  revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
234
238
  release_date="2024-03-07",
235
239
  n_parameters=132_955_194,
240
+ n_embedding_parameters=23_440_896,
236
241
  memory_usage_mb=507,
237
242
  embed_dim=30522,
238
243
  license="apache-2.0",
@@ -33,6 +33,7 @@ ops_moa_conan_embedding = ModelMeta(
33
33
  languages=["zho-Hans"],
34
34
  loader=OPSWrapper,
35
35
  n_parameters=int(343 * 1e6),
36
+ n_embedding_parameters=21_635_072,
36
37
  memory_usage_mb=1308,
37
38
  max_tokens=512,
38
39
  embed_dim=1536,
@@ -65,6 +66,7 @@ ops_moa_yuan_embedding = ModelMeta(
65
66
  languages=["zho-Hans"],
66
67
  loader=OPSWrapper,
67
68
  n_parameters=int(343 * 1e6),
69
+ n_embedding_parameters=21_635_072,
68
70
  memory_usage_mb=1242,
69
71
  max_tokens=512,
70
72
  embed_dim=1536,
@@ -4,6 +4,7 @@ solon_embeddings_1_1 = ModelMeta(
4
4
  name="OrdalieTech/Solon-embeddings-mini-beta-1.1",
5
5
  languages=["fra-Latn"],
6
6
  n_parameters=210_000_000,
7
+ n_embedding_parameters=None,
7
8
  public_training_code=None,
8
9
  memory_usage_mb=808.0,
9
10
  open_weights=True,
@@ -20,6 +20,7 @@ pawan_embd_68m = ModelMeta(
20
20
  revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
21
21
  release_date="2025-12-08",
22
22
  n_parameters=68_000_000,
23
+ n_embedding_parameters=None,
23
24
  memory_usage_mb=260,
24
25
  embed_dim=768,
25
26
  license="apache-2.0",
@@ -12,6 +12,7 @@ piccolo_base_zh = ModelMeta(
12
12
  revision="47c0a63b8f667c3482e05b2fd45577bb19252196",
13
13
  release_date="2023-09-04", # first commit
14
14
  n_parameters=None,
15
+ n_embedding_parameters=16_226_304,
15
16
  memory_usage_mb=None, # can't see on model card
16
17
  embed_dim=768,
17
18
  license="mit",
@@ -37,6 +38,7 @@ piccolo_large_zh_v2 = ModelMeta(
37
38
  revision="05948c1d889355936bdf9db7d30df57dd78d25a3",
38
39
  release_date="2024-04-22", # first commit
39
40
  n_parameters=None,
41
+ n_embedding_parameters=None,
40
42
  memory_usage_mb=None, # we don't know because they removed the model
41
43
  embed_dim=1024,
42
44
  license="not specified",
@@ -87,6 +87,7 @@ promptriever_llama2 = ModelMeta(
87
87
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
88
88
  release_date="2024-09-15",
89
89
  n_parameters=7_000_000_000,
90
+ n_embedding_parameters=None,
90
91
  memory_usage_mb=26703,
91
92
  max_tokens=4096,
92
93
  embed_dim=4096,
@@ -123,6 +124,7 @@ promptriever_llama3 = ModelMeta(
123
124
  },
124
125
  release_date="2024-09-15",
125
126
  n_parameters=8_000_000_000,
127
+ n_embedding_parameters=None,
126
128
  memory_usage_mb=30518,
127
129
  max_tokens=8192,
128
130
  embed_dim=4096,
@@ -152,6 +154,7 @@ promptriever_llama3_instruct = ModelMeta(
152
154
  revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
153
155
  release_date="2024-09-15",
154
156
  n_parameters=8_000_000_000,
157
+ n_embedding_parameters=None,
155
158
  memory_usage_mb=30518,
156
159
  max_tokens=8192,
157
160
  embed_dim=4096,
@@ -185,6 +188,7 @@ promptriever_mistral_v1 = ModelMeta(
185
188
  revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
186
189
  release_date="2024-09-15",
187
190
  n_parameters=7_000_000_000,
191
+ n_embedding_parameters=131_072_000,
188
192
  memory_usage_mb=26703,
189
193
  training_datasets={
190
194
  # "samaya-ai/msmarco-w-instructions",
@@ -352,6 +352,7 @@ colbert_v2 = ModelMeta(
352
352
  public_training_data=None,
353
353
  release_date="2024-09-21",
354
354
  n_parameters=int(110 * 1e6),
355
+ n_embedding_parameters=23_440_896,
355
356
  memory_usage_mb=418,
356
357
  max_tokens=180,
357
358
  embed_dim=None,
@@ -408,6 +409,7 @@ jina_colbert_v2 = ModelMeta(
408
409
  public_training_data=None,
409
410
  release_date="2024-08-16",
410
411
  n_parameters=int(559 * 1e6),
412
+ n_embedding_parameters=None,
411
413
  memory_usage_mb=1067,
412
414
  max_tokens=8192,
413
415
  embed_dim=None,
@@ -464,6 +466,7 @@ lightonai__gte_moderncolbert_v1 = ModelMeta(
464
466
  public_training_data="https://huggingface.co/datasets/lightonai/ms-marco-en-bge-gemma",
465
467
  release_date="2025-04-30",
466
468
  n_parameters=int(149 * 1e6),
469
+ n_embedding_parameters=None,
467
470
  memory_usage_mb=None,
468
471
  max_tokens=8192,
469
472
  embed_dim=None,
@@ -36,6 +36,7 @@ Qodo_Embed_1_1_5B = ModelMeta(
36
36
  revision="84bbef079b32e8823ec226d4e9e92902706b0eb6",
37
37
  release_date="2025-02-19",
38
38
  n_parameters=1_780_000_000,
39
+ n_embedding_parameters=232_928_256,
39
40
  memory_usage_mb=6776,
40
41
  embed_dim=1536,
41
42
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
@@ -59,6 +60,7 @@ Qodo_Embed_1_7B = ModelMeta(
59
60
  revision="f9edd9bf7f687c0e832424058e265120f603cd81",
60
61
  release_date="2025-02-24",
61
62
  n_parameters=7_613_000_000,
63
+ n_embedding_parameters=None,
62
64
  memory_usage_mb=29040,
63
65
  embed_dim=3584,
64
66
  license="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE",
@@ -31,6 +31,7 @@ mini_gte = ModelMeta(
31
31
  revision="7fbe6f9b4cc42615e0747299f837ad7769025492",
32
32
  release_date="2025-01-28",
33
33
  n_parameters=int(66.3 * 1e6),
34
+ n_embedding_parameters=23_440_896,
34
35
  memory_usage_mb=253,
35
36
  embed_dim=768,
36
37
  license="apache-2.0",
@@ -147,6 +147,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
147
147
  revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
148
148
  release_date="2025-06-05",
149
149
  n_parameters=595776512,
150
+ n_embedding_parameters=None,
150
151
  memory_usage_mb=1136,
151
152
  embed_dim=1024,
152
153
  max_tokens=32768,
@@ -170,6 +171,7 @@ Qwen3_Embedding_4B = ModelMeta(
170
171
  revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
171
172
  release_date="2025-06-05",
172
173
  n_parameters=4021774336,
174
+ n_embedding_parameters=None,
173
175
  memory_usage_mb=7671,
174
176
  embed_dim=2560,
175
177
  max_tokens=32768,
@@ -193,6 +195,7 @@ Qwen3_Embedding_8B = ModelMeta(
193
195
  revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
194
196
  release_date="2025-06-05",
195
197
  n_parameters=7567295488,
198
+ n_embedding_parameters=None,
196
199
  memory_usage_mb=14433,
197
200
  embed_dim=4096,
198
201
  max_tokens=32768,
@@ -64,6 +64,7 @@ QZhou_Embedding = ModelMeta(
64
64
  revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
65
65
  release_date="2025-08-24",
66
66
  n_parameters=7_070_619_136,
67
+ n_embedding_parameters=None,
67
68
  memory_usage_mb=14436,
68
69
  embed_dim=3584,
69
70
  license="apache-2.0",
@@ -98,6 +99,7 @@ QZhou_Embedding_Zh = ModelMeta(
98
99
  revision="0321ccb126413d1e49c5ce908e802b63d35f18e2",
99
100
  release_date="2025-09-28",
100
101
  n_parameters=7_575_747_328,
102
+ n_embedding_parameters=None,
101
103
  memory_usage_mb=29431,
102
104
  embed_dim=1792,
103
105
  license="apache-2.0",
@@ -12,6 +12,7 @@ potion_base_8m = ModelMeta(
12
12
  revision="387897cfb09992e6d45ea9cd7b28b9fcf119e23a",
13
13
  release_date="2025-10-08",
14
14
  n_parameters=22893312,
15
+ n_embedding_parameters=22893312,
15
16
  memory_usage_mb=87,
16
17
  max_tokens=np.inf,
17
18
  embed_dim=256,
@@ -36,12 +36,76 @@ REASONIR_TRAINING_DATA = {
36
36
  "DuRetrieval",
37
37
  "QuoraRetrieval",
38
38
  }
39
+ _prompts_dict = {
40
+ "BrightBiologyRetrieval": {
41
+ "query": "Given a Biology post, retrieve relevant passages that help answer the post"
42
+ },
43
+ "BrightEarthScienceRetrieval": {
44
+ "query": "Given a Earth Science post, retrieve relevant passages that help answer the post"
45
+ },
46
+ "BrightEconomicsRetrieval": {
47
+ "query": "Given a Economics post, retrieve relevant passages that help answer the post"
48
+ },
49
+ "BrightPsychologyRetrieval": {
50
+ "query": "Given a Psychology post, retrieve relevant passages that help answer the post"
51
+ },
52
+ "BrightRoboticsRetrieval": {
53
+ "query": "Given a Robotics post, retrieve relevant passages that help answer the post"
54
+ },
55
+ "BrightStackoverflowRetrieval": {
56
+ "query": "Given a Stackoverflow post, retrieve relevant passages that help answer the post"
57
+ },
58
+ "BrightSustainableLivingRetrieval": {
59
+ "query": "Given a Sustainable Living post, retrieve relevant passages that help answer the post"
60
+ },
61
+ "BrightPonyRetrieval": {
62
+ "query": "Given a Pony question, retrieve relevant passages that help answer the question"
63
+ },
64
+ "BrightLeetcodeRetrieval": {
65
+ "query": "Given a coding problem, retrieve relevant examples that help answer the problem",
66
+ },
67
+ "BrightAopsRetrieval": {
68
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem"
69
+ },
70
+ "BrightTheoremQATheoremsRetrieval": {
71
+ "query": "Given a Math problem, retrieve relevant theorems that help answer the problem",
72
+ },
73
+ "BrightTheoremQAQuestionsRetrieval": {
74
+ "query": "Given a Math problem, retrieve relevant examples that help answer the problem",
75
+ },
76
+ "BrightBiologyLongRetrieval": {
77
+ "query": "Given a Biology post, retrieve relevant documents that help answer the post"
78
+ },
79
+ "BrightEarthScienceLongRetrieval": {
80
+ "query": "Given a Earth Science post, retrieve relevant documents that help answer the post"
81
+ },
82
+ "BrightEconomicsLongRetrieval": {
83
+ "query": "Given a Economics post, retrieve relevant documents that help answer the post"
84
+ },
85
+ "BrightPsychologyLongRetrieval": {
86
+ "query": "Given a Psychology post, retrieve relevant documents that help answer the post"
87
+ },
88
+ "BrightRoboticsLongRetrieval": {
89
+ "query": "Given a Robotics post, retrieve relevant documents that help answer the post"
90
+ },
91
+ "BrightStackoverflowLongRetrieval": {
92
+ "query": "Given a Stackoverflow post, retrieve relevant documents that help answer the post"
93
+ },
94
+ "BrightSustainableLivingLongRetrieval": {
95
+ "query": "Given a Sustainable Living post, retrieve relevant documents that help answer the post"
96
+ },
97
+ "BrightPonyLongRetrieval": {
98
+ "query": "Given a Pony question, retrieve relevant documents that help answer the question"
99
+ },
100
+ }
101
+
39
102
 
40
103
  ReasonIR_8B = ModelMeta(
41
104
  loader=InstructSentenceTransformerModel,
42
105
  loader_kwargs=dict(
43
106
  instruction_template=instruction_template,
44
107
  trust_remote_code=True,
108
+ prompts_dict=_prompts_dict,
45
109
  ),
46
110
  name="ReasonIR/ReasonIR-8B",
47
111
  model_type=["dense"],
@@ -50,6 +114,7 @@ ReasonIR_8B = ModelMeta(
50
114
  revision="c3d0690370ff4a8c3d3882d8dfa85c43650034fa",
51
115
  release_date="2025-04-29",
52
116
  n_parameters=7_500_000_000,
117
+ n_embedding_parameters=None,
53
118
  memory_usage_mb=None,
54
119
  embed_dim=4096,
55
120
  license="cc-by-nc-4.0",
@@ -179,6 +179,7 @@ repllama_llama2_original = ModelMeta(
179
179
  "mMARCO-NL", # translation not trained on
180
180
  },
181
181
  n_parameters=7_000_000,
182
+ n_embedding_parameters=131_072_000,
182
183
  memory_usage_mb=27,
183
184
  max_tokens=4096,
184
185
  embed_dim=4096,
@@ -208,6 +209,7 @@ repllama_llama2_reproduced = ModelMeta(
208
209
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision
209
210
  release_date="2024-09-15",
210
211
  n_parameters=7_000_000,
212
+ n_embedding_parameters=None,
211
213
  memory_usage_mb=27,
212
214
  max_tokens=4096,
213
215
  embed_dim=4096,
@@ -231,6 +231,7 @@ monobert_large = ModelMeta(
231
231
  revision="0a97706f3827389da43b83348d5d18c9d53876fa",
232
232
  release_date="2020-05-28",
233
233
  n_parameters=None,
234
+ n_embedding_parameters=31_254_528,
234
235
  memory_usage_mb=None,
235
236
  max_tokens=None,
236
237
  embed_dim=None,
@@ -256,6 +257,7 @@ jina_reranker_multilingual = ModelMeta(
256
257
  revision="126747772a932960028d9f4dc93bd5d9c4869be4",
257
258
  release_date="2024-09-26",
258
259
  n_parameters=None,
260
+ n_embedding_parameters=None,
259
261
  memory_usage_mb=531,
260
262
  max_tokens=None,
261
263
  embed_dim=None,
@@ -319,6 +321,7 @@ bge_reranker_v2_m3 = ModelMeta(
319
321
  revision="953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e",
320
322
  release_date="2024-06-24",
321
323
  n_parameters=None,
324
+ n_embedding_parameters=256_002_048,
322
325
  memory_usage_mb=2166,
323
326
  max_tokens=None,
324
327
  embed_dim=None,
@@ -327,6 +327,7 @@ monot5_small = ModelMeta(
327
327
  revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e",
328
328
  release_date="2022-03-28",
329
329
  n_parameters=None,
330
+ n_embedding_parameters=16_449_536,
330
331
  memory_usage_mb=None,
331
332
  max_tokens=None,
332
333
  embed_dim=None,
@@ -369,6 +370,7 @@ monot5_base = ModelMeta(
369
370
  url={https://arxiv.org/abs/2206.02873},
370
371
  }""",
371
372
  n_parameters=None,
373
+ n_embedding_parameters=24_674_304,
372
374
  memory_usage_mb=None,
373
375
  max_tokens=None,
374
376
  embed_dim=None,
@@ -393,6 +395,7 @@ monot5_large = ModelMeta(
393
395
  revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98",
394
396
  release_date="2022-03-28",
395
397
  n_parameters=None,
398
+ n_embedding_parameters=32_899_072,
396
399
  memory_usage_mb=None,
397
400
  max_tokens=None,
398
401
  embed_dim=None,
@@ -426,6 +429,7 @@ monot5_3b = ModelMeta(
426
429
  revision="bc0c419a438c81f592f878ce32430a1823f5db6c",
427
430
  release_date="2022-03-28",
428
431
  n_parameters=None,
432
+ n_embedding_parameters=32_899_072,
429
433
  memory_usage_mb=None,
430
434
  max_tokens=None,
431
435
  embed_dim=None,
@@ -482,6 +486,7 @@ flant5_base = ModelMeta(
482
486
  # "qed": ["train"],
483
487
  ),
484
488
  n_parameters=None,
489
+ n_embedding_parameters=24_674_304,
485
490
  memory_usage_mb=944,
486
491
  max_tokens=None,
487
492
  embed_dim=None,
@@ -528,6 +533,7 @@ flant5_large = ModelMeta(
528
533
  # "qed": ["train"],
529
534
  ),
530
535
  n_parameters=None,
536
+ n_embedding_parameters=32_899_072,
531
537
  memory_usage_mb=2987,
532
538
  max_tokens=None,
533
539
  embed_dim=None,
@@ -574,6 +580,7 @@ flant5_xl = ModelMeta(
574
580
  # "qed": ["train"],
575
581
  ),
576
582
  n_parameters=None,
583
+ n_embedding_parameters=65_798_144,
577
584
  memory_usage_mb=10871,
578
585
  max_tokens=None,
579
586
  embed_dim=None,
@@ -620,6 +627,7 @@ flant5_xxl = ModelMeta(
620
627
  # "qed": ["train"],
621
628
  ),
622
629
  n_parameters=None,
630
+ n_embedding_parameters=131_596_288,
623
631
  memory_usage_mb=42980,
624
632
  max_tokens=None,
625
633
  embed_dim=None,
@@ -644,6 +652,7 @@ llama2_7b = ModelMeta(
644
652
  revision="01c7f73d771dfac7d292323805ebc428287df4f9",
645
653
  release_date="2023-07-18",
646
654
  n_parameters=None,
655
+ n_embedding_parameters=131_072_000,
647
656
  memory_usage_mb=None,
648
657
  max_tokens=None,
649
658
  embed_dim=None,
@@ -686,6 +695,7 @@ llama2_7b_chat = ModelMeta(
686
695
  url={https://arxiv.org/abs/2307.09288},
687
696
  }""",
688
697
  n_parameters=None,
698
+ n_embedding_parameters=131_072_000,
689
699
  memory_usage_mb=None,
690
700
  max_tokens=None,
691
701
  embed_dim=None,
@@ -710,6 +720,7 @@ mistral_7b = ModelMeta(
710
720
  revision="3ad372fc79158a2148299e3318516c786aeded6c",
711
721
  release_date="2023-12-11",
712
722
  n_parameters=None,
723
+ n_embedding_parameters=None,
713
724
  memory_usage_mb=None,
714
725
  max_tokens=None,
715
726
  embed_dim=None,
@@ -746,6 +757,7 @@ followir_7b = ModelMeta(
746
757
  # "jhu-clsp/FollowIR-train"
747
758
  ),
748
759
  n_parameters=None,
760
+ n_embedding_parameters=None,
749
761
  memory_usage_mb=13813,
750
762
  max_tokens=None,
751
763
  embed_dim=None,
@@ -896,6 +908,7 @@ mt5_base_mmarco_v2 = ModelMeta(
896
908
  """,
897
909
  training_datasets={"MSMARCO"},
898
910
  n_parameters=None,
911
+ n_embedding_parameters=192_086_016,
899
912
  memory_usage_mb=None,
900
913
  max_tokens=None,
901
914
  embed_dim=None,
@@ -919,6 +932,7 @@ mt5_13b_mmarco_100k = ModelMeta(
919
932
  revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
920
933
  release_date="2022-11-04",
921
934
  n_parameters=None,
935
+ n_embedding_parameters=1_024_458_752,
922
936
  memory_usage_mb=None,
923
937
  max_tokens=None,
924
938
  embed_dim=None,