mteb-2.7.2-py3-none-any.whl → mteb-2.7.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +28 -17
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/__init__.py +2 -0
  37. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  38. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  39. mteb/cache.py +10 -5
  40. mteb/cli/_display_tasks.py +9 -3
  41. mteb/cli/build_cli.py +5 -2
  42. mteb/cli/generate_model_card.py +9 -2
  43. mteb/deprecated_evaluator.py +16 -12
  44. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  64. mteb/evaluate.py +20 -18
  65. mteb/filter_tasks.py +12 -7
  66. mteb/get_tasks.py +9 -4
  67. mteb/languages/language_scripts.py +8 -3
  68. mteb/leaderboard/app.py +7 -3
  69. mteb/leaderboard/table.py +7 -2
  70. mteb/load_results.py +9 -3
  71. mteb/models/abs_encoder.py +22 -12
  72. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  73. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  74. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  75. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  76. mteb/models/get_model_meta.py +11 -4
  77. mteb/models/instruct_wrapper.py +13 -5
  78. mteb/models/model_implementations/align_models.py +10 -4
  79. mteb/models/model_implementations/amazon_models.py +1 -0
  80. mteb/models/model_implementations/andersborges.py +2 -0
  81. mteb/models/model_implementations/ara_models.py +1 -0
  82. mteb/models/model_implementations/arctic_models.py +8 -0
  83. mteb/models/model_implementations/b1ade_models.py +1 -0
  84. mteb/models/model_implementations/bedrock_models.py +20 -6
  85. mteb/models/model_implementations/bge_models.py +40 -1
  86. mteb/models/model_implementations/bica_model.py +1 -0
  87. mteb/models/model_implementations/blip2_models.py +11 -4
  88. mteb/models/model_implementations/blip_models.py +17 -4
  89. mteb/models/model_implementations/bm25.py +22 -14
  90. mteb/models/model_implementations/bmretriever_models.py +10 -2
  91. mteb/models/model_implementations/cadet_models.py +1 -0
  92. mteb/models/model_implementations/cde_models.py +11 -5
  93. mteb/models/model_implementations/clip_models.py +12 -4
  94. mteb/models/model_implementations/clips_models.py +3 -0
  95. mteb/models/model_implementations/codefuse_models.py +5 -0
  96. mteb/models/model_implementations/codesage_models.py +3 -0
  97. mteb/models/model_implementations/cohere_models.py +14 -4
  98. mteb/models/model_implementations/cohere_v.py +14 -4
  99. mteb/models/model_implementations/colpali_models.py +7 -3
  100. mteb/models/model_implementations/colqwen_models.py +17 -31
  101. mteb/models/model_implementations/colsmol_models.py +3 -1
  102. mteb/models/model_implementations/conan_models.py +11 -4
  103. mteb/models/model_implementations/dino_models.py +28 -4
  104. mteb/models/model_implementations/e5_instruct.py +4 -0
  105. mteb/models/model_implementations/e5_models.py +9 -0
  106. mteb/models/model_implementations/e5_v.py +10 -4
  107. mteb/models/model_implementations/eagerworks_models.py +11 -4
  108. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  109. mteb/models/model_implementations/en_code_retriever.py +1 -0
  110. mteb/models/model_implementations/euler_models.py +1 -0
  111. mteb/models/model_implementations/evaclip_models.py +13 -4
  112. mteb/models/model_implementations/fa_models.py +9 -0
  113. mteb/models/model_implementations/facebookai.py +2 -0
  114. mteb/models/model_implementations/geogpt_models.py +1 -0
  115. mteb/models/model_implementations/gme_v_models.py +7 -3
  116. mteb/models/model_implementations/google_models.py +15 -4
  117. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  118. mteb/models/model_implementations/gritlm_models.py +2 -0
  119. mteb/models/model_implementations/gte_models.py +9 -0
  120. mteb/models/model_implementations/hinvec_models.py +6 -1
  121. mteb/models/model_implementations/human.py +1 -0
  122. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  123. mteb/models/model_implementations/inf_models.py +2 -0
  124. mteb/models/model_implementations/jasper_models.py +14 -5
  125. mteb/models/model_implementations/jina_clip.py +10 -4
  126. mteb/models/model_implementations/jina_models.py +17 -5
  127. mteb/models/model_implementations/kalm_models.py +24 -12
  128. mteb/models/model_implementations/kblab.py +1 -0
  129. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  130. mteb/models/model_implementations/kfst.py +1 -0
  131. mteb/models/model_implementations/kowshik24_models.py +1 -0
  132. mteb/models/model_implementations/lens_models.py +2 -0
  133. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  134. mteb/models/model_implementations/linq_models.py +7 -1
  135. mteb/models/model_implementations/listconranker.py +10 -4
  136. mteb/models/model_implementations/llm2clip_models.py +12 -4
  137. mteb/models/model_implementations/llm2vec_models.py +20 -6
  138. mteb/models/model_implementations/mcinext_models.py +8 -2
  139. mteb/models/model_implementations/mdbr_models.py +2 -0
  140. mteb/models/model_implementations/misc_models.py +63 -0
  141. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  142. mteb/models/model_implementations/mme5_models.py +2 -1
  143. mteb/models/model_implementations/moco_models.py +11 -4
  144. mteb/models/model_implementations/mod_models.py +2 -1
  145. mteb/models/model_implementations/model2vec_models.py +23 -4
  146. mteb/models/model_implementations/moka_models.py +3 -0
  147. mteb/models/model_implementations/nbailab.py +3 -0
  148. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  149. mteb/models/model_implementations/nomic_models.py +16 -4
  150. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  151. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  152. mteb/models/model_implementations/nvidia_models.py +15 -4
  153. mteb/models/model_implementations/octen_models.py +3 -1
  154. mteb/models/model_implementations/openai_models.py +14 -4
  155. mteb/models/model_implementations/openclip_models.py +17 -4
  156. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  157. mteb/models/model_implementations/ops_moa_models.py +9 -2
  158. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  159. mteb/models/model_implementations/pawan_models.py +1 -0
  160. mteb/models/model_implementations/piccolo_models.py +2 -0
  161. mteb/models/model_implementations/promptriever_models.py +16 -6
  162. mteb/models/model_implementations/pylate_models.py +22 -13
  163. mteb/models/model_implementations/qodo_models.py +2 -0
  164. mteb/models/model_implementations/qtack_models.py +1 -0
  165. mteb/models/model_implementations/qwen3_models.py +11 -1
  166. mteb/models/model_implementations/qzhou_models.py +2 -0
  167. mteb/models/model_implementations/random_baseline.py +4 -3
  168. mteb/models/model_implementations/rasgaard_models.py +1 -0
  169. mteb/models/model_implementations/reasonir_model.py +65 -0
  170. mteb/models/model_implementations/repllama_models.py +15 -6
  171. mteb/models/model_implementations/rerankers_custom.py +13 -4
  172. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  173. mteb/models/model_implementations/richinfoai_models.py +1 -0
  174. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  175. mteb/models/model_implementations/ruri_models.py +10 -0
  176. mteb/models/model_implementations/salesforce_models.py +10 -1
  177. mteb/models/model_implementations/samilpwc_models.py +1 -0
  178. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  179. mteb/models/model_implementations/searchmap_models.py +1 -0
  180. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  181. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  182. mteb/models/model_implementations/seed_models.py +2 -1
  183. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  184. mteb/models/model_implementations/shuu_model.py +1 -0
  185. mteb/models/model_implementations/siglip_models.py +19 -4
  186. mteb/models/model_implementations/slm_models.py +7 -4
  187. mteb/models/model_implementations/sonar_models.py +2 -1
  188. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  189. mteb/models/model_implementations/stella_models.py +6 -0
  190. mteb/models/model_implementations/tarka_models.py +2 -0
  191. mteb/models/model_implementations/text2vec_models.py +3 -0
  192. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  193. mteb/models/model_implementations/uae_models.py +10 -4
  194. mteb/models/model_implementations/vdr_models.py +8 -1
  195. mteb/models/model_implementations/vi_vn_models.py +6 -0
  196. mteb/models/model_implementations/vista_models.py +11 -4
  197. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  198. mteb/models/model_implementations/voyage_models.py +25 -4
  199. mteb/models/model_implementations/voyage_v.py +11 -6
  200. mteb/models/model_implementations/xyz_models.py +1 -0
  201. mteb/models/model_implementations/youtu_models.py +1 -0
  202. mteb/models/model_implementations/yuan_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models_en.py +2 -1
  204. mteb/models/model_meta.py +47 -9
  205. mteb/models/models_protocols.py +19 -18
  206. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  207. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  208. mteb/models/search_wrappers.py +19 -12
  209. mteb/models/sentence_transformer_wrapper.py +4 -3
  210. mteb/models/vllm_wrapper.py +8 -6
  211. mteb/results/benchmark_results.py +22 -17
  212. mteb/results/model_result.py +21 -15
  213. mteb/results/task_result.py +15 -9
  214. mteb/similarity_functions.py +8 -2
  215. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  216. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  220. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  223. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  224. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  225. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  226. mteb/tasks/retrieval/eng/__init__.py +42 -0
  227. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  228. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  229. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  230. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  231. mteb/types/_encoder_io.py +1 -1
  232. mteb/types/statistics.py +9 -2
  233. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  234. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
  235. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  236. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  237. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  238. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0

mteb/models/model_implementations/colqwen_models.py

@@ -1,18 +1,23 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 from .colpali_models import (
     COLPALI_CITATION,
@@ -219,6 +224,7 @@ colqwen2 = ModelMeta(
     release_date="2025-11-03",
     modalities=["image", "text"],
     n_parameters=2_210_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7200,
     max_tokens=32768,
     embed_dim=128,
@@ -246,6 +252,7 @@ colqwen2_5 = ModelMeta(
     release_date="2025-01-31",
     modalities=["image", "text"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7200,
     max_tokens=128000,
     embed_dim=128,
@@ -290,6 +297,7 @@ colqwen3_8b = ModelMeta(
     release_date="2025-11-26",
     modalities=["image", "text"],
     n_parameters=8_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=16724,
     max_tokens=262144,
     embed_dim=320,
@@ -314,6 +322,7 @@ colqwen3_4b = ModelMeta(
     release_date="2025-11-26",
     modalities=["image", "text"],
     n_parameters=4_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=8466,
     max_tokens=262144,
     embed_dim=320,
@@ -329,32 +338,6 @@ colqwen3_4b = ModelMeta(
     citation=TOMORO_CITATION,
 )
 
-colnomic_7b = ModelMeta(
-    loader=ColQwen2_5Wrapper,
-    loader_kwargs=dict(
-        torch_dtype=torch.float16,
-    ),
-    name="nomic-ai/colnomic-embed-multimodal-7b",
-    model_type=["late-interaction"],
-    languages=["eng-Latn"],
-    revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
-    release_date="2025-03-31",
-    modalities=["image", "text"],
-    n_parameters=7_000_000_000,
-    memory_usage_mb=14400,
-    max_tokens=128000,
-    embed_dim=128,
-    license="apache-2.0",
-    open_weights=True,
-    public_training_code="https://github.com/nomic-ai/colpali",
-    public_training_data="https://huggingface.co/datasets/vidore/colpali_train_set",
-    framework=["ColPali", "safetensors"],
-    reference="https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b",
-    similarity_fn_name="MaxSim",
-    use_instructions=True,
-    training_datasets=COLPALI_TRAINING_DATA,
-    citation=COLPALI_CITATION,
-)
 
 COLNOMIC_CITATION = """
 @misc{nomicembedmultimodal2025,
@@ -386,6 +369,7 @@ colnomic_3b = ModelMeta(
     release_date="2025-03-31",
     modalities=["image", "text"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7200,
     max_tokens=128000,
     embed_dim=128,
@@ -402,7 +386,7 @@
 )
 
 colnomic_7b = ModelMeta(
-    loader=ColQwen2Wrapper,
+    loader=ColQwen2_5Wrapper,
     loader_kwargs=dict(
         torch_dtype=torch.float16,
     ),
@@ -451,6 +435,7 @@ evoqwen25_vl_retriever_3b_v1 = ModelMeta(
     release_date="2025-11-04",
     modalities=["image", "text"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7200,
     max_tokens=128000,
     embed_dim=128,
@@ -477,6 +462,7 @@ evoqwen25_vl_retriever_7b_v1 = ModelMeta(
     release_date="2025-11-04",
     modalities=["image", "text"],
     n_parameters=7_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=14400,
     max_tokens=128000,
     embed_dim=128,
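
A recurring change across the model-implementation modules in this diff (colqwen_models.py above; conan_models.py, dino_models.py, e5_v.py, eagerworks_models.py and evaclip_models.py below) is that imports used only in type annotations (DataLoader, TaskMetadata, the mteb.types aliases) move behind an if TYPE_CHECKING: guard, with from __future__ import annotations added so the annotations are never evaluated at runtime. A minimal, self-contained sketch of the pattern; the module and function names below are illustrative, not taken from mteb:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers; this branch never runs,
    # so importing this module does not pull in torch.
    from torch.utils.data import DataLoader


def count_batches(loader: DataLoader) -> int:
    # With `from __future__ import annotations` the annotation above is
    # stored as a string, so the DataLoader name is never looked up here.
    return sum(1 for _ in loader)

The effect is that importing these modules no longer requires the guarded dependencies at runtime, while type checkers still see the full signatures.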

mteb/models/model_implementations/colsmol_models.py

@@ -56,10 +56,11 @@ colsmol_256m = ModelMeta(
     name="vidore/colSmol-256M",
     model_type=["late-interaction"],
     languages=["eng-Latn"],
-    revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
+    revision="a59110fdf114638b8018e6c9a018907e12f14855",
     release_date="2025-01-22",
     modalities=["image", "text"],
     n_parameters=256_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=800,
     max_tokens=8192,
     embed_dim=128,
@@ -87,6 +88,7 @@ colsmol_500m = ModelMeta(
     release_date="2025-01-22",
     modalities=["image", "text"],
     n_parameters=500_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1200,
     max_tokens=8192,
     embed_dim=128,

mteb/models/model_implementations/conan_models.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import hashlib
 import json
 import logging
@@ -5,20 +7,24 @@ import os
 import random
 import string
 import time
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import requests
-from torch.utils.data import DataLoader
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
 
 from .bge_models import bge_full_data
 from .e5_instruct import E5_MISTRAL_TRAINING_DATA
 
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
+
 conan_zh_datasets = {
     "BQ",
     "LCQMC",
@@ -205,6 +211,7 @@ Conan_embedding_v2 = ModelMeta(
     embed_dim=3584,
     open_weights=False,
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     license="apache-2.0",
     reference="https://huggingface.co/TencentBAC/Conan-embedding-v2",

mteb/models/model_implementations/dino_models.py

@@ -1,13 +1,18 @@
-from typing import Any, Literal
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Literal
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 
 class DINOModel(AbsEncoder):
@@ -112,6 +117,7 @@ dinov2_small = ModelMeta(
     release_date="2023-07-18",
     modalities=["image"],
     n_parameters=22_100_000,
+    n_embedding_parameters=None,
     memory_usage_mb=84,
     max_tokens=None,
     embed_dim=384,
@@ -143,6 +149,7 @@ dinov2_base = ModelMeta(
     release_date="2023-07-18",
     modalities=["image"],
     n_parameters=86_600_000,
+    n_embedding_parameters=None,
     memory_usage_mb=330,
     max_tokens=None,
     embed_dim=768,
@@ -174,6 +181,7 @@ dinov2_large = ModelMeta(
     release_date="2023-07-18",
     modalities=["image"],
     n_parameters=304_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1161,
     max_tokens=None,
     embed_dim=1024,
@@ -205,6 +213,7 @@ dinov2_giant = ModelMeta(
     release_date="2023-07-18",
     modalities=["image"],
     n_parameters=1_140_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4335,
     max_tokens=None,
     embed_dim=1536,
@@ -240,6 +249,7 @@ webssl_dino300m_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=304_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1158,
     max_tokens=None,
     embed_dim=1024,
@@ -271,6 +281,7 @@ webssl_dino1b_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=1_130_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4329,
     max_tokens=None,
     embed_dim=1536,
@@ -302,6 +313,7 @@ webssl_dino2b_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=2_080_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7951,
     max_tokens=None,
     embed_dim=2688,
@@ -333,6 +345,7 @@ webssl_dino3b_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=11247,
     max_tokens=None,
     embed_dim=3072,
@@ -364,6 +377,7 @@ webssl_dino5b_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=5_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=18838,
     max_tokens=None,
     embed_dim=3584,
@@ -395,6 +409,7 @@ webssl_dino7b_full8b_224 = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=7_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=24605,
     max_tokens=None,
     embed_dim=4096,
@@ -426,6 +441,7 @@ webssl_dino7b_full8b_378 = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=7_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=24613,
     max_tokens=None,
     embed_dim=4096,
@@ -457,6 +473,7 @@ webssl_dino7b_full8b_518 = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=7_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=24623,
     max_tokens=None,
     embed_dim=4096,
@@ -489,6 +506,7 @@ webssl_dino2b_light2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=2_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7951,
     max_tokens=None,
     embed_dim=2688,
@@ -520,6 +538,7 @@ webssl_dino2b_heavy2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=2_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=7951,
     max_tokens=None,
     embed_dim=2688,
@@ -551,6 +570,7 @@ webssl_dino3b_light2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=11247,
     max_tokens=None,
     embed_dim=3072,
@@ -582,6 +602,7 @@ webssl_dino3b_heavy2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=3_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=11247,
     max_tokens=None,
     embed_dim=3072,
@@ -613,6 +634,7 @@ webssl_mae300m_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=304_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1161,
     max_tokens=None,
     embed_dim=1024,
@@ -644,6 +666,7 @@ webssl_mae700m_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=700_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=2412,
     max_tokens=None,
     embed_dim=1280,
@@ -675,6 +698,7 @@ webssl_mae1b_full2b = ModelMeta(
     release_date="2025-04-24",
     modalities=["image"],
     n_parameters=1_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4337,
     max_tokens=None,
     embed_dim=1536,

mteb/models/model_implementations/e5_instruct.py

@@ -57,6 +57,7 @@ e5_instruct = ModelMeta(
     use_instructions=True,
     reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct",
     n_parameters=560_000_000,
+    n_embedding_parameters=256_002_048,
     memory_usage_mb=1068,
     embed_dim=1024,
     license="mit",
@@ -102,6 +103,7 @@ e5_mistral = ModelMeta(
     use_instructions=True,
     reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct",
     n_parameters=7_111_000_000,
+    n_embedding_parameters=131_072_000,
     memory_usage_mb=13563,
     embed_dim=4096,
     license="mit",
@@ -145,6 +147,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
     release_date="2024-08-30",
     languages=["eng-Latn"],
     n_parameters=7110660096,
+    n_embedding_parameters=None,
     memory_usage_mb=13563,
     max_tokens=32768.0,
     embed_dim=4096,
@@ -228,6 +231,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
     release_date="2024-06-28",
     languages=["eng-Latn"],
     n_parameters=7241732096,
+    n_embedding_parameters=131_072_000,
     memory_usage_mb=27625,
     max_tokens=32768.0,
     embed_dim=4096,

mteb/models/model_implementations/e5_models.py

@@ -76,6 +76,7 @@ e5_mult_small = ModelMeta(
     revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=118_000_000,
+    n_embedding_parameters=96_014_208,
     memory_usage_mb=449,
     embed_dim=384,
     license="mit",
@@ -103,6 +104,7 @@ e5_mult_base = ModelMeta(
     revision="d13f1b27baf31030b7fd040960d60d909913633f",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=278_000_000,
+    n_embedding_parameters=192_001_536,
     memory_usage_mb=1061,
     embed_dim=768,
     license="mit",
@@ -130,6 +132,7 @@ e5_mult_large = ModelMeta(
     revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=560_000_000,
+    n_embedding_parameters=256_002_048,
     memory_usage_mb=2136,
     embed_dim=1024,
     license="mit",
@@ -157,6 +160,7 @@ e5_eng_small_v2 = ModelMeta(
     revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=33_000_000,
+    n_embedding_parameters=11_720_448,
     memory_usage_mb=127,
     embed_dim=384,
     license="mit",
@@ -184,6 +188,7 @@ e5_eng_small = ModelMeta(
     revision="e272f3049e853b47cb5ca3952268c6662abda68f",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=33_000_000,
+    n_embedding_parameters=11_720_448,
     memory_usage_mb=127,
     embed_dim=384,
     license="mit",
@@ -211,6 +216,7 @@ e5_eng_base_v2 = ModelMeta(
     revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=109_000_000,
+    n_embedding_parameters=23_440_896,
     memory_usage_mb=418,
     embed_dim=768,
     license="mit",
@@ -239,6 +245,7 @@ e5_eng_large_v2 = ModelMeta(
     revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
     release_date=E5_PAPER_RELEASE_DATE,
     n_parameters=335_000_000,
+    n_embedding_parameters=31_254_528,
     memory_usage_mb=1278,
     embed_dim=1024,
     license="mit",
@@ -267,6 +274,7 @@ e5_large = ModelMeta(
     revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
     release_date="2022-12-26",
     n_parameters=335_000_000,
+    n_embedding_parameters=31_254_528,
     memory_usage_mb=1278,
     embed_dim=1024,
     license="apache-2.0",
@@ -295,6 +303,7 @@ e5_base = ModelMeta(
     revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
     release_date="2022-12-26",
     n_parameters=109_000_000,
+    n_embedding_parameters=23_440_896,
     memory_usage_mb=418,
     embed_dim=768,
     license="apache-2.0",
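
The concrete n_embedding_parameters values added in e5_instruct.py and e5_models.py above line up with vocab_size * hidden_size of the underlying checkpoints (for example 250_002 * 1_024 = 256_002_048 for multilingual-e5-large, and 30_522 * 384 = 11_720_448 for e5-small-v2). A hedged sketch of how such a figure can be derived; the helper below is illustrative and not an mteb API, and the attribute names assume a BERT/XLM-R style config:

from transformers import AutoConfig


def token_embedding_parameters(model_name: str) -> int:
    """Approximate the token-embedding parameter count as vocab_size * hidden_size."""
    config = AutoConfig.from_pretrained(model_name)
    # vocab_size and hidden_size exist on BERT/XLM-R style configs; other
    # architectures may call the hidden width d_model instead.
    return config.vocab_size * config.hidden_size


# Expected to print 256002048 for intfloat/multilingual-e5-large.
print(token_embedding_parameters("intfloat/multilingual-e5-large"))

For the entries set to None in this release, the field is added but not yet populated.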

mteb/models/model_implementations/e5_v.py

@@ -1,14 +1,19 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
 from packaging import version
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 E5_V_TRANSFORMERS_VERSION = (
     "4.44.2"  # Issue 1647: Only works with transformers==4.44.2.
@@ -166,6 +171,7 @@ e5_v = ModelMeta(
     release_date="2024-07-17",
     modalities=["image", "text"],
     n_parameters=8_360_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=15936,
     max_tokens=8192,
     embed_dim=4096,

mteb/models/model_implementations/eagerworks_models.py

@@ -1,17 +1,23 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 
 
 class EagerEmbedV1Wrapper(AbsEncoder):
@@ -147,6 +153,7 @@ Eager_Embed_V1 = ModelMeta(
     release_date="2025-11-20",
     modalities=["image", "text"],
     n_parameters=4_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=16929,
     max_tokens=262144,
     embed_dim=2560,

mteb/models/model_implementations/emillykkejensen_models.py

@@ -10,6 +10,7 @@ embedding_gemma_300m_scandi = ModelMeta(
     revision="9f3307b9f601db564a9190cb475324d128dcfe86",
     release_date="2025-10-17",
     n_parameters=307_581_696,
+    n_embedding_parameters=None,
     embed_dim=768,
     max_tokens=2048,
     license="apache-2.0",
@@ -43,6 +44,7 @@ qwen_scandi = ModelMeta(
     revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
     release_date="2025-10-17",
     n_parameters=595776512,
+    n_embedding_parameters=None,
     memory_usage_mb=2272,
     embed_dim=1024,
     max_tokens=32768,
@@ -67,6 +69,7 @@ mmbert_scandi = ModelMeta(
     revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
     release_date="2025-10-17",
     n_parameters=306939648,
+    n_embedding_parameters=None,
     memory_usage_mb=1171,
     embed_dim=768,
     max_tokens=8192,

mteb/models/model_implementations/en_code_retriever.py

@@ -18,6 +18,7 @@ english_code_retriever = ModelMeta(
     revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c",
     release_date="2025-07-10",
     n_parameters=149_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=568,
     embed_dim=768,
     license="mit",

mteb/models/model_implementations/euler_models.py

@@ -9,6 +9,7 @@ Euler_Legal_Embedding_V1 = ModelMeta(
     release_date="2025-11-06",
     languages=["eng-Latn"],
     n_parameters=8000000000,
+    n_embedding_parameters=None,
     memory_usage_mb=15618,
     max_tokens=1536,
     embed_dim=4096,

mteb/models/model_implementations/evaclip_models.py

@@ -1,15 +1,20 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_image_dependencies
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 EVA_CLIP_CITATION = """@article{EVA-CLIP,
   title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
@@ -144,6 +149,7 @@ EVA02_CLIP_B_16 = ModelMeta(
     release_date="2023-04-26",
     modalities=["image", "text"],
     n_parameters=149_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=568,
     max_tokens=77,
     embed_dim=512,
@@ -168,6 +174,7 @@ EVA02_CLIP_L_14 = ModelMeta(
     release_date="2023-04-26",
     modalities=["image", "text"],
     n_parameters=428_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1633,
     max_tokens=77,
     embed_dim=768,
@@ -192,6 +199,7 @@ EVA02_CLIP_bigE_14 = ModelMeta(
     release_date="2023-04-26",
     modalities=["image", "text"],
     n_parameters=4_700_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=17929,
     max_tokens=77,
     embed_dim=1024,
@@ -217,6 +225,7 @@ EVA02_CLIP_bigE_14_plus = ModelMeta(
     release_date="2023-04-26",
     modalities=["image", "text"],
     n_parameters=5_000_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=19073,
     max_tokens=77,
     embed_dim=1024,

mteb/models/model_implementations/fa_models.py

@@ -12,6 +12,7 @@ parsbert = ModelMeta(
     revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
     release_date="2021-05-19",
     n_parameters=162_841_344,
+    n_embedding_parameters=76_800_000,
     memory_usage_mb=621,
     embed_dim=768,
     license="not specified",
@@ -48,6 +49,7 @@ bert_zwnj = ModelMeta(
     revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
     release_date="2021-06-28",
     n_parameters=118_297_344,
+    n_embedding_parameters=32_256_000,
     memory_usage_mb=451,
     embed_dim=768,
     license="not specified",
@@ -74,6 +76,7 @@ roberta_zwnj = ModelMeta(
     revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
     release_date="2021-06-28",
     n_parameters=118_298_112,
+    n_embedding_parameters=32_256_000,
     memory_usage_mb=451,
     embed_dim=768,
     license="not specified",
@@ -99,6 +102,7 @@ sentence_transformer_parsbert = ModelMeta(
     revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
     release_date="2024-12-10",
     n_parameters=162_841_344,
+    n_embedding_parameters=76_800_000,
     memory_usage_mb=621,
     embed_dim=768,
     license="apache-2.0",
@@ -123,6 +127,7 @@ tooka_bert_base = ModelMeta(
     revision="fa5ca89df5670700d9325b8872ac65c17cb24582",
     release_date="2024-12-08",
     n_parameters=122_905_344,
+    n_embedding_parameters=36_864_000,
     memory_usage_mb=469,
     embed_dim=768,
     license="apache-2.0",
@@ -150,6 +155,7 @@ tooka_sbert = ModelMeta(
     revision="5d07f0c543aca654373b931ae07cd197769110fd",
     release_date="2024-12-07",
     n_parameters=353_039_360,
+    n_embedding_parameters=49_152_000,
     memory_usage_mb=1347,
     embed_dim=1024,
     license="apache-2.0",
@@ -181,6 +187,7 @@ fa_bert = ModelMeta(
     revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
     release_date="2024-10-07",
     n_parameters=124_441_344,
+    n_embedding_parameters=38_400_000,
     memory_usage_mb=475,
     embed_dim=768,
     license="not specified",
@@ -229,6 +236,7 @@ tooka_sbert_v2_small = ModelMeta(
     revision="8bbed87e36669387f71437c061430ba56d1b496f",
     release_date="2025-05-01",
     n_parameters=122_905_344,
+    n_embedding_parameters=36_864_000,
     memory_usage_mb=496,
     embed_dim=768,
     license="not specified",
@@ -260,6 +268,7 @@ tooka_sbert_v2_large = ModelMeta(
     revision="b59682efa961122cc0e4408296d5852870c82eae",
     release_date="2025-05-01",
     n_parameters=353_039_360,
+    n_embedding_parameters=49_152_000,
     memory_usage_mb=1347,
     embed_dim=1024,
     license="not specified",