mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
@@ -113,6 +113,7 @@ sent_trf_training_dataset = {
113
113
  all_minilm_l6_v2 = ModelMeta(
114
114
  loader=sentence_transformers_loader,
115
115
  name="sentence-transformers/all-MiniLM-L6-v2",
116
+ model_type=["dense"],
116
117
  languages=["eng-Latn"],
117
118
  open_weights=True,
118
119
  revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
@@ -137,6 +138,7 @@ all_minilm_l6_v2 = ModelMeta(
137
138
  all_minilm_l12_v2 = ModelMeta(
138
139
  loader=sentence_transformers_loader,
139
140
  name="sentence-transformers/all-MiniLM-L12-v2",
141
+ model_type=["dense"],
140
142
  languages=["eng-Latn"],
141
143
  open_weights=True,
142
144
  revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
@@ -161,6 +163,7 @@ all_minilm_l12_v2 = ModelMeta(
161
163
  paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
162
164
  loader=sentence_transformers_loader,
163
165
  name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
166
+ model_type=["dense"],
164
167
  languages=paraphrase_langs,
165
168
  open_weights=True,
166
169
  revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
@@ -185,6 +188,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
185
188
  paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
186
189
  loader=sentence_transformers_loader,
187
190
  name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
191
+ model_type=["dense"],
188
192
  languages=paraphrase_langs,
189
193
  open_weights=True,
190
194
  revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
@@ -220,6 +224,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
220
224
  labse = ModelMeta(
221
225
  loader=sentence_transformers_loader,
222
226
  name="sentence-transformers/LaBSE",
227
+ model_type=["dense"],
223
228
  languages=paraphrase_langs,
224
229
  open_weights=True,
225
230
  revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
@@ -257,6 +262,7 @@ labse = ModelMeta(
257
262
  multi_qa_minilm_l6_cos_v1 = ModelMeta(
258
263
  loader=sentence_transformers_loader,
259
264
  name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
265
+ model_type=["dense"],
260
266
  languages=["eng-Latn"],
261
267
  open_weights=True,
262
268
  revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
@@ -281,6 +287,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
281
287
  all_mpnet_base_v2 = ModelMeta(
282
288
  loader=sentence_transformers_loader,
283
289
  name="sentence-transformers/all-mpnet-base-v2",
290
+ model_type=["dense"],
284
291
  languages=["eng-Latn"],
285
292
  open_weights=True,
286
293
  revision="9a3225965996d404b775526de6dbfe85d3368642",
@@ -380,6 +387,7 @@ static_multi_languages = [
380
387
 
381
388
  static_similarity_mrl_multilingual_v1 = ModelMeta(
382
389
  name="sentence-transformers/static-similarity-mrl-multilingual-v1",
390
+ model_type=["dense"],
383
391
  loader=SentenceTransformerEncoderWrapper,
384
392
  loader_kwargs=dict(
385
393
  device="cpu", # CPU is just as quick, if not quicker
@@ -416,6 +424,7 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
416
424
  contriever = ModelMeta(
417
425
  loader=SentenceTransformerEncoderWrapper,
418
426
  name="facebook/contriever-msmarco",
427
+ model_type=["dense"],
419
428
  languages=["eng-Latn"],
420
429
  open_weights=True,
421
430
  revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
@@ -445,6 +454,7 @@ contriever = ModelMeta(
445
454
  microllama_text_embedding = ModelMeta(
446
455
  loader=sentence_transformers_loader,
447
456
  name="keeeeenw/MicroLlama-text-embedding",
457
+ model_type=["dense"],
448
458
  languages=["eng-Latn"],
449
459
  open_weights=True,
450
460
  revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
@@ -490,6 +500,7 @@ SENTENCE_T5_CITATION = """
490
500
  sentence_t5_base = ModelMeta(
491
501
  loader=sentence_transformers_loader,
492
502
  name="sentence-transformers/sentence-t5-base",
503
+ model_type=["dense"],
493
504
  languages=["eng-Latn"],
494
505
  open_weights=True,
495
506
  revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
@@ -512,6 +523,7 @@ sentence_t5_base = ModelMeta(
512
523
  sentence_t5_large = ModelMeta(
513
524
  loader=sentence_transformers_loader,
514
525
  name="sentence-transformers/sentence-t5-large",
526
+ model_type=["dense"],
515
527
  languages=["eng-Latn"],
516
528
  open_weights=True,
517
529
  revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
@@ -534,6 +546,7 @@ sentence_t5_large = ModelMeta(
534
546
  sentence_t5_xl = ModelMeta(
535
547
  loader=sentence_transformers_loader,
536
548
  name="sentence-transformers/sentence-t5-xl",
549
+ model_type=["dense"],
537
550
  languages=["eng-Latn"],
538
551
  open_weights=True,
539
552
  revision="2965d31b368fb14117688e0bde77cbd720e91f53",
@@ -556,6 +569,7 @@ sentence_t5_xl = ModelMeta(
556
569
  sentence_t5_xxl = ModelMeta(
557
570
  loader=sentence_transformers_loader,
558
571
  name="sentence-transformers/sentence-t5-xxl",
572
+ model_type=["dense"],
559
573
  languages=["eng-Latn"],
560
574
  open_weights=True,
561
575
  revision="4d122282ba80e807e9e6eb8c358269e92796365d",
@@ -588,6 +602,7 @@ GTR_CITATION = """
588
602
  gtr_t5_large = ModelMeta(
589
603
  loader=sentence_transformers_loader,
590
604
  name="sentence-transformers/gtr-t5-large",
605
+ model_type=["dense"],
591
606
  languages=["eng-Latn"], # in format eng-Latn
592
607
  open_weights=True,
593
608
  revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
@@ -622,6 +637,7 @@ gtr_t5_large = ModelMeta(
622
637
  gtr_t5_xl = ModelMeta(
623
638
  loader=sentence_transformers_loader,
624
639
  name="sentence-transformers/gtr-t5-xl",
640
+ model_type=["dense"],
625
641
  languages=["eng-Latn"], # in format eng-Latn
626
642
  open_weights=True,
627
643
  revision="23a8d667a1ad2578af181ce762867003c498d1bf",
@@ -655,6 +671,7 @@ gtr_t5_xl = ModelMeta(
655
671
  gtr_t5_xxl = ModelMeta(
656
672
  loader=sentence_transformers_loader,
657
673
  name="sentence-transformers/gtr-t5-xxl",
674
+ model_type=["dense"],
658
675
  languages=["eng-Latn"], # in format eng-Latn
659
676
  open_weights=True,
660
677
  revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
@@ -689,6 +706,7 @@ gtr_t5_xxl = ModelMeta(
689
706
  gtr_t5_base = ModelMeta(
690
707
  loader=sentence_transformers_loader,
691
708
  name="sentence-transformers/gtr-t5-base",
709
+ model_type=["dense"],
692
710
  languages=["eng-Latn"], # in format eng-Latn
693
711
  open_weights=True,
694
712
  revision="7027e9594267928589816394bdd295273ddc0739",
@@ -1,31 +1,32 @@
1
- from mteb.models.model_meta import ModelMeta
2
- from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
-
4
- codemodernbert_crow_meta = ModelMeta(
5
- loader=sentence_transformers_loader,
6
- name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
- languages=["eng-Latn"],
8
- open_weights=True,
9
- revision="044a7a4b552f86e284817234c336bccf16f895ce",
10
- release_date="2025-04-21",
11
- n_parameters=151668480,
12
- memory_usage_mb=607,
13
- embed_dim=768,
14
- license="apache-2.0",
15
- max_tokens=1024,
16
- reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
17
- similarity_fn_name="cosine",
18
- framework=["Sentence Transformers", "PyTorch"],
19
- use_instructions=False,
20
- public_training_code=None,
21
- public_training_data=None,
22
- training_datasets={
23
- "CodeSearchNetRetrieval",
24
- # "code-search-net/code_search_net",
25
- # "Shuu12121/python-codesearch-filtered",
26
- # "Shuu12121/java-codesearch-filtered",
27
- # "Shuu12121/javascript-codesearch-filtered",
28
- # "Shuu12121/ruby-codesearch-filtered",
29
- # "Shuu12121/rust-codesearch-filtered",
30
- },
31
- )
1
+ from mteb.models.model_meta import ModelMeta
2
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
+
4
+ codemodernbert_crow_meta = ModelMeta(
5
+ loader=sentence_transformers_loader,
6
+ name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
7
+ model_type=["dense"],
8
+ languages=["eng-Latn"],
9
+ open_weights=True,
10
+ revision="044a7a4b552f86e284817234c336bccf16f895ce",
11
+ release_date="2025-04-21",
12
+ n_parameters=151668480,
13
+ memory_usage_mb=607,
14
+ embed_dim=768,
15
+ license="apache-2.0",
16
+ max_tokens=1024,
17
+ reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
18
+ similarity_fn_name="cosine",
19
+ framework=["Sentence Transformers", "PyTorch"],
20
+ use_instructions=False,
21
+ public_training_code=None,
22
+ public_training_data=None,
23
+ training_datasets={
24
+ "CodeSearchNetRetrieval",
25
+ # "code-search-net/code_search_net",
26
+ # "Shuu12121/python-codesearch-filtered",
27
+ # "Shuu12121/java-codesearch-filtered",
28
+ # "Shuu12121/javascript-codesearch-filtered",
29
+ # "Shuu12121/ruby-codesearch-filtered",
30
+ # "Shuu12121/rust-codesearch-filtered",
31
+ },
32
+ )
@@ -125,6 +125,7 @@ siglip_training_datasets = set(
125
125
  siglip_so400m_patch14_224 = ModelMeta(
126
126
  loader=SiglipModelWrapper, # type: ignore
127
127
  name="google/siglip-so400m-patch14-224",
128
+ model_type=["dense"],
128
129
  languages=["eng-Latn"],
129
130
  revision="d04cf29fca7b6374f74d8bea1969314492266b5e",
130
131
  release_date="2024-01-08",
@@ -148,6 +149,7 @@ siglip_so400m_patch14_224 = ModelMeta(
148
149
  siglip_so400m_patch14_384 = ModelMeta(
149
150
  loader=SiglipModelWrapper, # type: ignore
150
151
  name="google/siglip-so400m-patch14-384",
152
+ model_type=["dense"],
151
153
  languages=["eng-Latn"],
152
154
  revision="9fdffc58afc957d1a03a25b10dba0329ab15c2a3",
153
155
  release_date="2024-01-08",
@@ -171,6 +173,7 @@ siglip_so400m_patch14_384 = ModelMeta(
171
173
  siglip_so400m_patch16_256_i18n = ModelMeta(
172
174
  loader=SiglipModelWrapper, # type: ignore
173
175
  name="google/siglip-so400m-patch16-256-i18n",
176
+ model_type=["dense"],
174
177
  languages=["eng-Latn"],
175
178
  revision="365d321c0cfdea96bc28e3a29787a11a062681a1",
176
179
  release_date="2024-01-08",
@@ -194,6 +197,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
194
197
  siglip_base_patch16_256_multilingual = ModelMeta(
195
198
  loader=SiglipModelWrapper, # type: ignore
196
199
  name="google/siglip-base-patch16-256-multilingual",
200
+ model_type=["dense"],
197
201
  languages=["eng-Latn"],
198
202
  revision="8952a4eafcde3cb7ab46b1dd629b33f8784ca9c6",
199
203
  release_date="2024-01-08",
@@ -217,6 +221,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
217
221
  siglip_base_patch16_256 = ModelMeta(
218
222
  loader=SiglipModelWrapper, # type: ignore
219
223
  name="google/siglip-base-patch16-256",
224
+ model_type=["dense"],
220
225
  languages=["eng-Latn"],
221
226
  revision="b078df89e446d623010d890864d4207fe6399f61",
222
227
  release_date="2024-01-08",
@@ -240,6 +245,7 @@ siglip_base_patch16_256 = ModelMeta(
240
245
  siglip_base_patch16_512 = ModelMeta(
241
246
  loader=SiglipModelWrapper, # type: ignore
242
247
  name="google/siglip-base-patch16-512",
248
+ model_type=["dense"],
243
249
  languages=["eng-Latn"],
244
250
  revision="753a949581523b60257d93e18391e8c27f72eb22",
245
251
  release_date="2024-01-08",
@@ -263,6 +269,7 @@ siglip_base_patch16_512 = ModelMeta(
263
269
  siglip_base_patch16_384 = ModelMeta(
264
270
  loader=SiglipModelWrapper, # type: ignore
265
271
  name="google/siglip-base-patch16-384",
272
+ model_type=["dense"],
266
273
  languages=["eng-Latn"],
267
274
  revision="41aec1c83b32e0a6fca20ad88ba058aa5b5ea394",
268
275
  release_date="2024-01-08",
@@ -286,6 +293,7 @@ siglip_base_patch16_384 = ModelMeta(
286
293
  siglip_base_patch16_224 = ModelMeta(
287
294
  loader=SiglipModelWrapper, # type: ignore
288
295
  name="google/siglip-base-patch16-224",
296
+ model_type=["dense"],
289
297
  languages=["eng-Latn"],
290
298
  revision="7fd15f0689c79d79e38b1c2e2e2370a7bf2761ed",
291
299
  release_date="2024-01-08",
@@ -309,6 +317,7 @@ siglip_base_patch16_224 = ModelMeta(
309
317
  siglip_large_patch16_256 = ModelMeta(
310
318
  loader=SiglipModelWrapper, # type: ignore
311
319
  name="google/siglip-large-patch16-256",
320
+ model_type=["dense"],
312
321
  languages=["eng-Latn"],
313
322
  revision="d0da9f876e7d66b4e250cd2450c3ba2ce735e447",
314
323
  release_date="2024-01-08",
@@ -332,6 +341,7 @@ siglip_large_patch16_256 = ModelMeta(
332
341
  siglip_large_patch16_384 = ModelMeta(
333
342
  loader=SiglipModelWrapper, # type: ignore
334
343
  name="google/siglip-large-patch16-384",
344
+ model_type=["dense"],
335
345
  languages=["eng-Latn"],
336
346
  revision="ce005573a40965dfd21fd937fbdeeebf2439fc35",
337
347
  release_date="2024-01-08",
@@ -218,6 +218,7 @@ sonar_langs = [
218
218
  sonar = ModelMeta(
219
219
  loader=None,
220
220
  name="facebook/SONAR",
221
+ model_type=["dense"],
221
222
  languages=sonar_langs,
222
223
  open_weights=True,
223
224
  use_instructions=False, # it does take a language code as input
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
  spartan8806_atles_champion_embedding = ModelMeta(
7
7
  loader=sentence_transformers_loader,
8
8
  name="spartan8806/atles-champion-embedding",
9
+ model_type=["dense"],
9
10
  languages=["eng-Latn"],
10
11
  open_weights=True,
11
12
  revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
@@ -59,6 +59,7 @@ stella_en_400m = ModelMeta(
59
59
  torch_dtype="auto",
60
60
  ),
61
61
  name="NovaSearch/stella_en_400M_v5",
62
+ model_type=["dense"],
62
63
  languages=["eng-Latn"],
63
64
  open_weights=True,
64
65
  use_instructions=True,
@@ -87,6 +88,7 @@ stella_en_1_5b = ModelMeta(
87
88
  torch_dtype="auto",
88
89
  ),
89
90
  name="NovaSearch/stella_en_1.5B_v5",
91
+ model_type=["dense"],
90
92
  languages=["eng-Latn"],
91
93
  open_weights=True,
92
94
  use_instructions=True,
@@ -109,6 +111,7 @@ stella_en_1_5b = ModelMeta(
109
111
  stella_large_zh_v3_1792d = ModelMeta(
110
112
  loader=sentence_transformers_loader,
111
113
  name="dunzhang/stella-large-zh-v3-1792d",
114
+ model_type=["dense"],
112
115
  languages=["zho-Hans"],
113
116
  open_weights=True,
114
117
  revision="d5d39eb8cd11c80a63df53314e59997074469f09",
@@ -135,6 +138,7 @@ stella_large_zh_v3_1792d = ModelMeta(
135
138
  stella_base_zh_v3_1792d = ModelMeta(
136
139
  loader=sentence_transformers_loader,
137
140
  name="infgrad/stella-base-zh-v3-1792d",
141
+ model_type=["dense"],
138
142
  languages=["zho-Hans"],
139
143
  open_weights=True,
140
144
  revision="82254892a0fba125aa2abf3a4800d2dd12821343",
@@ -162,6 +166,7 @@ stella_base_zh_v3_1792d = ModelMeta(
162
166
  stella_mrl_large_zh_v3_5_1792d = ModelMeta(
163
167
  loader=sentence_transformers_loader,
164
168
  name="dunzhang/stella-mrl-large-zh-v3.5-1792d",
169
+ model_type=["dense"],
165
170
  languages=["zho-Hans"],
166
171
  open_weights=True,
167
172
  revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
@@ -185,6 +190,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
185
190
  zpoint_large_embedding_zh = ModelMeta(
186
191
  loader=sentence_transformers_loader,
187
192
  name="iampanda/zpoint_large_embedding_zh",
193
+ model_type=["dense"],
188
194
  languages=["zho-Hans"],
189
195
  open_weights=True,
190
196
  revision="b1075144f440ab4409c05622c1179130ebd57d03",
@@ -321,6 +321,7 @@ training_data = {
321
321
  tarka_embedding_150m_v1 = ModelMeta(
322
322
  loader=gemma_embedding_loader,
323
323
  name="Tarka-AIR/Tarka-Embedding-150M-V1",
324
+ model_type=["dense"],
324
325
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
325
326
  open_weights=True,
326
327
  revision="b0ffecc4ef0d873e517507ed080e43b88b2704b9",
@@ -354,6 +355,7 @@ tarka_embedding_350m_v1 = ModelMeta(
354
355
  loader=InstructSentenceTransformerModel,
355
356
  loader_kwargs=tark_embedding_350_v1_kwargs,
356
357
  name="Tarka-AIR/Tarka-Embedding-350M-V1",
358
+ model_type=["dense"],
357
359
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
358
360
  open_weights=True,
359
361
  revision="a850d6a329145474727424fed6b12b62096b8ba3",
@@ -5,6 +5,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
5
5
 
6
6
  xlm_roberta_ua_distilled = ModelMeta(
7
7
  name="panalexeu/xlm-roberta-ua-distilled",
8
+ model_type=["dense"],
8
9
  loader=sentence_transformers_loader,
9
10
  n_parameters=278_000_000,
10
11
  memory_usage_mb=1061,
@@ -61,6 +61,7 @@ uae_large_v1 = ModelMeta(
61
61
  },
62
62
  ),
63
63
  name="WhereIsAI/UAE-Large-V1",
64
+ model_type=["dense"],
64
65
  languages=["eng-Latn"],
65
66
  open_weights=True,
66
67
  revision="369c368f70f16a613f19f5598d4f12d9f44235d4",
@@ -25,6 +25,7 @@ vdr_2b_multi_v1 = ModelMeta(
25
25
  apply_instruction_to_passages=True,
26
26
  ),
27
27
  name="llamaindex/vdr-2b-multi-v1",
28
+ model_type=["dense"],
28
29
  languages=vdr_languages,
29
30
  open_weights=True,
30
31
  revision="2c4e54c8db4071cc61fc3c62f4490124e40c37db",
@@ -7,6 +7,7 @@ greennode_embedding_large_vn_v1_training_data = {
7
7
 
8
8
  greennode_embedding_large_vn_v1 = ModelMeta(
9
9
  name="GreenNode/GreenNode-Embedding-Large-VN-V1",
10
+ model_type=["dense"],
10
11
  revision="660def1f6e1c8ecdf39f6f9c95829e3cf0cef837",
11
12
  release_date="2024-04-11",
12
13
  languages=[
@@ -31,6 +32,7 @@ greennode_embedding_large_vn_v1 = ModelMeta(
31
32
 
32
33
  greennode_embedding_large_vn_mixed_v1 = ModelMeta(
33
34
  name="GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1",
35
+ model_type=["dense"],
34
36
  revision="1d3dddb3862292dab4bd3eddf0664c0335ad5843",
35
37
  release_date="2024-04-11",
36
38
  languages=[
@@ -55,6 +57,7 @@ greennode_embedding_large_vn_mixed_v1 = ModelMeta(
55
57
 
56
58
  aiteamvn_vietnamese_embeddings = ModelMeta(
57
59
  name="AITeamVN/Vietnamese_Embedding",
60
+ model_type=["dense"],
58
61
  revision="fcbbb905e6c3757d421aaa5db6fd7c53d038f6fb",
59
62
  release_date="2024-03-17",
60
63
  languages=[
@@ -85,6 +88,7 @@ aiteamvn_vietnamese_embeddings = ModelMeta(
85
88
 
86
89
  hiieu_halong_embedding = ModelMeta(
87
90
  name="hiieu/halong_embedding",
91
+ model_type=["dense"],
88
92
  revision="b57776031035f70ed2030d2e35ecc533eb0f8f71",
89
93
  release_date="2024-07-06",
90
94
  languages=[
@@ -115,6 +119,7 @@ hiieu_halong_embedding = ModelMeta(
115
119
 
116
120
  sup_simcse_vietnamese_phobert_base_ = ModelMeta(
117
121
  name="VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
122
+ model_type=["dense"],
118
123
  revision="608779b86741a8acd8c8d38132974ff04086b138",
119
124
  release_date="2021-05-26",
120
125
  languages=[
@@ -152,6 +157,7 @@ pages = {1037--1042}
152
157
 
153
158
  bkai_foundation_models_vietnamese_bi_encoder = ModelMeta(
154
159
  name="bkai-foundation-models/vietnamese-bi-encoder",
160
+ model_type=["dense"],
155
161
  revision="84f9d9ada0d1a3c37557398b9ae9fcedcdf40be0",
156
162
  release_date="2023-09-09",
157
163
  languages=[
@@ -247,6 +247,7 @@ visualized_bge_base = ModelMeta(
247
247
  image_tokens_num=196,
248
248
  ),
249
249
  name="BAAI/bge-visualized-base",
250
+ model_type=["dense"],
250
251
  languages=["eng-Latn"],
251
252
  revision="98db10b10d22620010d06f11733346e1c98c34aa",
252
253
  release_date="2024-06-06",
@@ -274,6 +275,7 @@ visualized_bge_m3 = ModelMeta(
274
275
  image_tokens_num=256,
275
276
  ),
276
277
  name="BAAI/bge-visualized-m3",
278
+ model_type=["dense"],
277
279
  languages=["eng-Latn"],
278
280
  revision="98db10b10d22620010d06f11733346e1c98c34aa",
279
281
  release_date="2024-06-06",
@@ -269,6 +269,7 @@ vlm2vec_training_datasets = set(
269
269
  vlm2vec_lora = ModelMeta(
270
270
  loader=VLM2VecWrapper,
271
271
  name="TIGER-Lab/VLM2Vec-LoRA",
272
+ model_type=["dense"],
272
273
  languages=["eng-Latn"],
273
274
  revision="7403b6327958071c1e33c822c7453adadccc7298",
274
275
  release_date="2024-10-08",
@@ -292,6 +293,7 @@ vlm2vec_lora = ModelMeta(
292
293
  vlm2vec_full = ModelMeta(
293
294
  loader=VLM2VecWrapper,
294
295
  name="TIGER-Lab/VLM2Vec-Full",
296
+ model_type=["dense"],
295
297
  languages=["eng-Latn"],
296
298
  revision="e9afa98002097ac2471827ba23ea1f2ddd229480",
297
299
  release_date="2024-10-08",
@@ -208,6 +208,7 @@ model_prompts = {
208
208
 
209
209
  voyage_3_large = ModelMeta(
210
210
  name="voyageai/voyage-3-large", # Date of publication of this post https://blog.voyageai.com/2025/01/07/voyage-3-large/
211
+ model_type=["dense"],
211
212
  revision="1",
212
213
  release_date="2025-01-07",
213
214
  languages=None, # supported languages not specified
@@ -234,6 +235,7 @@ voyage_3_large = ModelMeta(
234
235
 
235
236
  voyage_3_5 = ModelMeta(
236
237
  name="voyageai/voyage-3.5",
238
+ model_type=["dense"],
237
239
  revision="1",
238
240
  release_date="2025-01-21",
239
241
  languages=None, # supported languages not specified
@@ -259,6 +261,7 @@ voyage_3_5 = ModelMeta(
259
261
 
260
262
  voyage_3_5_int8 = ModelMeta(
261
263
  name="voyageai/voyage-3.5 (output_dtype=int8)",
264
+ model_type=["dense"],
262
265
  revision="1",
263
266
  release_date="2025-01-21",
264
267
  languages=None, # supported languages not specified
@@ -285,6 +288,7 @@ voyage_3_5_int8 = ModelMeta(
285
288
 
286
289
  voyage_3_5_binary = ModelMeta(
287
290
  name="voyageai/voyage-3.5 (output_dtype=binary)",
291
+ model_type=["dense"],
288
292
  revision="1",
289
293
  release_date="2025-01-21",
290
294
  languages=None, # supported languages not specified
@@ -311,6 +315,7 @@ voyage_3_5_binary = ModelMeta(
311
315
 
312
316
  voyage_large_2_instruct = ModelMeta(
313
317
  name="voyageai/voyage-large-2-instruct",
318
+ model_type=["dense"],
314
319
  revision="1",
315
320
  release_date="2024-05-05",
316
321
  languages=None, # supported languages not specified
@@ -336,6 +341,7 @@ voyage_large_2_instruct = ModelMeta(
336
341
 
337
342
  voyage_finance_2 = ModelMeta(
338
343
  name="voyageai/voyage-finance-2",
344
+ model_type=["dense"],
339
345
  revision="1",
340
346
  release_date="2024-05-30",
341
347
  languages=None, # supported languages not specified
@@ -361,6 +367,7 @@ voyage_finance_2 = ModelMeta(
361
367
 
362
368
  voyage_law_2 = ModelMeta(
363
369
  name="voyageai/voyage-law-2",
370
+ model_type=["dense"],
364
371
  revision="1",
365
372
  release_date="2024-04-15",
366
373
  languages=None, # supported languages not specified
@@ -386,6 +393,7 @@ voyage_law_2 = ModelMeta(
386
393
 
387
394
  voyage_code_2 = ModelMeta(
388
395
  name="voyageai/voyage-code-2",
396
+ model_type=["dense"],
389
397
  revision="1",
390
398
  release_date="2024-01-23",
391
399
  languages=None, # supported languages not specified
@@ -411,6 +419,7 @@ voyage_code_2 = ModelMeta(
411
419
 
412
420
  voyage_code_3 = ModelMeta(
413
421
  name="voyageai/voyage-code-3",
422
+ model_type=["dense"],
414
423
  revision="1",
415
424
  release_date="2024-12-04",
416
425
  languages=None, # supported languages not specified
@@ -437,6 +446,7 @@ voyage_code_3 = ModelMeta(
437
446
 
438
447
  voyage_large_2 = ModelMeta(
439
448
  name="voyageai/voyage-large-2", # Date of publication of this post https://blog.voyageai.com/2023/10/29/voyage-embeddings/
449
+ model_type=["dense"],
440
450
  revision="1",
441
451
  release_date="2023-10-29",
442
452
  languages=None, # supported languages not specified
@@ -462,6 +472,7 @@ voyage_large_2 = ModelMeta(
462
472
 
463
473
  voyage_2 = ModelMeta(
464
474
  name="voyageai/voyage-2",
475
+ model_type=["dense"],
465
476
  revision="1",
466
477
  release_date="2023-10-29",
467
478
  languages=None, # supported languages not specified
@@ -486,6 +497,7 @@ voyage_2 = ModelMeta(
486
497
  )
487
498
  voyage_multilingual_2 = ModelMeta(
488
499
  name="voyageai/voyage-multilingual-2",
500
+ model_type=["dense"],
489
501
  revision="1",
490
502
  release_date="2024-06-10",
491
503
  languages=None, # supported languages not specified
@@ -511,6 +523,7 @@ voyage_multilingual_2 = ModelMeta(
511
523
 
512
524
  voyage_3 = ModelMeta(
513
525
  name="voyageai/voyage-3",
526
+ model_type=["dense"],
514
527
  revision="1",
515
528
  release_date="2024-09-18",
516
529
  languages=None, # supported languages not specified
@@ -536,6 +549,7 @@ voyage_3 = ModelMeta(
536
549
 
537
550
  voyage_3_lite = ModelMeta(
538
551
  name="voyageai/voyage-3-lite",
552
+ model_type=["dense"],
539
553
  revision="1",
540
554
  release_date="2024-09-18",
541
555
  languages=None, # supported languages not specified
@@ -561,6 +575,7 @@ voyage_3_lite = ModelMeta(
561
575
 
562
576
  voyage_3_exp = ModelMeta(
563
577
  name="voyageai/voyage-3-m-exp",
578
+ model_type=["dense"],
564
579
  revision="1",
565
580
  release_date="2025-01-08",
566
581
  languages=["eng-Latn"],
@@ -204,6 +204,7 @@ def voyage_v_loader(model_name, **kwargs):
204
204
  voyage_v = ModelMeta(
205
205
  loader=voyage_v_loader, # type: ignore
206
206
  name="voyageai/voyage-multimodal-3",
207
+ model_type=["dense"],
207
208
  languages=[], # Unknown
208
209
  revision="1",
209
210
  release_date="2024-11-10",
@@ -24,6 +24,7 @@ xyz_zh_datasets = {
24
24
 
25
25
  xyz_embedding = ModelMeta(
26
26
  name="fangxq/XYZ-embedding",
27
+ model_type=["dense"],
27
28
  languages=["zho-Hans"],
28
29
  loader=sentence_transformers_loader,
29
30
  open_weights=True,
@@ -115,6 +115,7 @@ Youtu_Embedding_V1 = ModelMeta(
115
115
  max_seq_length=8192,
116
116
  ),
117
117
  name="tencent/Youtu-Embedding",
118
+ model_type=["dense"],
118
119
  languages=["zho-Hans"],
119
120
  revision="32e04afc24817c187a8422e7bdbb493b19796d47",
120
121
  release_date="2025-09-28",
@@ -13,6 +13,7 @@ yuan_emb_zh_datasets = {
13
13
 
14
14
  yuan_embedding_2_zh = ModelMeta(
15
15
  name="IEITYuan/Yuan-embedding-2.0-zh",
16
+ model_type=["dense"],
16
17
  loader=sentence_transformers_loader,
17
18
  languages=["zho-Hans"],
18
19
  open_weights=True,
@@ -37,6 +37,7 @@ yuan_embedding_2_en = ModelMeta(
37
37
  apply_instruction_to_passages=False,
38
38
  ),
39
39
  name="IEITYuan/Yuan-embedding-2.0-en",
40
+ model_type=["dense"],
40
41
  languages=["eng-Latn"],
41
42
  open_weights=True,
42
43
  revision="b2fd15da3bcae3473c8529593825c15068f09fce",