mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
@@ -219,6 +219,7 @@ monobert_large = ModelMeta(
219
219
  fp_options="float16",
220
220
  ),
221
221
  name="castorini/monobert-large-msmarco",
222
+ model_type=["cross-encoder"],
222
223
  languages=["eng-Latn"],
223
224
  open_weights=True,
224
225
  revision="0a97706f3827389da43b83348d5d18c9d53876fa",
@@ -234,7 +235,6 @@ monobert_large = ModelMeta(
234
235
  use_instructions=None,
235
236
  training_datasets=None,
236
237
  framework=["Sentence Transformers", "PyTorch"],
237
- is_cross_encoder=True,
238
238
  )
239
239
 
240
240
  # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
@@ -244,6 +244,7 @@ jina_reranker_multilingual = ModelMeta(
244
244
  fp_options="float16",
245
245
  ),
246
246
  name="jinaai/jina-reranker-v2-base-multilingual",
247
+ model_type=["cross-encoder"],
247
248
  languages=["eng-Latn"],
248
249
  open_weights=True,
249
250
  revision="126747772a932960028d9f4dc93bd5d9c4869be4",
@@ -259,7 +260,6 @@ jina_reranker_multilingual = ModelMeta(
259
260
  use_instructions=None,
260
261
  training_datasets=None,
261
262
  framework=["Sentence Transformers", "PyTorch"],
262
- is_cross_encoder=True,
263
263
  )
264
264
 
265
265
  bge_reranker_v2_m3 = ModelMeta(
@@ -268,6 +268,7 @@ bge_reranker_v2_m3 = ModelMeta(
268
268
  fp_options="float16",
269
269
  ),
270
270
  name="BAAI/bge-reranker-v2-m3",
271
+ model_type=["cross-encoder"],
271
272
  languages=[
272
273
  "eng-Latn",
273
274
  "ara-Arab",
@@ -316,7 +317,6 @@ bge_reranker_v2_m3 = ModelMeta(
316
317
  use_instructions=None,
317
318
  training_datasets=bge_m3_training_data,
318
319
  framework=["Sentence Transformers", "PyTorch"],
319
- is_cross_encoder=True,
320
320
  citation="""
321
321
  @misc{li2023making,
322
322
  title={Making Large Language Models A Better Foundation For Dense Retrieval},
@@ -315,6 +315,7 @@ monot5_small = ModelMeta(
315
315
  fp_options="float16",
316
316
  ),
317
317
  name="castorini/monot5-small-msmarco-10k",
318
+ model_type=["cross-encoder"],
318
319
  languages=["eng-Latn"],
319
320
  open_weights=True,
320
321
  revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e",
@@ -330,7 +331,6 @@ monot5_small = ModelMeta(
330
331
  use_instructions=None,
331
332
  training_datasets=None,
332
333
  framework=["PyTorch"],
333
- is_cross_encoder=True,
334
334
  citation="""@misc{rosa2022parameterleftbehinddistillation,
335
335
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
336
336
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -348,6 +348,7 @@ monot5_base = ModelMeta(
348
348
  fp_options="float16",
349
349
  ),
350
350
  name="castorini/monot5-base-msmarco-10k",
351
+ model_type=["cross-encoder"],
351
352
  languages=["eng-Latn"],
352
353
  open_weights=True,
353
354
  revision="f15657ab3d2a5dd0b9a30c8c0b6a0a73c9cb5884",
@@ -372,7 +373,6 @@ monot5_base = ModelMeta(
372
373
  use_instructions=None,
373
374
  training_datasets=None,
374
375
  framework=["PyTorch"],
375
- is_cross_encoder=True,
376
376
  )
377
377
 
378
378
  monot5_large = ModelMeta(
@@ -381,6 +381,7 @@ monot5_large = ModelMeta(
381
381
  fp_options="float16",
382
382
  ),
383
383
  name="castorini/monot5-large-msmarco-10k",
384
+ model_type=["cross-encoder"],
384
385
  languages=["eng-Latn"],
385
386
  open_weights=True,
386
387
  revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98",
@@ -396,7 +397,6 @@ monot5_large = ModelMeta(
396
397
  use_instructions=None,
397
398
  training_datasets=None,
398
399
  framework=["PyTorch"],
399
- is_cross_encoder=True,
400
400
  citation="""@misc{rosa2022parameterleftbehinddistillation,
401
401
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
402
402
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -414,6 +414,7 @@ monot5_3b = ModelMeta(
414
414
  fp_options="float16",
415
415
  ),
416
416
  name="castorini/monot5-3b-msmarco-10k",
417
+ model_type=["cross-encoder"],
417
418
  languages=["eng-Latn"],
418
419
  open_weights=True,
419
420
  revision="bc0c419a438c81f592f878ce32430a1823f5db6c",
@@ -429,7 +430,6 @@ monot5_3b = ModelMeta(
429
430
  use_instructions=None,
430
431
  training_datasets=None,
431
432
  framework=["PyTorch"],
432
- is_cross_encoder=True,
433
433
  citation="""@misc{rosa2022parameterleftbehinddistillation,
434
434
  title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
435
435
  author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
@@ -447,6 +447,7 @@ flant5_base = ModelMeta(
447
447
  fp_options="float16",
448
448
  ),
449
449
  name="google/flan-t5-base",
450
+ model_type=["cross-encoder"],
450
451
  languages=["eng-Latn"],
451
452
  open_weights=True,
452
453
  revision="7bcac572ce56db69c1ea7c8af255c5d7c9672fc2",
@@ -484,7 +485,6 @@ flant5_base = ModelMeta(
484
485
  similarity_fn_name=None,
485
486
  use_instructions=None,
486
487
  framework=["PyTorch"],
487
- is_cross_encoder=True,
488
488
  )
489
489
 
490
490
  flant5_large = ModelMeta(
@@ -493,6 +493,7 @@ flant5_large = ModelMeta(
493
493
  fp_options="float16",
494
494
  ),
495
495
  name="google/flan-t5-large",
496
+ model_type=["cross-encoder"],
496
497
  languages=["eng-Latn"],
497
498
  open_weights=True,
498
499
  revision="0613663d0d48ea86ba8cb3d7a44f0f65dc596a2a",
@@ -530,7 +531,6 @@ flant5_large = ModelMeta(
530
531
  similarity_fn_name=None,
531
532
  use_instructions=None,
532
533
  framework=["PyTorch"],
533
- is_cross_encoder=True,
534
534
  )
535
535
 
536
536
  flant5_xl = ModelMeta(
@@ -539,6 +539,7 @@ flant5_xl = ModelMeta(
539
539
  fp_options="float16",
540
540
  ),
541
541
  name="google/flan-t5-xl",
542
+ model_type=["cross-encoder"],
542
543
  languages=["eng-Latn"],
543
544
  open_weights=True,
544
545
  revision="7d6315df2c2fb742f0f5b556879d730926ca9001",
@@ -576,7 +577,6 @@ flant5_xl = ModelMeta(
576
577
  similarity_fn_name=None,
577
578
  use_instructions=None,
578
579
  framework=["PyTorch"],
579
- is_cross_encoder=True,
580
580
  )
581
581
 
582
582
  flant5_xxl = ModelMeta(
@@ -585,6 +585,7 @@ flant5_xxl = ModelMeta(
585
585
  fp_options="float16",
586
586
  ),
587
587
  name="google/flan-t5-xxl",
588
+ model_type=["cross-encoder"],
588
589
  languages=["eng-Latn"],
589
590
  open_weights=True,
590
591
  revision="ae7c9136adc7555eeccc78cdd960dfd60fb346ce",
@@ -622,7 +623,6 @@ flant5_xxl = ModelMeta(
622
623
  similarity_fn_name=None,
623
624
  use_instructions=None,
624
625
  framework=["PyTorch"],
625
- is_cross_encoder=True,
626
626
  )
627
627
 
628
628
 
@@ -632,6 +632,7 @@ llama2_7b = ModelMeta(
632
632
  fp_options="float16",
633
633
  ),
634
634
  name="meta-llama/Llama-2-7b-hf",
635
+ model_type=["cross-encoder"],
635
636
  languages=["eng-Latn"],
636
637
  open_weights=True,
637
638
  revision="01c7f73d771dfac7d292323805ebc428287df4f9",
@@ -656,7 +657,6 @@ llama2_7b = ModelMeta(
656
657
  primaryClass={cs.CL},
657
658
  url={https://arxiv.org/abs/2307.09288},
658
659
  }""",
659
- is_cross_encoder=True,
660
660
  )
661
661
 
662
662
  llama2_7b_chat = ModelMeta(
@@ -665,6 +665,7 @@ llama2_7b_chat = ModelMeta(
665
665
  fp_options="float16",
666
666
  ),
667
667
  name="meta-llama/Llama-2-7b-chat-hf",
668
+ model_type=["cross-encoder"],
668
669
  languages=["eng-Latn"],
669
670
  open_weights=True,
670
671
  revision="f5db02db724555f92da89c216ac04704f23d4590",
@@ -689,7 +690,6 @@ llama2_7b_chat = ModelMeta(
689
690
  use_instructions=None,
690
691
  training_datasets=None,
691
692
  framework=["PyTorch"],
692
- is_cross_encoder=True,
693
693
  )
694
694
 
695
695
  mistral_7b = ModelMeta(
@@ -698,6 +698,7 @@ mistral_7b = ModelMeta(
698
698
  fp_options="float16",
699
699
  ),
700
700
  name="mistralai/Mistral-7B-Instruct-v0.2",
701
+ model_type=["cross-encoder"],
701
702
  languages=["eng-Latn"],
702
703
  open_weights=True,
703
704
  revision="3ad372fc79158a2148299e3318516c786aeded6c",
@@ -722,7 +723,6 @@ mistral_7b = ModelMeta(
722
723
  primaryClass={cs.CL},
723
724
  url={https://arxiv.org/abs/2310.06825},
724
725
  }""",
725
- is_cross_encoder=True,
726
726
  )
727
727
 
728
728
  followir_7b = ModelMeta(
@@ -731,6 +731,7 @@ followir_7b = ModelMeta(
731
731
  fp_options="float16",
732
732
  ),
733
733
  name="jhu-clsp/FollowIR-7B",
734
+ model_type=["cross-encoder"],
734
735
  languages=["eng-Latn"],
735
736
  open_weights=True,
736
737
  revision="4d25d437e38b510c01852070c0731e8f6e1875d1",
@@ -758,7 +759,6 @@ followir_7b = ModelMeta(
758
759
  primaryClass={cs.IR}
759
760
  }
760
761
  """,
761
- is_cross_encoder=True,
762
762
  )
763
763
 
764
764
 
@@ -874,6 +874,7 @@ mt5_base_mmarco_v2 = ModelMeta(
874
874
  fp_options="float16",
875
875
  ),
876
876
  name="unicamp-dl/mt5-base-mmarco-v2",
877
+ model_type=["cross-encoder"],
877
878
  languages=mt5_languages,
878
879
  open_weights=True,
879
880
  revision="cc0a949b9f21efcaba45c8cabb998ad02ce8d4e7",
@@ -898,7 +899,6 @@ mt5_base_mmarco_v2 = ModelMeta(
898
899
  similarity_fn_name=None,
899
900
  use_instructions=None,
900
901
  framework=["PyTorch"],
901
- is_cross_encoder=True,
902
902
  )
903
903
 
904
904
  mt5_13b_mmarco_100k = ModelMeta(
@@ -907,6 +907,7 @@ mt5_13b_mmarco_100k = ModelMeta(
907
907
  fp_options="float16",
908
908
  ),
909
909
  name="unicamp-dl/mt5-13b-mmarco-100k",
910
+ model_type=["cross-encoder"],
910
911
  languages=mt5_languages,
911
912
  open_weights=True,
912
913
  revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
@@ -922,5 +923,4 @@ mt5_13b_mmarco_100k = ModelMeta(
922
923
  use_instructions=None,
923
924
  training_datasets=None,
924
925
  framework=["PyTorch"],
925
- is_cross_encoder=True,
926
926
  )
@@ -9,6 +9,7 @@ from .stella_models import stella_zh_datasets
9
9
  ritrieve_zh_v1 = ModelMeta(
10
10
  loader=SentenceTransformerEncoderWrapper,
11
11
  name="richinfoai/ritrieve_zh_v1",
12
+ model_type=["dense"],
12
13
  languages=["zho-Hans"],
13
14
  open_weights=True,
14
15
  revision="f8d5a707656c55705027678e311f9202c8ced12c",
@@ -238,6 +238,7 @@ GIGA_task_prompts = {
238
238
  rubert_tiny = ModelMeta(
239
239
  loader=sentence_transformers_loader,
240
240
  name="cointegrated/rubert-tiny",
241
+ model_type=["dense"],
241
242
  languages=["rus-Cyrl"],
242
243
  open_weights=True,
243
244
  revision="5441c5ea8026d4f6d7505ec004845409f1259fb1",
@@ -263,6 +264,7 @@ rubert_tiny = ModelMeta(
263
264
  rubert_tiny2 = ModelMeta(
264
265
  loader=sentence_transformers_loader,
265
266
  name="cointegrated/rubert-tiny2",
267
+ model_type=["dense"],
266
268
  languages=["rus-Cyrl"],
267
269
  open_weights=True,
268
270
  revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3",
@@ -289,6 +291,7 @@ rubert_tiny2 = ModelMeta(
289
291
  sbert_large_nlu_ru = ModelMeta(
290
292
  loader=sentence_transformers_loader,
291
293
  name="ai-forever/sbert_large_nlu_ru",
294
+ model_type=["dense"],
292
295
  languages=["rus-Cyrl"],
293
296
  open_weights=True,
294
297
  revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a",
@@ -314,6 +317,7 @@ sbert_large_nlu_ru = ModelMeta(
314
317
  sbert_large_mt_nlu_ru = ModelMeta(
315
318
  loader=sentence_transformers_loader,
316
319
  name="ai-forever/sbert_large_mt_nlu_ru",
320
+ model_type=["dense"],
317
321
  languages=["rus-Cyrl"],
318
322
  open_weights=True,
319
323
  revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0",
@@ -341,6 +345,7 @@ user_base_ru = ModelMeta(
341
345
  model_prompts={"query": "query: ", "document": "passage: "},
342
346
  ),
343
347
  name="deepvk/USER-base",
348
+ model_type=["dense"],
344
349
  languages=["rus-Cyrl"],
345
350
  open_weights=True,
346
351
  revision="436a489a2087d61aa670b3496a9915f84e46c861",
@@ -401,6 +406,7 @@ user_base_ru = ModelMeta(
401
406
  user_bge_m3 = ModelMeta(
402
407
  loader=sentence_transformers_loader,
403
408
  name="deepvk/USER-bge-m3",
409
+ model_type=["dense"],
404
410
  languages=["rus-Cyrl"],
405
411
  open_weights=True,
406
412
  revision="0cc6cfe48e260fb0474c753087a69369e88709ae",
@@ -451,6 +457,7 @@ user_bge_m3 = ModelMeta(
451
457
  deberta_v1_ru = ModelMeta(
452
458
  loader=sentence_transformers_loader,
453
459
  name="deepvk/deberta-v1-base",
460
+ model_type=["dense"],
454
461
  languages=["rus-Cyrl"],
455
462
  open_weights=True,
456
463
  revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4",
@@ -481,6 +488,7 @@ deberta_v1_ru = ModelMeta(
481
488
  rubert_base_cased = ModelMeta(
482
489
  loader=sentence_transformers_loader,
483
490
  name="DeepPavlov/rubert-base-cased",
491
+ model_type=["dense"],
484
492
  languages=["rus-Cyrl"],
485
493
  open_weights=True,
486
494
  revision="4036cab694767a299f2b9e6492909664d9414229",
@@ -516,6 +524,7 @@ rubert_base_cased = ModelMeta(
516
524
  distilrubert_small_cased_conversational = ModelMeta(
517
525
  loader=sentence_transformers_loader,
518
526
  name="DeepPavlov/distilrubert-small-cased-conversational",
527
+ model_type=["dense"],
519
528
  languages=["rus-Cyrl"],
520
529
  open_weights=True,
521
530
  revision="e348066b4a7279b97138038299bddc6580a9169a",
@@ -550,6 +559,7 @@ distilrubert_small_cased_conversational = ModelMeta(
550
559
  rubert_base_cased_sentence = ModelMeta(
551
560
  loader=sentence_transformers_loader,
552
561
  name="DeepPavlov/rubert-base-cased-sentence",
562
+ model_type=["dense"],
553
563
  languages=["rus-Cyrl"],
554
564
  open_weights=True,
555
565
  revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8",
@@ -574,6 +584,7 @@ rubert_base_cased_sentence = ModelMeta(
574
584
  labse_en_ru = ModelMeta(
575
585
  loader=sentence_transformers_loader,
576
586
  name="cointegrated/LaBSE-en-ru",
587
+ model_type=["dense"],
577
588
  languages=["rus-Cyrl"],
578
589
  open_weights=True,
579
590
  revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e",
@@ -601,6 +612,7 @@ turbo_models_datasets = set(
601
612
  rubert_tiny_turbo = ModelMeta(
602
613
  loader=sentence_transformers_loader,
603
614
  name="sergeyzh/rubert-tiny-turbo",
615
+ model_type=["dense"],
604
616
  languages=["rus-Cyrl"],
605
617
  open_weights=True,
606
618
  revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054",
@@ -623,6 +635,7 @@ rubert_tiny_turbo = ModelMeta(
623
635
  rubert_mini_frida = ModelMeta(
624
636
  loader=sentence_transformers_loader,
625
637
  name="sergeyzh/rubert-mini-frida",
638
+ model_type=["dense"],
626
639
  languages=["rus-Cyrl"],
627
640
  open_weights=True,
628
641
  revision="19b279b78afd945b5ccae78f63e284909814adc2",
@@ -650,6 +663,7 @@ rubert_mini_frida = ModelMeta(
650
663
  labse_ru_turbo = ModelMeta(
651
664
  loader=sentence_transformers_loader,
652
665
  name="sergeyzh/LaBSE-ru-turbo",
666
+ model_type=["dense"],
653
667
  languages=["rus-Cyrl"],
654
668
  open_weights=True,
655
669
  revision="1940b046c6b5e125df11722b899130329d0a46da",
@@ -698,6 +712,7 @@ rosberta_ru_en = ModelMeta(
698
712
  model_prompts=rosberta_prompts,
699
713
  ),
700
714
  name="ai-forever/ru-en-RoSBERTa",
715
+ model_type=["dense"],
701
716
  languages=["rus-Cyrl"],
702
717
  open_weights=True,
703
718
  revision="89fb1651989adbb1cfcfdedafd7d102951ad0555",
@@ -863,6 +878,7 @@ frida = ModelMeta(
863
878
  model_prompts=frida_prompts,
864
879
  ),
865
880
  name="ai-forever/FRIDA",
881
+ model_type=["dense"],
866
882
  languages=["rus-Cyrl"],
867
883
  open_weights=True,
868
884
  revision="7292217af9a9e6dbf07048f76b434ad1e2aa8b76",
@@ -896,6 +912,7 @@ giga_embeddings = ModelMeta(
896
912
  },
897
913
  ),
898
914
  name="ai-sage/Giga-Embeddings-instruct",
915
+ model_type=["dense"],
899
916
  languages=["eng-Latn", "rus-Cyrl"],
900
917
  open_weights=True,
901
918
  revision="0ad5b29bfecd806cecc9d66b927d828a736594dc",
@@ -927,6 +944,7 @@ berta_training_datasets = (
927
944
  berta = ModelMeta(
928
945
  loader=sentence_transformers_loader,
929
946
  name="sergeyzh/BERTA",
947
+ model_type=["dense"],
930
948
  languages=["rus-Cyrl"],
931
949
  open_weights=True,
932
950
  revision="914c8c8aed14042ed890fc2c662d5e9e66b2faa7",
@@ -999,6 +1017,7 @@ user2_small = ModelMeta(
999
1017
  model_prompts=user2_prompts,
1000
1018
  ),
1001
1019
  name="deepvk/USER2-small",
1020
+ model_type=["dense"],
1002
1021
  languages=["rus-Cyrl"],
1003
1022
  open_weights=True,
1004
1023
  revision="23f65b34cf7632032061f5cc66c14714e6d4cee4",
@@ -1031,6 +1050,7 @@ user2_base = ModelMeta(
1031
1050
  model_prompts=user2_prompts,
1032
1051
  ),
1033
1052
  name="deepvk/USER2-base",
1053
+ model_type=["dense"],
1034
1054
  languages=["rus-Cyrl"],
1035
1055
  open_weights=True,
1036
1056
  revision="0942cf96909b6d52e61f79a01e2d30c7be640b27",
@@ -32,6 +32,7 @@ cl_nagoya_ruri_v3_30m = ModelMeta(
32
32
  model_prompts=RURI_V3_PROMPTS,
33
33
  ),
34
34
  name="cl-nagoya/ruri-v3-30m",
35
+ model_type=["dense"],
35
36
  languages=["jpn-Jpan"],
36
37
  open_weights=True,
37
38
  revision="24899e5de370b56d179604a007c0d727bf144504",
@@ -62,6 +63,7 @@ cl_nagoya_ruri_v3_70m = ModelMeta(
62
63
  model_prompts=RURI_V3_PROMPTS,
63
64
  ),
64
65
  name="cl-nagoya/ruri-v3-70m",
66
+ model_type=["dense"],
65
67
  languages=["jpn-Jpan"],
66
68
  open_weights=True,
67
69
  revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
@@ -90,6 +92,7 @@ cl_nagoya_ruri_v3_130m = ModelMeta(
90
92
  model_prompts=RURI_V3_PROMPTS,
91
93
  ),
92
94
  name="cl-nagoya/ruri-v3-130m",
95
+ model_type=["dense"],
93
96
  languages=["jpn-Jpan"],
94
97
  open_weights=True,
95
98
  revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
@@ -118,6 +121,7 @@ cl_nagoya_ruri_v3_310m = ModelMeta(
118
121
  model_prompts=RURI_V3_PROMPTS,
119
122
  ),
120
123
  name="cl-nagoya/ruri-v3-310m",
124
+ model_type=["dense"],
121
125
  languages=["jpn-Jpan"],
122
126
  open_weights=True,
123
127
  revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
@@ -147,6 +151,7 @@ cl_nagoya_ruri_small_v2 = ModelMeta(
147
151
  trust_remote_code=True,
148
152
  ),
149
153
  name="cl-nagoya/ruri-small-v2",
154
+ model_type=["dense"],
150
155
  languages=["jpn-Jpan"],
151
156
  open_weights=True,
152
157
  revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
@@ -175,6 +180,7 @@ cl_nagoya_ruri_base_v2 = ModelMeta(
175
180
  model_prompts=RURI_V1_V2_PROMPTS,
176
181
  ),
177
182
  name="cl-nagoya/ruri-base-v2",
183
+ model_type=["dense"],
178
184
  languages=["jpn-Jpan"],
179
185
  open_weights=True,
180
186
  revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
@@ -203,6 +209,7 @@ cl_nagoya_ruri_large_v2 = ModelMeta(
203
209
  model_prompts=RURI_V1_V2_PROMPTS,
204
210
  ),
205
211
  name="cl-nagoya/ruri-large-v2",
212
+ model_type=["dense"],
206
213
  languages=["jpn-Jpan"],
207
214
  open_weights=True,
208
215
  revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
@@ -232,6 +239,7 @@ cl_nagoya_ruri_small_v1 = ModelMeta(
232
239
  trust_remote_code=True,
233
240
  ),
234
241
  name="cl-nagoya/ruri-small",
242
+ model_type=["dense"],
235
243
  languages=["jpn-Jpan"],
236
244
  open_weights=True,
237
245
  revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
@@ -260,6 +268,7 @@ cl_nagoya_ruri_base_v1 = ModelMeta(
260
268
  model_prompts=RURI_V1_V2_PROMPTS,
261
269
  ),
262
270
  name="cl-nagoya/ruri-base",
271
+ model_type=["dense"],
263
272
  languages=["jpn-Jpan"],
264
273
  open_weights=True,
265
274
  revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
@@ -289,6 +298,7 @@ cl_nagoya_ruri_large_v1 = ModelMeta(
289
298
  model_prompts=RURI_V1_V2_PROMPTS,
290
299
  ),
291
300
  name="cl-nagoya/ruri-large",
301
+ model_type=["dense"],
292
302
  languages=["jpn-Jpan"],
293
303
  open_weights=True,
294
304
  revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
@@ -46,6 +46,7 @@ SFR_Embedding_2_R = ModelMeta(
46
46
  normalized=True,
47
47
  ),
48
48
  name="Salesforce/SFR-Embedding-2_R",
49
+ model_type=["dense"],
49
50
  languages=["eng-Latn"],
50
51
  open_weights=True,
51
52
  revision="91762139d94ed4371a9fa31db5551272e0b83818",
@@ -83,6 +84,7 @@ SFR_Embedding_Code_2B_R = ModelMeta(
83
84
  normalized=True,
84
85
  ),
85
86
  name="Salesforce/SFR-Embedding-Code-2B_R",
87
+ model_type=["dense"],
86
88
  languages=["eng-Latn"],
87
89
  open_weights=True,
88
90
  revision="c73d8631a005876ed5abde34db514b1fb6566973",
@@ -120,6 +122,7 @@ SFR_Embedding_Mistral = ModelMeta(
120
122
  normalized=True,
121
123
  ),
122
124
  name="Salesforce/SFR-Embedding-Mistral",
125
+ model_type=["dense"],
123
126
  languages=["eng-Latn"],
124
127
  open_weights=True,
125
128
  revision="938c560d1c236aa563b2dbdf084f28ab28bccb11",
@@ -43,6 +43,7 @@ samilpwc_expr = ModelMeta(
43
43
  apply_instruction_to_passages=False,
44
44
  ),
45
45
  name="SamilPwC-AXNode-GenAI/PwC-Embedding_expr",
46
+ model_type=["dense"],
46
47
  languages=[
47
48
  "kor-Hang",
48
49
  ],
@@ -118,6 +118,7 @@ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
118
118
  max_seq_length=8192,
119
119
  ),
120
120
  name="sbintuitions/sarashina-embedding-v2-1b",
121
+ model_type=["dense"],
121
122
  languages=["jpn-Jpan"],
122
123
  open_weights=True,
123
124
  revision="1f3408afaa7b617e3445d891310a9c26dd0c68a5",
@@ -143,6 +144,7 @@ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
143
144
  sbintuitions_sarashina_embedding_v1_1b = ModelMeta(
144
145
  loader=sentence_transformers_loader,
145
146
  name="sbintuitions/sarashina-embedding-v1-1b",
147
+ model_type=["dense"],
146
148
  languages=["jpn-Jpan"],
147
149
  open_weights=True,
148
150
  revision="d060fcd8984075071e7fad81baff035cbb3b6c7e",
@@ -20,6 +20,7 @@ searchmap_preview = ModelMeta(
20
20
  "model_prompts": task_instructions,
21
21
  },
22
22
  name="VPLabs/SearchMap_Preview",
23
+ model_type=["dense"],
23
24
  revision="69de17ef48278ed08ba1a4e65ead8179912b696e",
24
25
  languages=["eng-Latn"],
25
26
  open_weights=True,
@@ -413,6 +413,7 @@ TASK_NAME_TO_INSTRUCTION = {
413
413
 
414
414
  seed_embedding = ModelMeta(
415
415
  name="Bytedance/Seed1.6-embedding",
416
+ model_type=["dense"],
416
417
  revision="1",
417
418
  release_date="2025-06-18",
418
419
  languages=[
@@ -236,6 +236,7 @@ DEFAULT_INSTRUCTION = "Instruct: Given a web search query, retrieve relevant pas
236
236
 
237
237
  seed_embedding = ModelMeta(
238
238
  name="ByteDance-Seed/Seed1.5-Embedding",
239
+ model_type=["dense"],
239
240
  revision="4",
240
241
  release_date="2025-04-25",
241
242
  languages=[