mteb-2.5.1-py3-none-any.whl → mteb-2.5.3-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (148)
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
@@ -50,6 +50,7 @@ inf_retriever_v1 = ModelMeta(
50
50
  trust_remote_code=True,
51
51
  ),
52
52
  name="infly/inf-retriever-v1",
53
+ model_type=["dense"],
53
54
  languages=["eng-Latn", "zho-Hans"],
54
55
  open_weights=True,
55
56
  revision="cb70ca7c31dfa866b2eff2dad229c144d8ddfd91",
@@ -76,6 +77,7 @@ inf_retriever_v1_1_5b = ModelMeta(
76
77
  trust_remote_code=True,
77
78
  ),
78
79
  name="infly/inf-retriever-v1-1.5b",
80
+ model_type=["dense"],
79
81
  languages=["eng-Latn", "zho-Hans"],
80
82
  open_weights=True,
81
83
  revision="c9c05c2dd50707a486966ba81703021ae2094a06",
@@ -286,6 +286,7 @@ jasper_en_v1 = ModelMeta(
286
286
  instruction_template="Instruct: {instruction}\nQuery: ",
287
287
  ),
288
288
  name="NovaSearch/jasper_en_vision_language_v1",
289
+ model_type=["dense"],
289
290
  languages=["eng-Latn"],
290
291
  open_weights=True,
291
292
  revision="d6330ce98f8a0d741e781df845904c9484f00efa",
@@ -332,6 +333,7 @@ Jasper_Token_Compression_600M = ModelMeta(
332
333
  loader=InstructSentenceTransformerModel,
333
334
  loader_kwargs=jasper_token_compression_600m_loader_kwargs,
334
335
  name="infgrad/Jasper-Token-Compression-600M",
336
+ model_type=["dense"],
335
337
  languages=["eng-Latn", "zho-Hans"],
336
338
  open_weights=True,
337
339
  revision="06a100f753a5a96d9e583b3af79c6fcdfacc4719",
@@ -123,6 +123,7 @@ class JinaCLIPModel(AbsEncoder):
123
123
  jina_clip_v1 = ModelMeta(
124
124
  loader=JinaCLIPModel, # type: ignore
125
125
  name="jinaai/jina-clip-v1",
126
+ model_type=["dense"],
126
127
  languages=["eng-Latn"],
127
128
  revision="06150c7c382d7a4faedc7d5a0d8cdb59308968f4",
128
129
  release_date="2024-05-30",
@@ -720,6 +720,7 @@ jina_reranker_v3 = ModelMeta(
720
720
  trust_remote_code=True,
721
721
  ),
722
722
  name="jinaai/jina-reranker-v3",
723
+ model_type=["cross-encoder"],
723
724
  languages=multilingual_langs,
724
725
  open_weights=True,
725
726
  revision="050e171c4f75dfec5b648ed8470a2475e5a30f30",
@@ -734,7 +735,6 @@ jina_reranker_v3 = ModelMeta(
734
735
  framework=["PyTorch"],
735
736
  use_instructions=None,
736
737
  reference="https://huggingface.co/jinaai/jina-reranker-v3",
737
- is_cross_encoder=True,
738
738
  public_training_code=None,
739
739
  public_training_data=None,
740
740
  training_datasets=JINARerankerV3_TRAINING_DATA,
@@ -763,6 +763,7 @@ jina_embeddings_v4 = ModelMeta(
763
763
  },
764
764
  ),
765
765
  name="jinaai/jina-embeddings-v4",
766
+ model_type=["dense"],
766
767
  languages=XLMR_LANGUAGES,
767
768
  open_weights=True,
768
769
  revision="4a58ca57710c49f51896e4bc820e202fbf64904b",
@@ -811,6 +812,7 @@ jina_embeddings_v3 = ModelMeta(
811
812
  },
812
813
  ),
813
814
  name="jinaai/jina-embeddings-v3",
815
+ model_type=["dense"],
814
816
  languages=XLMR_LANGUAGES,
815
817
  open_weights=True,
816
818
  revision="215a6e121fa0183376388ac6b1ae230326bfeaed",
@@ -864,6 +866,7 @@ jina_embeddings_v2_base_en = ModelMeta(
864
866
  trust_remote_code=True,
865
867
  ),
866
868
  name="jinaai/jina-embeddings-v2-base-en",
869
+ model_type=["dense"],
867
870
  languages=["eng-Latn"],
868
871
  open_weights=True,
869
872
  revision="6e85f575bc273f1fd840a658067d0157933c83f0",
@@ -927,6 +930,7 @@ jina_embeddings_v2_small_en = ModelMeta(
927
930
  trust_remote_code=True,
928
931
  ),
929
932
  name="jinaai/jina-embeddings-v2-small-en",
933
+ model_type=["dense"],
930
934
  languages=["eng-Latn"],
931
935
  open_weights=True,
932
936
  revision="44e7d1d6caec8c883c2d4b207588504d519788d0",
@@ -987,6 +991,7 @@ jina_embeddings_v2_small_en = ModelMeta(
987
991
  jina_embedding_b_en_v1 = ModelMeta(
988
992
  loader=SentenceTransformerEncoderWrapper,
989
993
  name="jinaai/jina-embedding-b-en-v1",
994
+ model_type=["dense"],
990
995
  languages=["eng-Latn"],
991
996
  open_weights=True,
992
997
  revision="32aa658e5ceb90793454d22a57d8e3a14e699516",
@@ -1043,6 +1048,7 @@ jina_embedding_b_en_v1 = ModelMeta(
1043
1048
  jina_embedding_s_en_v1 = ModelMeta(
1044
1049
  loader=SentenceTransformerEncoderWrapper,
1045
1050
  name="jinaai/jina-embedding-s-en-v1",
1051
+ model_type=["dense"],
1046
1052
  languages=["eng-Latn"],
1047
1053
  open_weights=True,
1048
1054
  revision="5ac6cd473e2324c6d5f9e558a6a9f65abb57143e",
@@ -769,6 +769,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1 = ModelMeta(
769
769
  prompts_dict=KaLM_task_prompts,
770
770
  ),
771
771
  name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
772
+ model_type=["dense"],
772
773
  revision="45e42c89990c40aca042659133fc8b13c28634b5",
773
774
  release_date="2024-10-23",
774
775
  languages=["eng-Latn", "zho-Hans"],
@@ -793,6 +794,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1 = ModelMeta(
793
794
  HIT_TMG__KaLM_embedding_multilingual_mini_v1 = ModelMeta(
794
795
  loader=sentence_transformers_loader,
795
796
  name="HIT-TMG/KaLM-embedding-multilingual-mini-v1",
797
+ model_type=["dense"],
796
798
  revision="8a82a0cd2b322b91723e252486f7cce6fd8ac9d3",
797
799
  release_date="2024-08-27",
798
800
  languages=["eng-Latn", "zho-Hans"],
@@ -823,6 +825,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1_5 = ModelMeta(
823
825
  prompts_dict=KaLM_task_prompts,
824
826
  ),
825
827
  name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
828
+ model_type=["dense"],
826
829
  revision="fcff2f8a54e4cd96b7766fef1ee960a43d42bb3c",
827
830
  release_date="2024-12-26",
828
831
  languages=["eng-Latn", "zho-Hans"],
@@ -853,6 +856,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v2 = ModelMeta(
853
856
  prompts_dict=KaLM_v2_task_prompts,
854
857
  ),
855
858
  name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
859
+ model_type=["dense"],
856
860
  revision="d2a21c232dc712ae8230af56d1027cf21b7864bf",
857
861
  release_date="2025-06-25",
858
862
  languages=["eng-Latn", "zho-Hans"],
@@ -883,6 +887,7 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
883
887
  prompts_dict=KaLM_v2_task_prompts,
884
888
  ),
885
889
  name="KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
890
+ model_type=["dense"],
886
891
  revision="6a4cfc1084cb459ebd4729b53a8656a61448c720",
887
892
  release_date="2025-09-30",
888
893
  languages=["eng-Latn", "zho-Hans"],
@@ -931,6 +936,7 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
931
936
  prompts_dict=KaLM_Embedding_gemma_3_12b_task_prompts,
932
937
  ),
933
938
  name="tencent/KaLM-Embedding-Gemma3-12B-2511",
939
+ model_type=["dense"],
934
940
  revision="edf22f4753f58b05e3f5495818d31f12db63056d",
935
941
  languages=None,
936
942
  open_weights=True,
@@ -4,6 +4,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
4
4
  sbert_swedish = ModelMeta(
5
5
  loader=sentence_transformers_loader, # type: ignore[arg-type]
6
6
  name="KBLab/sentence-bert-swedish-cased",
7
+ model_type=["dense"],
7
8
  languages=["swe-Latn"],
8
9
  open_weights=True,
9
10
  revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import (
6
6
  dfm_enc_large = ModelMeta(
7
7
  loader=sentence_transformers_loader, # type: ignore
8
8
  name="KennethEnevoldsen/dfm-sentence-encoder-large",
9
+ model_type=["dense"],
9
10
  languages=["dan-Latn"],
10
11
  open_weights=True,
11
12
  revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
@@ -40,6 +41,7 @@ dfm_enc_large = ModelMeta(
40
41
  dfm_enc_med = ModelMeta(
41
42
  loader=sentence_transformers_loader, # type: ignore
42
43
  name="KennethEnevoldsen/dfm-sentence-encoder-medium",
44
+ model_type=["dense"],
43
45
  languages=["dan-Latn"],
44
46
  open_weights=True,
45
47
  revision="701bce95d499fa97610d57e8823c54fd1fb79930",
@@ -4,6 +4,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
4
4
  xlmr_scandi = ModelMeta(
5
5
  loader=sentence_transformers_loader, # type: ignore[arg-type]
6
6
  name="KFST/XLMRoberta-en-da-sv-nb",
7
+ model_type=["dense"],
7
8
  languages=["swe-Latn", "nob-Latn", "nno-Latn", "dan-Latn", "eng-Latn"],
8
9
  open_weights=True,
9
10
  revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
@@ -3,6 +3,7 @@ from mteb.models import ModelMeta, sentence_transformers_loader
3
3
  kowshik24_bangla_embedding_model = ModelMeta(
4
4
  loader=sentence_transformers_loader,
5
5
  name="Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
6
+ model_type=["dense"],
6
7
  languages=["ben-Beng"], # Bengali using Bengali script
7
8
  open_weights=True,
8
9
  revision="6689c21e69be5950596bad084457cbaa138728d8",
@@ -12,6 +12,7 @@ LENS_CITATION = """@article{lei2025lens,
12
12
  lens_d4000 = ModelMeta(
13
13
  loader=None,
14
14
  name="yibinlei/LENS-d4000",
15
+ model_type=["dense"],
15
16
  languages=None,
16
17
  open_weights=True,
17
18
  revision="e473b33364e6c48a324796fd1411d3b93670c6fe",
@@ -34,6 +35,7 @@ lens_d4000 = ModelMeta(
34
35
  lens_d8000 = ModelMeta(
35
36
  loader=None,
36
37
  name="yibinlei/LENS-d8000",
38
+ model_type=["dense"],
37
39
  languages=None,
38
40
  open_weights=True,
39
41
  revision="a0b87bd91cb27b6f2f0b0fe22c28026da1d464ef",
@@ -44,6 +44,7 @@ LGAI_EMBEDDING_TRAINING_DATA = {
44
44
  lgai_embedding_en = ModelMeta(
45
45
  loader=sentence_transformers_loader,
46
46
  name="annamodels/LGAI-Embedding-Preview",
47
+ model_type=["dense"],
47
48
  languages=[
48
49
  "eng-Latn",
49
50
  ],
@@ -32,6 +32,7 @@ Linq_Embed_Mistral = ModelMeta(
32
32
  normalized=True,
33
33
  ),
34
34
  name="Linq-AI-Research/Linq-Embed-Mistral",
35
+ model_type=["dense"],
35
36
  languages=["eng-Latn"],
36
37
  open_weights=True,
37
38
  revision="0c1a0b0589177079acc552433cad51d7c9132379",
@@ -112,6 +112,7 @@ listconranker = ModelMeta(
112
112
  fp_options="float16",
113
113
  ),
114
114
  name="ByteDance/ListConRanker",
115
+ model_type=["cross-encoder"],
115
116
  languages=["zho-Hans"],
116
117
  open_weights=True,
117
118
  revision="95ae6a5f422a916bc36520f0f3e198e7d91520a0",
@@ -128,6 +129,5 @@ listconranker = ModelMeta(
128
129
  use_instructions=False,
129
130
  public_training_code=None,
130
131
  public_training_data=None,
131
- is_cross_encoder=True,
132
132
  citation=LISTCONRANKER_CITATION,
133
133
  )
@@ -183,6 +183,7 @@ llm2clip_training_sets = set(
183
183
  llm2clip_openai_l_14_336 = ModelMeta(
184
184
  loader=llm2clip_loader, # type: ignore
185
185
  name="microsoft/LLM2CLIP-Openai-L-14-336",
186
+ model_type=["dense"],
186
187
  languages=["eng-Latn"],
187
188
  revision="92512331f393a003c3d98404677f991c188162c9",
188
189
  release_date="2024-11-07",
@@ -207,6 +208,7 @@ llm2clip_openai_l_14_336 = ModelMeta(
207
208
  llm2clip_openai_l_14_224 = ModelMeta(
208
209
  loader=llm2clip_loader, # type: ignore
209
210
  name="microsoft/LLM2CLIP-Openai-L-14-224",
211
+ model_type=["dense"],
210
212
  languages=["eng-Latn"],
211
213
  revision="6b8a11a94ff380fa220dfefe73ac9293d2677575",
212
214
  release_date="2024-11-07",
@@ -230,6 +232,7 @@ llm2clip_openai_l_14_224 = ModelMeta(
230
232
  llm2clip_openai_b_16 = ModelMeta(
231
233
  loader=llm2clip_loader, # type: ignore
232
234
  name="microsoft/LLM2CLIP-Openai-B-16",
235
+ model_type=["dense"],
233
236
  languages=["eng-Latn"],
234
237
  revision="ecfb347eb3dcfeb2fbc2a2eae7de6ac5a001aaf8",
235
238
  release_date="2024-11-07",
@@ -132,6 +132,7 @@ llm2vec_llama3_8b_supervised = ModelMeta(
132
132
  torch_dtype=torch.bfloat16,
133
133
  ),
134
134
  name="McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised",
135
+ model_type=["dense"],
135
136
  languages=["eng-Latn"],
136
137
  open_weights=True,
137
138
  revision="baa8ebf04a1c2500e61288e7dad65e8ae42601a7",
@@ -161,6 +162,7 @@ llm2vec_llama3_8b_unsupervised = ModelMeta(
161
162
  torch_dtype=torch.bfloat16,
162
163
  ),
163
164
  name="McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse",
165
+ model_type=["dense"],
164
166
  languages=["eng-Latn"],
165
167
  open_weights=True,
166
168
  revision="1cb7b735326d13a8541db8f57f35da5373f5e9c6",
@@ -189,6 +191,7 @@ llm2vec_mistral7b_supervised = ModelMeta(
189
191
  torch_dtype=torch.bfloat16,
190
192
  ),
191
193
  name="McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
194
+ model_type=["dense"],
192
195
  languages=["eng-Latn"],
193
196
  open_weights=True,
194
197
  revision="0ae69bdd5816105778b971c3138e8f8a18eaa3ae",
@@ -217,6 +220,7 @@ llm2vec_mistral7b_unsupervised = ModelMeta(
217
220
  torch_dtype=torch.bfloat16,
218
221
  ),
219
222
  name="McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse",
223
+ model_type=["dense"],
220
224
  languages=["eng-Latn"],
221
225
  open_weights=True,
222
226
  revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8",
@@ -245,6 +249,7 @@ llm2vec_llama2_7b_supervised = ModelMeta(
245
249
  torch_dtype=torch.bfloat16,
246
250
  ),
247
251
  name="McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
252
+ model_type=["dense"],
248
253
  languages=["eng-Latn"],
249
254
  open_weights=True,
250
255
  revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8",
@@ -273,6 +278,7 @@ llm2vec_llama2_7b_unsupervised = ModelMeta(
273
278
  torch_dtype=torch.bfloat16,
274
279
  ),
275
280
  name="McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse",
281
+ model_type=["dense"],
276
282
  languages=["eng-Latn"],
277
283
  open_weights=True,
278
284
  revision="a76944871d169ebe7c97eb921764cd063afed785",
@@ -301,6 +307,7 @@ llm2vec_sheared_llama_supervised = ModelMeta(
301
307
  torch_dtype=torch.bfloat16,
302
308
  ),
303
309
  name="McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
310
+ model_type=["dense"],
304
311
  languages=["eng-Latn"],
305
312
  open_weights=True,
306
313
  revision="a5943d406c6b016fef3f07906aac183cf1a0b47d",
@@ -329,6 +336,7 @@ llm2vec_sheared_llama_unsupervised = ModelMeta(
329
336
  torch_dtype=torch.bfloat16,
330
337
  ),
331
338
  name="McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse",
339
+ model_type=["dense"],
332
340
  languages=["eng-Latn"],
333
341
  open_weights=True,
334
342
  revision="a5943d406c6b016fef3f07906aac183cf1a0b47d",
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  import time
4
+ import warnings
4
5
  from typing import Any
5
6
 
6
7
  import numpy as np
@@ -246,7 +247,9 @@ class HakimModelWrapper(AbsEncoder):
246
247
  task_prompt, task_id = DATASET_TASKS.get(task_name, (None, None))
247
248
 
248
249
  if not task_prompt:
249
- logger.warning(f"Unknown dataset: {task_name}, no preprocessing applied.")
250
+ msg = f"Unknown dataset: {task_name}, no preprocessing applied."
251
+ logger.warning(msg)
252
+ warnings.warn(msg)
250
253
  return sample
251
254
 
252
255
  task_prompt = f"مسئله : {task_prompt}"
@@ -344,6 +347,7 @@ hakim = ModelMeta(
344
347
  loader=HakimModelWrapper,
345
348
  loader_kwargs=dict(
346
349
  api_model_name="hakim",
350
+ model_type=["dense"],
347
351
  ),
348
352
  name="MCINext/Hakim",
349
353
  languages=["fas-Arab"],
@@ -411,6 +415,7 @@ hakim_small = ModelMeta(
411
415
  loader=HakimModelWrapper,
412
416
  loader_kwargs=dict(
413
417
  api_model_name="hakim-small",
418
+ model_type=["dense"],
414
419
  ),
415
420
  name="MCINext/Hakim-small",
416
421
  languages=["fas-Arab"],
@@ -477,6 +482,7 @@ hakim_unsup = ModelMeta(
477
482
  loader=HakimModelWrapper,
478
483
  loader_kwargs=dict(
479
484
  api_model_name="hakim-unsup",
485
+ model_type=["dense"],
480
486
  ),
481
487
  name="MCINext/Hakim-unsup",
482
488
  languages=["fas-Arab"],
@@ -30,6 +30,7 @@ mdbr_leaf_ir = ModelMeta(
30
30
  model_prompts=model_prompts,
31
31
  ),
32
32
  name="MongoDB/mdbr-leaf-ir",
33
+ model_type=["dense"],
33
34
  revision="2e46f5aac796e621d51f678c306a66ede4712ecb",
34
35
  release_date="2025-08-27",
35
36
  languages=["eng-Latn"],
@@ -57,6 +58,7 @@ mdbr_leaf_mt = ModelMeta(
57
58
  model_prompts=model_prompts,
58
59
  ),
59
60
  name="MongoDB/mdbr-leaf-mt",
61
+ model_type=["dense"],
60
62
  revision="66c47ba6d753efc208d54412b5af6c744a39a4df",
61
63
  release_date="2025-08-27",
62
64
  languages=["eng-Latn"],