mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
@@ -106,6 +106,7 @@ dinov2_training_datasets = set(
106
106
  dinov2_small = ModelMeta(
107
107
  loader=DINOModel, # type: ignore
108
108
  name="facebook/dinov2-small",
109
+ model_type=["dense"],
109
110
  languages=["eng-Latn"],
110
111
  revision="ed25f3a31f01632728cabb09d1542f84ab7b0056",
111
112
  release_date="2023-07-18",
@@ -136,6 +137,7 @@ dinov2_small = ModelMeta(
136
137
  dinov2_base = ModelMeta(
137
138
  loader=DINOModel, # type: ignore
138
139
  name="facebook/dinov2-base",
140
+ model_type=["dense"],
139
141
  languages=["eng-Latn"],
140
142
  revision="f9e44c814b77203eaa57a6bdbbd535f21ede1415",
141
143
  release_date="2023-07-18",
@@ -166,6 +168,7 @@ dinov2_base = ModelMeta(
166
168
  dinov2_large = ModelMeta(
167
169
  loader=DINOModel, # type: ignore
168
170
  name="facebook/dinov2-large",
171
+ model_type=["dense"],
169
172
  languages=["eng-Latn"],
170
173
  revision="47b73eefe95e8d44ec3623f8890bd894b6ea2d6c",
171
174
  release_date="2023-07-18",
@@ -196,6 +199,7 @@ dinov2_large = ModelMeta(
196
199
  dinov2_giant = ModelMeta(
197
200
  loader=DINOModel, # type: ignore
198
201
  name="facebook/dinov2-giant",
202
+ model_type=["dense"],
199
203
  languages=["eng-Latn"],
200
204
  revision="611a9d42f2335e0f921f1e313ad3c1b7178d206d",
201
205
  release_date="2023-07-18",
@@ -230,6 +234,7 @@ webssl_dino_training_datasets = set(
230
234
  webssl_dino300m_full2b = ModelMeta(
231
235
  loader=DINOModel,
232
236
  name="facebook/webssl-dino300m-full2b-224",
237
+ model_type=["dense"],
233
238
  languages=["eng-Latn"],
234
239
  revision="8529cdb3fb75014932af3b896455fc21c386168e",
235
240
  release_date="2025-04-24",
@@ -260,6 +265,7 @@ webssl_dino300m_full2b = ModelMeta(
260
265
  webssl_dino1b_full2b = ModelMeta(
261
266
  loader=DINOModel,
262
267
  name="facebook/webssl-dino1b-full2b-224",
268
+ model_type=["dense"],
263
269
  languages=["eng-Latn"],
264
270
  revision="d3bf033d9c8cc62ea9e73c40956642cad2ec568a",
265
271
  release_date="2025-04-24",
@@ -290,6 +296,7 @@ webssl_dino1b_full2b = ModelMeta(
290
296
  webssl_dino2b_full2b = ModelMeta(
291
297
  loader=DINOModel,
292
298
  name="facebook/webssl-dino2b-full2b-224",
299
+ model_type=["dense"],
293
300
  languages=["eng-Latn"],
294
301
  revision="cd5893e3fd2e988eb716792049b3dd53b3f1b68b",
295
302
  release_date="2025-04-24",
@@ -320,6 +327,7 @@ webssl_dino2b_full2b = ModelMeta(
320
327
  webssl_dino3b_full2b = ModelMeta(
321
328
  loader=DINOModel,
322
329
  name="facebook/webssl-dino3b-full2b-224",
330
+ model_type=["dense"],
323
331
  languages=["eng-Latn"],
324
332
  revision="2d015c340b16bc47bc6557fcb4e6c83a9d4aa1d3",
325
333
  release_date="2025-04-24",
@@ -350,6 +358,7 @@ webssl_dino3b_full2b = ModelMeta(
350
358
  webssl_dino5b_full2b = ModelMeta(
351
359
  loader=DINOModel,
352
360
  name="facebook/webssl-dino5b-full2b-224",
361
+ model_type=["dense"],
353
362
  languages=["eng-Latn"],
354
363
  revision="88006b18b9af369f6c611db7a64d908bde3714e0",
355
364
  release_date="2025-04-24",
@@ -380,6 +389,7 @@ webssl_dino5b_full2b = ModelMeta(
380
389
  webssl_dino7b_full8b_224 = ModelMeta(
381
390
  loader=DINOModel,
382
391
  name="facebook/webssl-dino7b-full8b-224",
392
+ model_type=["dense"],
383
393
  languages=["eng-Latn"],
384
394
  revision="c6085463ea680043042a80c6d41db2c65e85f466",
385
395
  release_date="2025-04-24",
@@ -410,6 +420,7 @@ webssl_dino7b_full8b_224 = ModelMeta(
410
420
  webssl_dino7b_full8b_378 = ModelMeta(
411
421
  loader=DINOModel,
412
422
  name="facebook/webssl-dino7b-full8b-378",
423
+ model_type=["dense"],
413
424
  languages=["eng-Latn"],
414
425
  revision="53c8c5b43070bd2ddb3f66161140408ce832301f",
415
426
  release_date="2025-04-24",
@@ -440,6 +451,7 @@ webssl_dino7b_full8b_378 = ModelMeta(
440
451
  webssl_dino7b_full8b_518 = ModelMeta(
441
452
  loader=DINOModel,
442
453
  name="facebook/webssl-dino7b-full8b-518",
454
+ model_type=["dense"],
443
455
  languages=["eng-Latn"],
444
456
  revision="aee350d2c5e3e5fdb7ee6985291d808ea5eef431",
445
457
  release_date="2025-04-24",
@@ -471,6 +483,7 @@ webssl_dino7b_full8b_518 = ModelMeta(
471
483
  webssl_dino2b_light2b = ModelMeta(
472
484
  loader=DINOModel,
473
485
  name="facebook/webssl-dino2b-light2b-224",
486
+ model_type=["dense"],
474
487
  languages=["eng-Latn"],
475
488
  revision="633a663f304e63cc3cbec3f7f9ca2fbc94736128",
476
489
  release_date="2025-04-24",
@@ -501,6 +514,7 @@ webssl_dino2b_light2b = ModelMeta(
501
514
  webssl_dino2b_heavy2b = ModelMeta(
502
515
  loader=DINOModel,
503
516
  name="facebook/webssl-dino2b-heavy2b-224",
517
+ model_type=["dense"],
504
518
  languages=["eng-Latn"],
505
519
  revision="9f46eb0c0129656a1ef195fde072e3765abdb7c6",
506
520
  release_date="2025-04-24",
@@ -531,6 +545,7 @@ webssl_dino2b_heavy2b = ModelMeta(
531
545
  webssl_dino3b_light2b = ModelMeta(
532
546
  loader=DINOModel,
533
547
  name="facebook/webssl-dino3b-light2b-224",
548
+ model_type=["dense"],
534
549
  languages=["eng-Latn"],
535
550
  revision="4d0160f60673805431f4ad14983e712ed88be5b8",
536
551
  release_date="2025-04-24",
@@ -561,6 +576,7 @@ webssl_dino3b_light2b = ModelMeta(
561
576
  webssl_dino3b_heavy2b = ModelMeta(
562
577
  loader=DINOModel,
563
578
  name="facebook/webssl-dino3b-heavy2b-224",
579
+ model_type=["dense"],
564
580
  languages=["eng-Latn"],
565
581
  revision="dd39c2910747561b332285d96c4dce0bdb240775",
566
582
  release_date="2025-04-24",
@@ -591,6 +607,7 @@ webssl_dino3b_heavy2b = ModelMeta(
591
607
  webssl_mae300m_full2b = ModelMeta(
592
608
  loader=DINOModel,
593
609
  name="facebook/webssl-mae300m-full2b-224",
610
+ model_type=["dense"],
594
611
  languages=["eng-Latn"],
595
612
  revision="4655a0ac1726c206ba14d5ccb26758c62a4d03b0",
596
613
  release_date="2025-04-24",
@@ -621,6 +638,7 @@ webssl_mae300m_full2b = ModelMeta(
621
638
  webssl_mae700m_full2b = ModelMeta(
622
639
  loader=DINOModel,
623
640
  name="facebook/webssl-mae700m-full2b-224",
641
+ model_type=["dense"],
624
642
  languages=["eng-Latn"],
625
643
  revision="c32be382e757d73a178de1ead62c27391d4b4280",
626
644
  release_date="2025-04-24",
@@ -651,6 +669,7 @@ webssl_mae700m_full2b = ModelMeta(
651
669
  webssl_mae1b_full2b = ModelMeta(
652
670
  loader=DINOModel,
653
671
  name="facebook/webssl-mae1b-full2b-224",
672
+ model_type=["dense"],
654
673
  languages=["eng-Latn"],
655
674
  revision="5880aefedbad8db0f44d27358f6f08e8576f70fc",
656
675
  release_date="2025-04-24",
@@ -40,6 +40,7 @@ e5_instruct = ModelMeta(
40
40
  normalized=True,
41
41
  ),
42
42
  name="intfloat/multilingual-e5-large-instruct",
43
+ model_type=["dense"],
43
44
  languages=XLMR_LANGUAGES,
44
45
  open_weights=True,
45
46
  revision="baa7be480a7de1539afce709c8f13f833a510e0a",
@@ -78,6 +79,7 @@ e5_mistral = ModelMeta(
78
79
  normalized=True,
79
80
  ),
80
81
  name="intfloat/e5-mistral-7b-instruct",
82
+ model_type=["dense"],
81
83
  languages=MISTRAL_LANGUAGES,
82
84
  open_weights=True,
83
85
  revision="07163b72af1488142a360786df853f237b1a3ca1",
@@ -125,6 +127,7 @@ zeta_alpha_ai__zeta_alpha_e5_mistral = ModelMeta(
125
127
  normalized=True,
126
128
  ),
127
129
  name="zeta-alpha-ai/Zeta-Alpha-E5-Mistral",
130
+ model_type=["dense"],
128
131
  revision="c791d37474fa6a5c72eb3a2522be346bc21fbfc3",
129
132
  release_date="2024-08-30",
130
133
  languages=["eng-Latn"],
@@ -201,6 +204,7 @@ BeastyZ__e5_R_mistral_7b = ModelMeta(
201
204
  tokenizer_kwargs={"pad_token": "</s>"},
202
205
  ),
203
206
  name="BeastyZ/e5-R-mistral-7b",
207
+ model_type=["dense"],
204
208
  revision="3f810a6a7fd220369ad248e3705cf13d71803602",
205
209
  release_date="2024-06-28",
206
210
  languages=["eng-Latn"],
@@ -70,6 +70,7 @@ e5_mult_small = ModelMeta(
70
70
  model_prompts=model_prompts,
71
71
  ),
72
72
  name="intfloat/multilingual-e5-small",
73
+ model_type=["dense"],
73
74
  languages=XLMR_LANGUAGES,
74
75
  open_weights=True,
75
76
  revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
@@ -96,6 +97,7 @@ e5_mult_base = ModelMeta(
96
97
  model_prompts=model_prompts,
97
98
  ),
98
99
  name="intfloat/multilingual-e5-base",
100
+ model_type=["dense"],
99
101
  languages=XLMR_LANGUAGES,
100
102
  open_weights=True,
101
103
  revision="d13f1b27baf31030b7fd040960d60d909913633f",
@@ -122,6 +124,7 @@ e5_mult_large = ModelMeta(
122
124
  model_prompts=model_prompts,
123
125
  ),
124
126
  name="intfloat/multilingual-e5-large",
127
+ model_type=["dense"],
125
128
  languages=XLMR_LANGUAGES,
126
129
  open_weights=True,
127
130
  revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
@@ -148,6 +151,7 @@ e5_eng_small_v2 = ModelMeta(
148
151
  model_prompts=model_prompts,
149
152
  ),
150
153
  name="intfloat/e5-small-v2",
154
+ model_type=["dense"],
151
155
  languages=["eng-Latn"],
152
156
  open_weights=True,
153
157
  revision="dca8b1a9dae0d4575df2bf423a5edb485a431236",
@@ -174,6 +178,7 @@ e5_eng_small = ModelMeta(
174
178
  model_prompts=model_prompts,
175
179
  ),
176
180
  name="intfloat/e5-small",
181
+ model_type=["dense"],
177
182
  languages=["eng-Latn"],
178
183
  open_weights=True,
179
184
  revision="e272f3049e853b47cb5ca3952268c6662abda68f",
@@ -200,6 +205,7 @@ e5_eng_base_v2 = ModelMeta(
200
205
  model_prompts=model_prompts,
201
206
  ),
202
207
  name="intfloat/e5-base-v2",
208
+ model_type=["dense"],
203
209
  languages=["eng-Latn"],
204
210
  open_weights=True,
205
211
  revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
@@ -227,6 +233,7 @@ e5_eng_large_v2 = ModelMeta(
227
233
  model_prompts=model_prompts,
228
234
  ),
229
235
  name="intfloat/e5-large-v2",
236
+ model_type=["dense"],
230
237
  languages=["eng-Latn"],
231
238
  open_weights=True,
232
239
  revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
@@ -254,6 +261,7 @@ e5_large = ModelMeta(
254
261
  model_prompts=model_prompts,
255
262
  ),
256
263
  name="intfloat/e5-large",
264
+ model_type=["dense"],
257
265
  languages=["eng-Latn"],
258
266
  open_weights=True,
259
267
  revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81",
@@ -281,6 +289,7 @@ e5_base = ModelMeta(
281
289
  model_prompts=model_prompts,
282
290
  ),
283
291
  name="intfloat/e5-base",
292
+ model_type=["dense"],
284
293
  languages=["eng-Latn"],
285
294
  open_weights=True,
286
295
  revision="b533fe4636f4a2507c08ddab40644d20b0006d6a",
@@ -160,6 +160,7 @@ e5_v = ModelMeta(
160
160
  device_map="auto",
161
161
  ),
162
162
  name="royokong/e5-v",
163
+ model_type=["dense"],
163
164
  languages=["eng-Latn"],
164
165
  revision="0c1f22679417b3ae925d779442221c40cd1861ab",
165
166
  release_date="2024-07-17",
@@ -141,6 +141,7 @@ Eager_Embed_V1 = ModelMeta(
141
141
  image_size=784,
142
142
  ),
143
143
  name="eagerworks/eager-embed-v1",
144
+ model_type=["dense"],
144
145
  languages=["fra-Latn", "spa-Latn", "eng-Latn", "deu-Latn"],
145
146
  revision="a6bec272729c5056e2c26618ce085205c82a3b3c",
146
147
  release_date="2025-11-20",
@@ -4,6 +4,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
4
4
  embedding_gemma_300m_scandi = ModelMeta(
5
5
  loader=sentence_transformers_loader, # type: ignore
6
6
  name="emillykkejensen/EmbeddingGemma-Scandi-300m",
7
+ model_type=["dense"],
7
8
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
8
9
  open_weights=True,
9
10
  revision="9f3307b9f601db564a9190cb475324d128dcfe86",
@@ -36,6 +37,7 @@ embedding_gemma_300m_scandi = ModelMeta(
36
37
  qwen_scandi = ModelMeta(
37
38
  loader=sentence_transformers_loader, # type: ignore
38
39
  name="emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
40
+ model_type=["dense"],
39
41
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
40
42
  open_weights=True,
41
43
  revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
@@ -59,6 +61,7 @@ qwen_scandi = ModelMeta(
59
61
  mmbert_scandi = ModelMeta(
60
62
  loader=sentence_transformers_loader, # type: ignore
61
63
  name="emillykkejensen/mmBERTscandi-base-embedding",
64
+ model_type=["dense"],
62
65
  languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
63
66
  open_weights=True,
64
67
  revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
@@ -12,6 +12,7 @@ english_code_retriever = ModelMeta(
12
12
  },
13
13
  ),
14
14
  name="fyaronskiy/english_code_retriever",
15
+ model_type=["dense"],
15
16
  languages=["eng-Latn"],
16
17
  open_weights=True,
17
18
  revision="be653fab7d27a7348a0c2c3d16b9f92a7f10cb0c",
@@ -4,6 +4,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
4
4
  Euler_Legal_Embedding_V1 = ModelMeta(
5
5
  loader=sentence_transformers_loader,
6
6
  name="Mira190/Euler-Legal-Embedding-V1",
7
+ model_type=["dense"],
7
8
  revision="df607ed9e25e569514a99c27cdaaab16e76b6dd4",
8
9
  release_date="2025-11-06",
9
10
  languages=["eng-Latn"],
@@ -138,6 +138,7 @@ laion_2b = set(
138
138
  EVA02_CLIP_B_16 = ModelMeta(
139
139
  loader=evaclip_loader,
140
140
  name="QuanSun/EVA02-CLIP-B-16",
141
+ model_type=["dense"],
141
142
  languages=["eng-Latn"],
142
143
  revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
143
144
  release_date="2023-04-26",
@@ -161,6 +162,7 @@ EVA02_CLIP_B_16 = ModelMeta(
161
162
  EVA02_CLIP_L_14 = ModelMeta(
162
163
  loader=evaclip_loader,
163
164
  name="QuanSun/EVA02-CLIP-L-14",
165
+ model_type=["dense"],
164
166
  languages=["eng-Latn"],
165
167
  revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
166
168
  release_date="2023-04-26",
@@ -184,6 +186,7 @@ EVA02_CLIP_L_14 = ModelMeta(
184
186
  EVA02_CLIP_bigE_14 = ModelMeta(
185
187
  loader=evaclip_loader,
186
188
  name="QuanSun/EVA02-CLIP-bigE-14",
189
+ model_type=["dense"],
187
190
  languages=["eng-Latn"],
188
191
  revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
189
192
  release_date="2023-04-26",
@@ -208,6 +211,7 @@ EVA02_CLIP_bigE_14 = ModelMeta(
208
211
  EVA02_CLIP_bigE_14_plus = ModelMeta(
209
212
  loader=evaclip_loader,
210
213
  name="QuanSun/EVA02-CLIP-bigE-14-plus",
214
+ model_type=["dense"],
211
215
  languages=["eng-Latn"],
212
216
  revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
213
217
  release_date="2023-04-26",
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
  parsbert = ModelMeta(
7
7
  loader=sentence_transformers_loader,
8
8
  name="HooshvareLab/bert-base-parsbert-uncased",
9
+ model_type=["dense"],
9
10
  languages=["fas-Arab"],
10
11
  open_weights=True,
11
12
  revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
@@ -41,6 +42,7 @@ parsbert = ModelMeta(
41
42
  bert_zwnj = ModelMeta(
42
43
  loader=sentence_transformers_loader,
43
44
  name="m3hrdadfi/bert-zwnj-wnli-mean-tokens",
45
+ model_type=["dense"],
44
46
  languages=["fas-Arab"],
45
47
  open_weights=True,
46
48
  revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
@@ -66,6 +68,7 @@ bert_zwnj = ModelMeta(
66
68
  roberta_zwnj = ModelMeta(
67
69
  loader=sentence_transformers_loader,
68
70
  name="m3hrdadfi/roberta-zwnj-wnli-mean-tokens",
71
+ model_type=["dense"],
69
72
  languages=["fas-Arab"],
70
73
  open_weights=True,
71
74
  revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
@@ -90,6 +93,7 @@ roberta_zwnj = ModelMeta(
90
93
  sentence_transformer_parsbert = ModelMeta(
91
94
  loader=sentence_transformers_loader,
92
95
  name="myrkur/sentence-transformer-parsbert-fa",
96
+ model_type=["dense"],
93
97
  languages=["fas-Arab"],
94
98
  open_weights=True,
95
99
  revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
@@ -140,6 +144,7 @@ tooka_bert_base = ModelMeta(
140
144
  tooka_sbert = ModelMeta(
141
145
  loader=sentence_transformers_loader,
142
146
  name="PartAI/Tooka-SBERT",
147
+ model_type=["dense"],
143
148
  languages=["fas-Arab"],
144
149
  open_weights=True,
145
150
  revision="5d07f0c543aca654373b931ae07cd197769110fd",
@@ -170,6 +175,7 @@ tooka_sbert = ModelMeta(
170
175
  fa_bert = ModelMeta(
171
176
  loader=sentence_transformers_loader,
172
177
  name="sbunlp/fabert",
178
+ model_type=["dense"],
173
179
  languages=["fas-Arab"],
174
180
  open_weights=True,
175
181
  revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
@@ -217,6 +223,7 @@ fa_bert = ModelMeta(
217
223
  tooka_sbert_v2_small = ModelMeta(
218
224
  loader=sentence_transformers_loader,
219
225
  name="PartAI/Tooka-SBERT-V2-Small",
226
+ model_type=["dense"],
220
227
  languages=["fas-Arab"],
221
228
  open_weights=True,
222
229
  revision="8bbed87e36669387f71437c061430ba56d1b496f",
@@ -247,6 +254,7 @@ tooka_sbert_v2_small = ModelMeta(
247
254
  tooka_sbert_v2_large = ModelMeta(
248
255
  loader=sentence_transformers_loader,
249
256
  name="PartAI/Tooka-SBERT-V2-Large",
257
+ model_type=["dense"],
250
258
  languages=["fas-Arab"],
251
259
  open_weights=True,
252
260
  revision="b59682efa961122cc0e4408296d5852870c82eae",
@@ -107,6 +107,7 @@ XLMR_LANGUAGES = [
107
107
  xlmr_base = ModelMeta(
108
108
  loader=sentence_transformers_loader, # type: ignore[arg-type]
109
109
  name="FacebookAI/xlm-roberta-base",
110
+ model_type=["dense"],
110
111
  languages=XLMR_LANGUAGES,
111
112
  open_weights=True,
112
113
  revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
@@ -150,6 +151,7 @@ xlmr_base = ModelMeta(
150
151
  xlmr_large = ModelMeta(
151
152
  loader=sentence_transformers_loader, # type: ignore[arg-type]
152
153
  name="FacebookAI/xlm-roberta-large",
154
+ model_type=["dense"],
153
155
  languages=XLMR_LANGUAGES,
154
156
  open_weights=True,
155
157
  revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
@@ -7,6 +7,7 @@ from mteb.models.model_meta import ModelMeta
7
7
 
8
8
  geoembedding = ModelMeta(
9
9
  name="GeoGPT-Research-Project/GeoEmbedding",
10
+ model_type=["dense"],
10
11
  languages=["eng-Latn"],
11
12
  open_weights=True,
12
13
  revision="29803c28ea7ef6871194a8ebc85ad7bfe174928e",
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import math
5
+ import warnings
5
6
  from typing import TYPE_CHECKING, Any
6
7
 
7
8
  import torch
@@ -261,9 +262,9 @@ def smart_resize(
261
262
  w_bar = ceil_by_factor(width * beta, factor)
262
263
 
263
264
  if max(h_bar, w_bar) / min(h_bar, w_bar) > MAX_RATIO:
264
- logger.warning(
265
- f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
266
- )
265
+ msg = f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
266
+ logger.warning(msg)
267
+ warnings.warn(msg)
267
268
  if h_bar > w_bar:
268
269
  h_bar = w_bar * MAX_RATIO
269
270
  else:
@@ -346,6 +347,7 @@ training_data = {
346
347
  gme_qwen2vl_2b = ModelMeta(
347
348
  loader=GmeQwen2VL,
348
349
  name="Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
350
+ model_type=["dense"],
349
351
  languages=["eng-Latn", "cmn-Hans"],
350
352
  open_weights=True,
351
353
  revision="ce765ae71b8cdb208203cd8fb64a170b1b84293a",
@@ -369,6 +371,7 @@ gme_qwen2vl_2b = ModelMeta(
369
371
  gme_qwen2vl_7b = ModelMeta(
370
372
  loader=GmeQwen2VL,
371
373
  name="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
374
+ model_type=["dense"],
372
375
  languages=["eng-Latn", "cmn-Hans"],
373
376
  open_weights=True,
374
377
  revision="477027a6480f8630363be77751f169cc3434b673",
@@ -150,6 +150,7 @@ google_text_emb_004 = ModelMeta(
150
150
  model_prompts=MODEL_PROMPTS,
151
151
  ),
152
152
  name="google/text-embedding-004",
153
+ model_type=["dense"],
153
154
  languages=["eng-Latn"],
154
155
  open_weights=False,
155
156
  revision="1", # revision is intended for implementation
@@ -174,6 +175,7 @@ google_text_emb_005 = ModelMeta(
174
175
  model_prompts=MODEL_PROMPTS,
175
176
  ),
176
177
  name="google/text-embedding-005",
178
+ model_type=["dense"],
177
179
  languages=["eng-Latn"],
178
180
  open_weights=False,
179
181
  revision="1", # revision is intended for implementation
@@ -198,6 +200,7 @@ google_text_multilingual_emb_002 = ModelMeta(
198
200
  model_prompts=MODEL_PROMPTS,
199
201
  ),
200
202
  name="google/text-multilingual-embedding-002",
203
+ model_type=["dense"],
201
204
  languages=MULTILINGUAL_EVALUATED_LANGUAGES, # From the list of evaluated languages in https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#supported_text_languages
202
205
  open_weights=False,
203
206
  revision="1",
@@ -222,6 +225,7 @@ google_gemini_embedding_001 = ModelMeta(
222
225
  model_prompts=MODEL_PROMPTS,
223
226
  ),
224
227
  name="google/gemini-embedding-001",
228
+ model_type=["dense"],
225
229
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
226
230
  open_weights=False,
227
231
  revision="1",
@@ -256,6 +260,7 @@ def gemma_embedding_loader(model_name: str, revision: str, **kwargs):
256
260
  embedding_gemma_300m = ModelMeta(
257
261
  loader=gemma_embedding_loader,
258
262
  name="google/embeddinggemma-300m",
263
+ model_type=["dense"],
259
264
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
260
265
  open_weights=True,
261
266
  revision="64614b0b8b64f0c6c1e52b07e4e9a4e8fe4d2da2",
@@ -166,6 +166,7 @@ granite_vision_embedding = ModelMeta(
166
166
  torch_dtype=torch.float16,
167
167
  ),
168
168
  name="ibm-granite/granite-vision-3.3-2b-embedding",
169
+ model_type=["dense"],
169
170
  languages=["eng-Latn"],
170
171
  revision="cee615db64d89d1552a4ee39c50f25c0fc5c66ca",
171
172
  release_date="2025-06-11",
@@ -38,6 +38,7 @@ gritlm7b = ModelMeta(
38
38
  torch_dtype="auto",
39
39
  ),
40
40
  name="GritLM/GritLM-7B",
41
+ model_type=["dense"],
41
42
  languages=["eng-Latn", "fra-Latn", "deu-Latn", "ita-Latn", "spa-Latn"],
42
43
  open_weights=True,
43
44
  revision="13f00a0e36500c80ce12870ea513846a066004af",
@@ -66,6 +67,7 @@ gritlm8x7b = ModelMeta(
66
67
  torch_dtype="auto",
67
68
  ),
68
69
  name="GritLM/GritLM-8x7B",
70
+ model_type=["dense"],
69
71
  languages=["eng-Latn", "fra-Latn", "deu-Latn", "ita-Latn", "spa-Latn"],
70
72
  open_weights=True,
71
73
  revision="7f089b13e3345510281733ca1e6ff871b5b4bc76",
@@ -42,6 +42,7 @@ gte_qwen2_7b_instruct = ModelMeta(
42
42
  embed_eos="<|endoftext|>",
43
43
  ),
44
44
  name="Alibaba-NLP/gte-Qwen2-7B-instruct",
45
+ model_type=["dense"],
45
46
  languages=None,
46
47
  open_weights=True,
47
48
  revision="e26182b2122f4435e8b3ebecbf363990f409b45b",
@@ -73,6 +74,7 @@ gte_qwen1_5_7b_instruct = ModelMeta(
73
74
  embed_eos="<|endoftext|>",
74
75
  ),
75
76
  name="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
77
+ model_type=["dense"],
76
78
  languages=["eng-Latn"],
77
79
  open_weights=True,
78
80
  revision="07d27e5226328010336563bc1b564a5e3436a298",
@@ -109,6 +111,7 @@ gte_qwen2_1_5b_instruct = ModelMeta(
109
111
  embed_eos="<|endoftext|>",
110
112
  ),
111
113
  name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
114
+ model_type=["dense"],
112
115
  languages=["eng-Latn"],
113
116
  open_weights=True,
114
117
  revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
@@ -136,6 +139,7 @@ gte_qwen2_1_5b_instruct = ModelMeta(
136
139
  gte_small_zh = ModelMeta(
137
140
  loader=sentence_transformers_loader,
138
141
  name="thenlper/gte-small-zh",
142
+ model_type=["dense"],
139
143
  languages=["zho-Hans"],
140
144
  open_weights=True,
141
145
  revision="af7bd46fbb00b3a6963c8dd7f1786ddfbfbe973a",
@@ -163,6 +167,7 @@ gte_small_zh = ModelMeta(
163
167
  gte_base_zh = ModelMeta(
164
168
  loader=sentence_transformers_loader,
165
169
  name="thenlper/gte-base-zh",
170
+ model_type=["dense"],
166
171
  languages=["zho-Hans"],
167
172
  open_weights=True,
168
173
  revision="71ab7947d6fac5b64aa299e6e40e6c2b2e85976c",
@@ -190,6 +195,7 @@ gte_base_zh = ModelMeta(
190
195
  gte_large_zh = ModelMeta(
191
196
  loader=sentence_transformers_loader,
192
197
  name="thenlper/gte-large-zh",
198
+ model_type=["dense"],
193
199
  languages=["zho-Hans"],
194
200
  open_weights=True,
195
201
  revision="64c364e579de308104a9b2c170ca009502f4f545",
@@ -318,6 +324,7 @@ gte_multi_training_data = {
318
324
  gte_multilingual_base = ModelMeta(
319
325
  loader=sentence_transformers_loader,
320
326
  name="Alibaba-NLP/gte-multilingual-base",
327
+ model_type=["dense"],
321
328
  languages=gte_multilingual_langs,
322
329
  open_weights=True,
323
330
  revision="ca1791e0bcc104f6db161f27de1340241b13c5a4",
@@ -346,6 +353,7 @@ gte_multilingual_base = ModelMeta(
346
353
  gte_modernbert_base = ModelMeta(
347
354
  loader=sentence_transformers_loader,
348
355
  name="Alibaba-NLP/gte-modernbert-base",
356
+ model_type=["dense"],
349
357
  languages=["eng-Latn"],
350
358
  open_weights=True,
351
359
  revision="7ca8b4ca700621b67618669f5378fe5f5820b8e4",
@@ -382,6 +390,7 @@ gte_modernbert_base = ModelMeta(
382
390
  gte_base_en_v15 = ModelMeta(
383
391
  loader=sentence_transformers_loader,
384
392
  name="Alibaba-NLP/gte-base-en-v1.5",
393
+ model_type=["dense"],
385
394
  languages=["eng-Latn"],
386
395
  open_weights=True,
387
396
  revision="a829fd0e060bb84554da0dfd354d0de0f7712b7f", # can be any
@@ -37,6 +37,7 @@ Hinvec_bidir = ModelMeta(
37
37
  add_eos_token=True,
38
38
  ),
39
39
  name="Sailesh97/Hinvec",
40
+ model_type=["dense"],
40
41
  languages=["eng-Latn", "hin-Deva"],
41
42
  open_weights=True,
42
43
  revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b",
@@ -3,6 +3,7 @@ from mteb.models import ModelMeta
3
3
  human = ModelMeta(
4
4
  loader=None,
5
5
  name="Human",
6
+ model_type=["dense"],
6
7
  languages=["eng-Latn", "ara-Arab", "rus-Cyrl", "dan-Latn", "nob-Latn"],
7
8
  open_weights=True,
8
9
  revision="2025_09_25",
@@ -94,6 +94,7 @@ granite_training_data = {
94
94
  granite_107m_multilingual = ModelMeta(
95
95
  loader=sentence_transformers_loader,
96
96
  name="ibm-granite/granite-embedding-107m-multilingual",
97
+ model_type=["dense"],
97
98
  languages=GRANITE_LANGUAGES,
98
99
  open_weights=True,
99
100
  revision="47db56afe692f731540413c67dd818ff492277e7",
@@ -118,6 +119,7 @@ granite_107m_multilingual = ModelMeta(
118
119
  granite_278m_multilingual = ModelMeta(
119
120
  loader=sentence_transformers_loader,
120
121
  name="ibm-granite/granite-embedding-278m-multilingual",
122
+ model_type=["dense"],
121
123
  languages=GRANITE_LANGUAGES,
122
124
  open_weights=True,
123
125
  revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
@@ -142,6 +144,7 @@ granite_278m_multilingual = ModelMeta(
142
144
  granite_30m_english = ModelMeta(
143
145
  loader=sentence_transformers_loader,
144
146
  name="ibm-granite/granite-embedding-30m-english",
147
+ model_type=["dense"],
145
148
  languages=["eng-Latn"],
146
149
  open_weights=True,
147
150
  revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
@@ -166,6 +169,7 @@ granite_30m_english = ModelMeta(
166
169
  granite_125m_english = ModelMeta(
167
170
  loader=sentence_transformers_loader,
168
171
  name="ibm-granite/granite-embedding-125m-english",
172
+ model_type=["dense"],
169
173
  languages=["eng-Latn"],
170
174
  open_weights=True,
171
175
  revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
@@ -191,6 +195,7 @@ granite_125m_english = ModelMeta(
191
195
  granite_english_r2 = ModelMeta(
192
196
  loader=sentence_transformers_loader,
193
197
  name="ibm-granite/granite-embedding-english-r2",
198
+ model_type=["dense"],
194
199
  languages=["eng-Latn"],
195
200
  open_weights=True,
196
201
  revision="6e7b8ce0e76270394ac4669ba4bbd7133b60b7f9",
@@ -215,6 +220,7 @@ granite_english_r2 = ModelMeta(
215
220
  granite_small_english_r2 = ModelMeta(
216
221
  loader=sentence_transformers_loader,
217
222
  name="ibm-granite/granite-embedding-small-english-r2",
223
+ model_type=["dense"],
218
224
  languages=["eng-Latn"],
219
225
  open_weights=True,
220
226
  revision="54a8d2616a0844355a5164432d3f6dafb37b17a3",