mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
@@ -199,6 +199,7 @@ nomic_embed_v1_5 = ModelMeta(
199
199
  model_prompts=model_prompts,
200
200
  ),
201
201
  name="nomic-ai/nomic-embed-text-v1.5",
202
+ model_type=["dense"],
202
203
  languages=["eng-Latn"],
203
204
  open_weights=True,
204
205
  revision="b0753ae76394dd36bcfb912a46018088bca48be0",
@@ -227,6 +228,7 @@ nomic_embed_v1 = ModelMeta(
227
228
  model_prompts=model_prompts,
228
229
  ),
229
230
  name="nomic-ai/nomic-embed-text-v1",
231
+ model_type=["dense"],
230
232
  languages=["eng-Latn"],
231
233
  open_weights=True,
232
234
  revision="0759316f275aa0cb93a5b830973843ca66babcf5",
@@ -255,6 +257,7 @@ nomic_embed_v1_ablated = ModelMeta(
255
257
  model_prompts=model_prompts,
256
258
  ),
257
259
  name="nomic-ai/nomic-embed-text-v1-ablated",
260
+ model_type=["dense"],
258
261
  languages=["eng-Latn"],
259
262
  open_weights=True,
260
263
  revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f",
@@ -282,6 +285,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
282
285
  model_prompts=model_prompts,
283
286
  ),
284
287
  name="nomic-ai/nomic-embed-text-v1-unsupervised",
288
+ model_type=["dense"],
285
289
  languages=["eng-Latn"],
286
290
  open_weights=True,
287
291
  revision="b53d557b15ae63852847c222d336c1609eced93c",
@@ -309,6 +313,7 @@ nomic_modern_bert_embed = ModelMeta(
309
313
  model_prompts=model_prompts,
310
314
  ),
311
315
  name="nomic-ai/modernbert-embed-base",
316
+ model_type=["dense"],
312
317
  languages=["eng-Latn"],
313
318
  open_weights=True,
314
319
  revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
@@ -447,6 +452,7 @@ nomic_embed_text_v2_moe = ModelMeta(
447
452
  model_prompts=model_prompts,
448
453
  ),
449
454
  name="nomic-ai/nomic-embed-text-v2-moe",
455
+ model_type=["dense"],
450
456
  languages=m_languages,
451
457
  open_weights=True,
452
458
  revision="1066b6599d099fbb93dfcb64f9c37a7c9e503e85",
@@ -168,6 +168,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
168
168
  "text_model_revision": "a03db6748c80237063eb0546ac6b627eca2318cb",
169
169
  },
170
170
  name="nomic-ai/nomic-embed-vision-v1.5",
171
+ model_type=["dense"],
171
172
  languages=["eng-Latn"],
172
173
  revision="af2246fffdab78d8458418480e4886a8e48b70a7",
173
174
  release_date="2024-06-08",
@@ -146,6 +146,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
146
146
  trust_remote_code=True,
147
147
  ),
148
148
  name="nvidia/llama-nemoretriever-colembed-1b-v1",
149
+ model_type=["late-interaction"],
149
150
  languages=["eng-Latn"],
150
151
  revision="1f0fdea7f5b19532a750be109b19072d719b8177",
151
152
  release_date="2025-06-27",
@@ -172,6 +173,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
172
173
  trust_remote_code=True,
173
174
  ),
174
175
  name="nvidia/llama-nemoretriever-colembed-3b-v1",
176
+ model_type=["late-interaction"],
175
177
  languages=["eng-Latn"],
176
178
  revision="50c36f4d5271c6851aa08bd26d69f6e7ca8b870c",
177
179
  release_date="2025-06-27",
@@ -111,6 +111,7 @@ NV_embed_v2 = ModelMeta(
111
111
  add_eos_token=True,
112
112
  ),
113
113
  name="nvidia/NV-Embed-v2",
114
+ model_type=["dense"],
114
115
  languages=["eng-Latn"],
115
116
  open_weights=True,
116
117
  revision="7604d305b621f14095a1aa23d351674c2859553a",
@@ -141,6 +142,7 @@ NV_embed_v1 = ModelMeta(
141
142
  add_eos_token=True,
142
143
  ),
143
144
  name="nvidia/NV-Embed-v1",
145
+ model_type=["dense"],
144
146
  languages=["eng-Latn"],
145
147
  open_weights=True,
146
148
  revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
@@ -528,6 +530,7 @@ class LlamaEmbedNemotron(AbsEncoder):
528
530
  llama_embed_nemotron_8b = ModelMeta(
529
531
  loader=LlamaEmbedNemotron,
530
532
  name="nvidia/llama-embed-nemotron-8b",
533
+ model_type=["dense"],
531
534
  languages=llama_embed_nemotron_evaluated_languages,
532
535
  open_weights=True,
533
536
  revision="84a375593d27d3528beb4e104822515659e093b4",
@@ -0,0 +1,195 @@
1
+ from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
+ from mteb.models.model_meta import ModelMeta
3
+ from mteb.models.models_protocols import PromptType
4
+
5
+
6
+ def instruction_template(
7
+ instruction: str, prompt_type: PromptType | None = None
8
+ ) -> str:
9
+ if (
10
+ prompt_type == PromptType.document
11
+ ): # to avoid this issue: https://huggingface.co/Qwen/Qwen3-Embedding-8B/discussions/21
12
+ return " "
13
+ if not instruction:
14
+ return ""
15
+ if isinstance(instruction, dict):
16
+ if prompt_type is None:
17
+ instruction = next(iter(instruction.values())) # TODO
18
+ else:
19
+ instruction = instruction[prompt_type]
20
+ return f"Instruct: {instruction}\nQuery:"
21
+
22
+
23
+ multilingual_langs = [
24
+ "afr-Latn",
25
+ "ara-Arab",
26
+ "aze-Latn",
27
+ "bel-Cyrl",
28
+ "bul-Cyrl",
29
+ "ben-Beng",
30
+ "cat-Latn",
31
+ "ceb-Latn",
32
+ "ces-Latn",
33
+ "cym-Latn",
34
+ "dan-Latn",
35
+ "deu-Latn",
36
+ "ell-Grek",
37
+ "eng-Latn",
38
+ "spa-Latn",
39
+ "est-Latn",
40
+ "eus-Latn",
41
+ "fas-Arab",
42
+ "fin-Latn",
43
+ "fra-Latn",
44
+ "glg-Latn",
45
+ "guj-Gujr",
46
+ "heb-Hebr",
47
+ "hin-Deva",
48
+ "hrv-Latn",
49
+ "hat-Latn",
50
+ "hun-Latn",
51
+ "hye-Armn",
52
+ "ind-Latn",
53
+ "isl-Latn",
54
+ "ita-Latn",
55
+ "jpn-Jpan",
56
+ "jav-Latn",
57
+ "kat-Geor",
58
+ "kaz-Cyrl",
59
+ "khm-Khmr",
60
+ "kan-Knda",
61
+ "kor-Hang",
62
+ "kir-Cyrl",
63
+ "lao-Laoo",
64
+ "lit-Latn",
65
+ "lav-Latn",
66
+ "mkd-Cyrl",
67
+ "mal-Mlym",
68
+ "mon-Cyrl",
69
+ "mar-Deva",
70
+ "msa-Latn",
71
+ "mya-Mymr",
72
+ "nep-Deva",
73
+ "nld-Latn",
74
+ "nor-Latn",
75
+ "nob-Latn",
76
+ "nno-Latn",
77
+ "pan-Guru",
78
+ "pol-Latn",
79
+ "por-Latn",
80
+ "que-Latn",
81
+ "ron-Latn",
82
+ "rus-Cyrl",
83
+ "sin-Sinh",
84
+ "slk-Latn",
85
+ "slv-Latn",
86
+ "swa-Latn",
87
+ "tam-Taml",
88
+ "tel-Telu",
89
+ "tha-Thai",
90
+ "tgl-Latn",
91
+ "tur-Latn",
92
+ "ukr-Cyrl",
93
+ "urd-Arab",
94
+ "vie-Latn",
95
+ "yor-Latn",
96
+ "zho-Hans",
97
+ ]
98
+
99
+ OCTEN_CITATION = """@misc{octen-embedding-2025,
100
+ title={Octen-Embedding-8B: A Fine-tuned Multilingual Text Embedding Model},
101
+ author={Octen Team},
102
+ year={2025},
103
+ url={https://huggingface.co/bflhc/bflhc/Octen-Embedding-8B}
104
+ }"""
105
+
106
+ training_data = {
107
+ "T2Retrieval",
108
+ "DuRetrieval",
109
+ "MMarcoReranking",
110
+ "CMedQAv2-reranking",
111
+ "NQ",
112
+ "MSMARCO",
113
+ "HotpotQA",
114
+ "FEVER",
115
+ "MrTidyRetrieval",
116
+ "MIRACLRetrieval",
117
+ "CodeSearchNet",
118
+ }
119
+
120
+ # Predefined prompts for various RTEB tasks
121
+ _PREDEFINED_PROMPTS = {
122
+ # ========== Open Datasets ==========
123
+ # Legal domain
124
+ "AILACasedocs": "Given a legal case scenario, retrieve the most relevant case documents",
125
+ "AILAStatutes": "Given a legal scenario, retrieve the most relevant statute documents",
126
+ "LegalQuAD": "Given a legal question, retrieve relevant legal documents that answer the question",
127
+ "LegalSummarization": "Given a query, retrieve relevant legal documents for summarization",
128
+ # Code domain
129
+ "AppsRetrieval": "Given a query about mobile applications, retrieve relevant app information",
130
+ "HumanEvalRetrieval": "Given a code problem description, retrieve relevant code examples",
131
+ "MBPPRetrieval": "Given a programming problem description, retrieve relevant code solutions",
132
+ "DS1000Retrieval": "Given a data science problem, retrieve relevant code snippets",
133
+ "FreshStackRetrieval": "Given a programming question, retrieve relevant Stack Overflow posts",
134
+ # Finance domain
135
+ "FinQARetrieval": "Given a financial question, retrieve relevant financial documents",
136
+ "FinanceBenchRetrieval": "Given a financial query, retrieve relevant financial information",
137
+ "HC3FinanceRetrieval": "Given a finance-related query, retrieve relevant documents",
138
+ # Medical domain
139
+ "CUREv1": "Given a medical query, retrieve relevant clinical documents",
140
+ "ChatDoctorRetrieval": "Given a medical question, retrieve relevant medical information",
141
+ # SQL domain
142
+ "WikiSQLRetrieval": "Given a natural language query, retrieve relevant SQL examples",
143
+ # Multilingual
144
+ "MIRACLRetrievalHardNegatives": "Given a question, retrieve Wikipedia passages that answer the question",
145
+ # ========== Private/Closed Datasets ==========
146
+ # Code domain (Private)
147
+ "Code1Retrieval": "Given a code problem description, retrieve relevant code examples",
148
+ "JapaneseCode1Retrieval": "Given a code problem description, retrieve relevant code examples",
149
+ # Finance domain (Private)
150
+ "EnglishFinance1Retrieval": "Given a financial query, retrieve relevant financial documents",
151
+ "EnglishFinance2Retrieval": "Given a financial query, retrieve relevant financial documents",
152
+ "EnglishFinance3Retrieval": "Given a financial query, retrieve relevant financial documents",
153
+ "EnglishFinance4Retrieval": "Given a financial query, retrieve relevant financial documents",
154
+ # Healthcare domain (Private)
155
+ "EnglishHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
156
+ "GermanHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
157
+ # Legal domain (Private)
158
+ "FrenchLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
159
+ "GermanLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
160
+ "JapaneseLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
161
+ # General/Multilingual (Private)
162
+ "French1Retrieval": "Given a query, retrieve relevant passages",
163
+ "German1Retrieval": "Given a query, retrieve relevant passages",
164
+ }
165
+
166
+
167
+ Octen_Embedding_8B = ModelMeta(
168
+ loader=InstructSentenceTransformerModel,
169
+ loader_kwargs=dict(
170
+ instruction_template=instruction_template,
171
+ apply_instruction_to_passages=True,
172
+ prompts_dict=_PREDEFINED_PROMPTS,
173
+ max_seq_length=18480,
174
+ model_kwargs={"torch_dtype": "bfloat16"},
175
+ ),
176
+ name="bflhc/Octen-Embedding-8B",
177
+ languages=multilingual_langs,
178
+ open_weights=True,
179
+ revision="2030603c2926ab005fafd824fac5911e271be21f",
180
+ release_date="2025-12-23",
181
+ n_parameters=7567295488,
182
+ memory_usage_mb=14433,
183
+ embed_dim=4096,
184
+ max_tokens=32768,
185
+ license="apache-2.0",
186
+ reference="https://huggingface.co/bflhc/Octen-Embedding-8B",
187
+ similarity_fn_name="cosine",
188
+ framework=["Sentence Transformers", "PyTorch"],
189
+ use_instructions=True,
190
+ public_training_code=None,
191
+ public_training_data=None,
192
+ training_datasets=training_data,
193
+ citation=OCTEN_CITATION,
194
+ adapted_from="Qwen/Qwen3-Embedding-8B",
195
+ )
@@ -167,6 +167,7 @@ class OpenAIModel(AbsEncoder):
167
167
 
168
168
  text_embedding_3_small = ModelMeta(
169
169
  name="openai/text-embedding-3-small",
170
+ model_type=["dense"],
170
171
  revision="3",
171
172
  release_date="2024-01-25",
172
173
  languages=None, # supported languages not specified
@@ -191,6 +192,7 @@ text_embedding_3_small = ModelMeta(
191
192
  )
192
193
  text_embedding_3_large = ModelMeta(
193
194
  name="openai/text-embedding-3-large",
195
+ model_type=["dense"],
194
196
  revision="3",
195
197
  release_date="2024-01-25",
196
198
  languages=None, # supported languages not specified
@@ -215,6 +217,7 @@ text_embedding_3_large = ModelMeta(
215
217
  )
216
218
  text_embedding_ada_002 = ModelMeta(
217
219
  name="openai/text-embedding-ada-002",
220
+ model_type=["dense"],
218
221
  revision="3",
219
222
  release_date="2022-12-15",
220
223
  languages=None, # supported languages not specified
@@ -240,6 +243,7 @@ text_embedding_ada_002 = ModelMeta(
240
243
 
241
244
  text_embedding_3_small_512 = ModelMeta(
242
245
  name="openai/text-embedding-3-small (embed_dim=512)",
246
+ model_type=["dense"],
243
247
  revision="3",
244
248
  release_date="2024-01-25",
245
249
  languages=None, # supported languages not specified
@@ -266,6 +270,7 @@ text_embedding_3_small_512 = ModelMeta(
266
270
 
267
271
  text_embedding_3_large_512 = ModelMeta(
268
272
  name="openai/text-embedding-3-large (embed_dim=512)",
273
+ model_type=["dense"],
269
274
  revision="3",
270
275
  release_date="2024-01-25",
271
276
  languages=None, # supported languages not specified
@@ -122,6 +122,7 @@ def openclip_loader(model_name, **kwargs):
122
122
  CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
123
123
  loader=openclip_loader, # type: ignore
124
124
  name="laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K",
125
+ model_type=["dense"],
125
126
  languages=["eng-Latn"],
126
127
  revision="84c9828e63dc9a9351d1fe637c346d4c1c4db341",
127
128
  release_date="2023-04-26",
@@ -147,6 +148,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
147
148
  CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
148
149
  loader=openclip_loader, # type: ignore
149
150
  name="laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
151
+ model_type=["dense"],
150
152
  languages=["eng-Latn"],
151
153
  revision="f0e2ffa09cbadab3db6a261ec1ec56407ce42912",
152
154
  release_date="2023-04-26",
@@ -172,6 +174,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
172
174
  CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
173
175
  loader=openclip_loader, # type: ignore
174
176
  name="laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K",
177
+ model_type=["dense"],
175
178
  languages=["eng-Latn"],
176
179
  revision="d110532e8d4ff91c574ee60a342323f28468b287",
177
180
  release_date="2023-04-26",
@@ -197,6 +200,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
197
200
  CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
198
201
  loader=openclip_loader, # type: ignore
199
202
  name="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
203
+ model_type=["dense"],
200
204
  languages=["eng-Latn"],
201
205
  revision="bc7788f151930d91b58474715fdce5524ad9a189",
202
206
  release_date="2023-01-23",
@@ -222,6 +226,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
222
226
  CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
223
227
  loader=openclip_loader, # type: ignore
224
228
  name="laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
229
+ model_type=["dense"],
225
230
  languages=["eng-Latn"],
226
231
  revision="15efd0f6ac0c40c0f9da7becca03c974d7012604",
227
232
  release_date="2023-03-06",
@@ -247,6 +252,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
247
252
  CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
248
253
  loader=openclip_loader, # type: ignore
249
254
  name="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
255
+ model_type=["dense"],
250
256
  languages=["eng-Latn"],
251
257
  revision="de081ac0a0ca8dc9d1533eed1ae884bb8ae1404b",
252
258
  release_date="2022-09-15",
@@ -272,6 +278,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
272
278
  CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
273
279
  loader=openclip_loader, # type: ignore
274
280
  name="laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
281
+ model_type=["dense"],
275
282
  languages=["eng-Latn"],
276
283
  revision="1627032197142fbe2a7cfec626f4ced3ae60d07a",
277
284
  release_date="2022-09-15",
@@ -297,6 +304,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
297
304
  CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
298
305
  loader=openclip_loader,
299
306
  name="laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
307
+ model_type=["dense"],
300
308
  languages=["eng-Latn"],
301
309
  revision="08f73555f1b2fb7c82058aebbd492887a94968ef",
302
310
  release_date="2022-09-15",
@@ -128,6 +128,7 @@ class SparseEncoderWrapper(AbsEncoder):
128
128
 
129
129
  opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
130
130
  name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
131
+ model_type=["dense"],
131
132
  languages=["eng-Latn"],
132
133
  open_weights=True,
133
134
  revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
@@ -153,6 +154,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
153
154
 
154
155
  opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
155
156
  name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
157
+ model_type=["dense"],
156
158
  languages=["eng-Latn"],
157
159
  open_weights=True,
158
160
  revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
@@ -174,6 +176,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
174
176
 
175
177
  opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
176
178
  name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
179
+ model_type=["dense"],
177
180
  languages=["eng-Latn"],
178
181
  open_weights=True,
179
182
  revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
@@ -196,6 +199,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
196
199
 
197
200
  opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
198
201
  name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
202
+ model_type=["dense"],
199
203
  languages=["eng-Latn"],
200
204
  open_weights=True,
201
205
  revision="4af867a426867dfdd744097531046f4289a32fdd",
@@ -217,6 +221,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
217
221
 
218
222
  opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
219
223
  name="opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
224
+ model_type=["dense"],
220
225
  languages=["eng-Latn"],
221
226
  open_weights=True,
222
227
  revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
@@ -22,6 +22,7 @@ class OPSWrapper(AbsEncoder):
22
22
 
23
23
  ops_moa_conan_embedding = ModelMeta(
24
24
  name="OpenSearch-AI/Ops-MoA-Conan-embedding-v1",
25
+ model_type=["dense"],
25
26
  revision="46dcd58753f3daa920c66f89e47086a534089350",
26
27
  release_date="2025-03-26",
27
28
  languages=["zho-Hans"],
@@ -53,6 +54,7 @@ ops_moa_conan_embedding = ModelMeta(
53
54
 
54
55
  ops_moa_yuan_embedding = ModelMeta(
55
56
  name="OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0",
57
+ model_type=["dense"],
56
58
  revision="23712d0766417b0eb88a2513c6e212a58b543268",
57
59
  release_date="2025-03-26",
58
60
  languages=["zho-Hans"],
@@ -14,6 +14,7 @@ PAWAN_EMBD_CITATION = """@misc{medhi2025pawanembd,
14
14
  pawan_embd_68m = ModelMeta(
15
15
  loader=sentence_transformers_loader,
16
16
  name="dmedhi/PawanEmbd-68M",
17
+ model_type=["dense"],
17
18
  languages=["eng-Latn"],
18
19
  open_weights=True,
19
20
  revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
  piccolo_base_zh = ModelMeta(
7
7
  loader=sentence_transformers_loader,
8
8
  name="sensenova/piccolo-base-zh",
9
+ model_type=["dense"],
9
10
  languages=["zho-Hans"],
10
11
  open_weights=True,
11
12
  revision="47c0a63b8f667c3482e05b2fd45577bb19252196",
@@ -29,6 +30,7 @@ piccolo_base_zh = ModelMeta(
29
30
  piccolo_large_zh_v2 = ModelMeta(
30
31
  loader=sentence_transformers_loader,
31
32
  name="sensenova/piccolo-large-zh-v2",
33
+ model_type=["dense"],
32
34
  languages=["zho-Hans"],
33
35
  open_weights=False, # They "temporarily" removed it in may last year
34
36
  # "Due to certain internal company considerations"
@@ -75,6 +75,7 @@ promptriever_llama2 = ModelMeta(
75
75
  model_prompts=model_prompts,
76
76
  ),
77
77
  name="samaya-ai/promptriever-llama2-7b-v1",
78
+ model_type=["dense"],
78
79
  languages=["eng-Latn"],
79
80
  open_weights=True,
80
81
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
@@ -106,6 +107,7 @@ promptriever_llama3 = ModelMeta(
106
107
  model_prompts=model_prompts,
107
108
  ),
108
109
  name="samaya-ai/promptriever-llama3.1-8b-v1",
110
+ model_type=["dense"],
109
111
  languages=["eng-Latn"],
110
112
  open_weights=True,
111
113
  revision="48d6d0fc4e02fb1269b36940650a1b7233035cbb-2ead22cfb1b0e0c519c371c63c2ab90ffc511b8a", # base-peft revision
@@ -138,6 +140,7 @@ promptriever_llama3_instruct = ModelMeta(
138
140
  model_prompts=model_prompts,
139
141
  ),
140
142
  name="samaya-ai/promptriever-llama3.1-8b-instruct-v1",
143
+ model_type=["dense"],
141
144
  languages=["eng-Latn"],
142
145
  open_weights=True,
143
146
  revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
@@ -170,6 +173,7 @@ promptriever_mistral_v1 = ModelMeta(
170
173
  model_prompts=model_prompts,
171
174
  ),
172
175
  name="samaya-ai/promptriever-mistral-v0.1-7b-v1",
176
+ model_type=["dense"],
173
177
  languages=["eng-Latn"],
174
178
  open_weights=True,
175
179
  revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
@@ -337,6 +337,7 @@ class MultiVectorModel(AbsEncoder, PylateSearchEncoder):
337
337
  colbert_v2 = ModelMeta(
338
338
  loader=MultiVectorModel,
339
339
  name="colbert-ir/colbertv2.0",
340
+ model_type=["late-interaction"],
340
341
  languages=["eng-Latn"],
341
342
  open_weights=True,
342
343
  revision="c1e84128e85ef755c096a95bdb06b47793b13acf",
@@ -369,6 +370,7 @@ jina_colbert_v2 = ModelMeta(
369
370
  trust_remote_code=True,
370
371
  ),
371
372
  name="jinaai/jina-colbert-v2",
373
+ model_type=["late-interaction"],
372
374
  languages=[
373
375
  "ara-Arab",
374
376
  "ben-Beng",
@@ -445,6 +447,7 @@ jina_colbert_v2 = ModelMeta(
445
447
  lightonai__gte_moderncolbert_v1 = ModelMeta(
446
448
  loader=MultiVectorModel,
447
449
  name="lightonai/GTE-ModernColBERT-v1",
450
+ model_type=["late-interaction"],
448
451
  languages=[
449
452
  "eng-Latn",
450
453
  ],
@@ -30,6 +30,7 @@ qodo_languages = [
30
30
  Qodo_Embed_1_1_5B = ModelMeta(
31
31
  loader=sentence_transformers_loader,
32
32
  name="Qodo/Qodo-Embed-1-1.5B",
33
+ model_type=["dense"],
33
34
  languages=qodo_languages,
34
35
  open_weights=True,
35
36
  revision="84bbef079b32e8823ec226d4e9e92902706b0eb6",
@@ -52,6 +53,7 @@ Qodo_Embed_1_1_5B = ModelMeta(
52
53
  Qodo_Embed_1_7B = ModelMeta(
53
54
  loader=sentence_transformers_loader,
54
55
  name="Qodo/Qodo-Embed-1-7B",
56
+ model_type=["dense"],
55
57
  languages=qodo_languages,
56
58
  open_weights=True,
57
59
  revision="f9edd9bf7f687c0e832424058e265120f603cd81",
@@ -25,6 +25,7 @@ mini_gte_datasets = {
25
25
  mini_gte = ModelMeta(
26
26
  loader=sentence_transformers_loader,
27
27
  name="prdev/mini-gte",
28
+ model_type=["dense"],
28
29
  languages=["eng-Latn"],
29
30
  open_weights=True,
30
31
  revision="7fbe6f9b4cc42615e0747299f837ad7769025492",
@@ -134,6 +134,7 @@ def q3e_instruct_loader(
134
134
  Qwen3_Embedding_0B6 = ModelMeta(
135
135
  loader=q3e_instruct_loader,
136
136
  name="Qwen/Qwen3-Embedding-0.6B",
137
+ model_type=["dense"],
137
138
  languages=multilingual_langs,
138
139
  open_weights=True,
139
140
  revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
@@ -156,6 +157,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
156
157
  Qwen3_Embedding_4B = ModelMeta(
157
158
  loader=q3e_instruct_loader,
158
159
  name="Qwen/Qwen3-Embedding-4B",
160
+ model_type=["dense"],
159
161
  languages=multilingual_langs,
160
162
  open_weights=True,
161
163
  revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
@@ -178,6 +180,7 @@ Qwen3_Embedding_4B = ModelMeta(
178
180
  Qwen3_Embedding_8B = ModelMeta(
179
181
  loader=q3e_instruct_loader,
180
182
  name="Qwen/Qwen3-Embedding-8B",
183
+ model_type=["dense"],
181
184
  languages=multilingual_langs,
182
185
  open_weights=True,
183
186
  revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
@@ -58,6 +58,7 @@ QZhou_Embedding = ModelMeta(
58
58
  apply_instruction_to_passages=False,
59
59
  ),
60
60
  name="Kingsoft-LLM/QZhou-Embedding",
61
+ model_type=["dense"],
61
62
  languages=["eng-Latn", "zho-Hans"],
62
63
  open_weights=True,
63
64
  revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
@@ -91,6 +92,7 @@ QZhou_Embedding_Zh = ModelMeta(
91
92
  apply_instruction_to_passages=False,
92
93
  ),
93
94
  name="Kingsoft-LLM/QZhou-Embedding-Zh",
95
+ model_type=["dense"],
94
96
  languages=["zho-Hans"],
95
97
  open_weights=True,
96
98
  revision="0321ccb126413d1e49c5ce908e802b63d35f18e2",
@@ -189,6 +189,7 @@ class RandomEncoderBaseline:
189
189
  random_encoder_baseline = ModelMeta(
190
190
  loader=RandomEncoderBaseline, # type: ignore
191
191
  name="baseline/random-encoder-baseline",
192
+ model_type=["dense"],
192
193
  modalities=["text", "image"],
193
194
  **_common_mock_metadata,
194
195
  )
@@ -233,7 +234,7 @@ class RandomCrossEncoderBaseline:
233
234
  random_cross_encoder_baseline = ModelMeta(
234
235
  loader=RandomCrossEncoderBaseline, # type: ignore
235
236
  name="baseline/random-cross-encoder-baseline",
237
+ model_type=["cross-encoder"],
236
238
  modalities=["text", "image"],
237
- is_cross_encoder=True,
238
239
  **_common_mock_metadata,
239
240
  )
@@ -6,6 +6,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
6
6
  potion_base_8m = ModelMeta(
7
7
  loader=Model2VecModel, # type: ignore
8
8
  name="rasgaard/m2v-dfm-large",
9
+ model_type=["dense"],
9
10
  languages=["dan-Latn"],
10
11
  open_weights=True,
11
12
  revision="387897cfb09992e6d45ea9cd7b28b9fcf119e23a",
@@ -44,6 +44,7 @@ ReasonIR_8B = ModelMeta(
44
44
  trust_remote_code=True,
45
45
  ),
46
46
  name="ReasonIR/ReasonIR-8B",
47
+ model_type=["dense"],
47
48
  languages=["eng-Latn"],
48
49
  open_weights=True,
49
50
  revision="c3d0690370ff4a8c3d3882d8dfa85c43650034fa",
@@ -162,6 +162,7 @@ repllama_llama2_original = ModelMeta(
162
162
  model_prompts=model_prompts,
163
163
  ),
164
164
  name="castorini/repllama-v1-7b-lora-passage",
165
+ model_type=["dense"],
165
166
  languages=["eng-Latn"],
166
167
  open_weights=True,
167
168
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-6097554dfe6e7d93e92f55010b678bcca1e233a8", # base-peft revision
@@ -194,6 +195,7 @@ repllama_llama2_reproduced = ModelMeta(
194
195
  model_prompts=model_prompts,
195
196
  ),
196
197
  name="samaya-ai/RepLLaMA-reproduced",
198
+ model_type=["dense"],
197
199
  languages=["eng-Latn"],
198
200
  open_weights=True,
199
201
  revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision