mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/abstasks/abstask.py +6 -6
- mteb/abstasks/aggregated_task.py +4 -10
- mteb/abstasks/clustering_legacy.py +3 -2
- mteb/abstasks/task_metadata.py +2 -3
- mteb/cache.py +7 -4
- mteb/cli/build_cli.py +10 -5
- mteb/cli/generate_model_card.py +4 -3
- mteb/deprecated_evaluator.py +4 -3
- mteb/evaluate.py +4 -1
- mteb/get_tasks.py +4 -3
- mteb/leaderboard/app.py +70 -3
- mteb/models/abs_encoder.py +5 -3
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
- mteb/models/model_implementations/align_models.py +1 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +17 -0
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +2 -0
- mteb/models/model_implementations/blip_models.py +8 -0
- mteb/models/model_implementations/bm25.py +1 -0
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +2 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +3 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +4 -0
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +3 -0
- mteb/models/model_implementations/colqwen_models.py +9 -0
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +19 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +1 -0
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +8 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +6 -3
- mteb/models/model_implementations/google_models.py +5 -0
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +2 -0
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +7 -1
- mteb/models/model_implementations/kalm_models.py +6 -0
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +1 -0
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mme5_models.py +1 -0
- mteb/models/model_implementations/moco_models.py +2 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/mxbai_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +6 -0
- mteb/models/model_implementations/nomic_models_vision.py +1 -0
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
- mteb/models/model_implementations/nvidia_models.py +3 -0
- mteb/models/model_implementations/octen_models.py +195 -0
- mteb/models/model_implementations/openai_models.py +5 -0
- mteb/models/model_implementations/openclip_models.py +8 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +2 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +4 -0
- mteb/models/model_implementations/pylate_models.py +3 -0
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +3 -0
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +2 -1
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +1 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +1 -0
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +1 -0
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +1 -0
- mteb/models/model_meta.py +49 -4
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
- mteb/models/search_wrappers.py +4 -2
- mteb/models/sentence_transformer_wrapper.py +10 -10
- mteb/results/benchmark_results.py +67 -43
- mteb/results/model_result.py +3 -1
- mteb/results/task_result.py +22 -17
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
|
@@ -199,6 +199,7 @@ nomic_embed_v1_5 = ModelMeta(
|
|
|
199
199
|
model_prompts=model_prompts,
|
|
200
200
|
),
|
|
201
201
|
name="nomic-ai/nomic-embed-text-v1.5",
|
|
202
|
+
model_type=["dense"],
|
|
202
203
|
languages=["eng-Latn"],
|
|
203
204
|
open_weights=True,
|
|
204
205
|
revision="b0753ae76394dd36bcfb912a46018088bca48be0",
|
|
@@ -227,6 +228,7 @@ nomic_embed_v1 = ModelMeta(
|
|
|
227
228
|
model_prompts=model_prompts,
|
|
228
229
|
),
|
|
229
230
|
name="nomic-ai/nomic-embed-text-v1",
|
|
231
|
+
model_type=["dense"],
|
|
230
232
|
languages=["eng-Latn"],
|
|
231
233
|
open_weights=True,
|
|
232
234
|
revision="0759316f275aa0cb93a5b830973843ca66babcf5",
|
|
@@ -255,6 +257,7 @@ nomic_embed_v1_ablated = ModelMeta(
|
|
|
255
257
|
model_prompts=model_prompts,
|
|
256
258
|
),
|
|
257
259
|
name="nomic-ai/nomic-embed-text-v1-ablated",
|
|
260
|
+
model_type=["dense"],
|
|
258
261
|
languages=["eng-Latn"],
|
|
259
262
|
open_weights=True,
|
|
260
263
|
revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f",
|
|
@@ -282,6 +285,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
|
|
|
282
285
|
model_prompts=model_prompts,
|
|
283
286
|
),
|
|
284
287
|
name="nomic-ai/nomic-embed-text-v1-unsupervised",
|
|
288
|
+
model_type=["dense"],
|
|
285
289
|
languages=["eng-Latn"],
|
|
286
290
|
open_weights=True,
|
|
287
291
|
revision="b53d557b15ae63852847c222d336c1609eced93c",
|
|
@@ -309,6 +313,7 @@ nomic_modern_bert_embed = ModelMeta(
|
|
|
309
313
|
model_prompts=model_prompts,
|
|
310
314
|
),
|
|
311
315
|
name="nomic-ai/modernbert-embed-base",
|
|
316
|
+
model_type=["dense"],
|
|
312
317
|
languages=["eng-Latn"],
|
|
313
318
|
open_weights=True,
|
|
314
319
|
revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12",
|
|
@@ -447,6 +452,7 @@ nomic_embed_text_v2_moe = ModelMeta(
|
|
|
447
452
|
model_prompts=model_prompts,
|
|
448
453
|
),
|
|
449
454
|
name="nomic-ai/nomic-embed-text-v2-moe",
|
|
455
|
+
model_type=["dense"],
|
|
450
456
|
languages=m_languages,
|
|
451
457
|
open_weights=True,
|
|
452
458
|
revision="1066b6599d099fbb93dfcb64f9c37a7c9e503e85",
|
|
@@ -168,6 +168,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
|
|
|
168
168
|
"text_model_revision": "a03db6748c80237063eb0546ac6b627eca2318cb",
|
|
169
169
|
},
|
|
170
170
|
name="nomic-ai/nomic-embed-vision-v1.5",
|
|
171
|
+
model_type=["dense"],
|
|
171
172
|
languages=["eng-Latn"],
|
|
172
173
|
revision="af2246fffdab78d8458418480e4886a8e48b70a7",
|
|
173
174
|
release_date="2024-06-08",
|
|
@@ -146,6 +146,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
|
|
|
146
146
|
trust_remote_code=True,
|
|
147
147
|
),
|
|
148
148
|
name="nvidia/llama-nemoretriever-colembed-1b-v1",
|
|
149
|
+
model_type=["late-interaction"],
|
|
149
150
|
languages=["eng-Latn"],
|
|
150
151
|
revision="1f0fdea7f5b19532a750be109b19072d719b8177",
|
|
151
152
|
release_date="2025-06-27",
|
|
@@ -172,6 +173,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
|
|
|
172
173
|
trust_remote_code=True,
|
|
173
174
|
),
|
|
174
175
|
name="nvidia/llama-nemoretriever-colembed-3b-v1",
|
|
176
|
+
model_type=["late-interaction"],
|
|
175
177
|
languages=["eng-Latn"],
|
|
176
178
|
revision="50c36f4d5271c6851aa08bd26d69f6e7ca8b870c",
|
|
177
179
|
release_date="2025-06-27",
|
|
@@ -111,6 +111,7 @@ NV_embed_v2 = ModelMeta(
|
|
|
111
111
|
add_eos_token=True,
|
|
112
112
|
),
|
|
113
113
|
name="nvidia/NV-Embed-v2",
|
|
114
|
+
model_type=["dense"],
|
|
114
115
|
languages=["eng-Latn"],
|
|
115
116
|
open_weights=True,
|
|
116
117
|
revision="7604d305b621f14095a1aa23d351674c2859553a",
|
|
@@ -141,6 +142,7 @@ NV_embed_v1 = ModelMeta(
|
|
|
141
142
|
add_eos_token=True,
|
|
142
143
|
),
|
|
143
144
|
name="nvidia/NV-Embed-v1",
|
|
145
|
+
model_type=["dense"],
|
|
144
146
|
languages=["eng-Latn"],
|
|
145
147
|
open_weights=True,
|
|
146
148
|
revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
|
|
@@ -528,6 +530,7 @@ class LlamaEmbedNemotron(AbsEncoder):
|
|
|
528
530
|
llama_embed_nemotron_8b = ModelMeta(
|
|
529
531
|
loader=LlamaEmbedNemotron,
|
|
530
532
|
name="nvidia/llama-embed-nemotron-8b",
|
|
533
|
+
model_type=["dense"],
|
|
531
534
|
languages=llama_embed_nemotron_evaluated_languages,
|
|
532
535
|
open_weights=True,
|
|
533
536
|
revision="84a375593d27d3528beb4e104822515659e093b4",
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
|
|
2
|
+
from mteb.models.model_meta import ModelMeta
|
|
3
|
+
from mteb.models.models_protocols import PromptType
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def instruction_template(
|
|
7
|
+
instruction: str, prompt_type: PromptType | None = None
|
|
8
|
+
) -> str:
|
|
9
|
+
if (
|
|
10
|
+
prompt_type == PromptType.document
|
|
11
|
+
): # to avoid this issue: https://huggingface.co/Qwen/Qwen3-Embedding-8B/discussions/21
|
|
12
|
+
return " "
|
|
13
|
+
if not instruction:
|
|
14
|
+
return ""
|
|
15
|
+
if isinstance(instruction, dict):
|
|
16
|
+
if prompt_type is None:
|
|
17
|
+
instruction = next(iter(instruction.values())) # TODO
|
|
18
|
+
else:
|
|
19
|
+
instruction = instruction[prompt_type]
|
|
20
|
+
return f"Instruct: {instruction}\nQuery:"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
multilingual_langs = [
|
|
24
|
+
"afr-Latn",
|
|
25
|
+
"ara-Arab",
|
|
26
|
+
"aze-Latn",
|
|
27
|
+
"bel-Cyrl",
|
|
28
|
+
"bul-Cyrl",
|
|
29
|
+
"ben-Beng",
|
|
30
|
+
"cat-Latn",
|
|
31
|
+
"ceb-Latn",
|
|
32
|
+
"ces-Latn",
|
|
33
|
+
"cym-Latn",
|
|
34
|
+
"dan-Latn",
|
|
35
|
+
"deu-Latn",
|
|
36
|
+
"ell-Grek",
|
|
37
|
+
"eng-Latn",
|
|
38
|
+
"spa-Latn",
|
|
39
|
+
"est-Latn",
|
|
40
|
+
"eus-Latn",
|
|
41
|
+
"fas-Arab",
|
|
42
|
+
"fin-Latn",
|
|
43
|
+
"fra-Latn",
|
|
44
|
+
"glg-Latn",
|
|
45
|
+
"guj-Gujr",
|
|
46
|
+
"heb-Hebr",
|
|
47
|
+
"hin-Deva",
|
|
48
|
+
"hrv-Latn",
|
|
49
|
+
"hat-Latn",
|
|
50
|
+
"hun-Latn",
|
|
51
|
+
"hye-Armn",
|
|
52
|
+
"ind-Latn",
|
|
53
|
+
"isl-Latn",
|
|
54
|
+
"ita-Latn",
|
|
55
|
+
"jpn-Jpan",
|
|
56
|
+
"jav-Latn",
|
|
57
|
+
"kat-Geor",
|
|
58
|
+
"kaz-Cyrl",
|
|
59
|
+
"khm-Khmr",
|
|
60
|
+
"kan-Knda",
|
|
61
|
+
"kor-Hang",
|
|
62
|
+
"kir-Cyrl",
|
|
63
|
+
"lao-Laoo",
|
|
64
|
+
"lit-Latn",
|
|
65
|
+
"lav-Latn",
|
|
66
|
+
"mkd-Cyrl",
|
|
67
|
+
"mal-Mlym",
|
|
68
|
+
"mon-Cyrl",
|
|
69
|
+
"mar-Deva",
|
|
70
|
+
"msa-Latn",
|
|
71
|
+
"mya-Mymr",
|
|
72
|
+
"nep-Deva",
|
|
73
|
+
"nld-Latn",
|
|
74
|
+
"nor-Latn",
|
|
75
|
+
"nob-Latn",
|
|
76
|
+
"nno-Latn",
|
|
77
|
+
"pan-Guru",
|
|
78
|
+
"pol-Latn",
|
|
79
|
+
"por-Latn",
|
|
80
|
+
"que-Latn",
|
|
81
|
+
"ron-Latn",
|
|
82
|
+
"rus-Cyrl",
|
|
83
|
+
"sin-Sinh",
|
|
84
|
+
"slk-Latn",
|
|
85
|
+
"slv-Latn",
|
|
86
|
+
"swa-Latn",
|
|
87
|
+
"tam-Taml",
|
|
88
|
+
"tel-Telu",
|
|
89
|
+
"tha-Thai",
|
|
90
|
+
"tgl-Latn",
|
|
91
|
+
"tur-Latn",
|
|
92
|
+
"ukr-Cyrl",
|
|
93
|
+
"urd-Arab",
|
|
94
|
+
"vie-Latn",
|
|
95
|
+
"yor-Latn",
|
|
96
|
+
"zho-Hans",
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
OCTEN_CITATION = """@misc{octen-embedding-2025,
|
|
100
|
+
title={Octen-Embedding-8B: A Fine-tuned Multilingual Text Embedding Model},
|
|
101
|
+
author={Octen Team},
|
|
102
|
+
year={2025},
|
|
103
|
+
url={https://huggingface.co/bflhc/bflhc/Octen-Embedding-8B}
|
|
104
|
+
}"""
|
|
105
|
+
|
|
106
|
+
training_data = {
|
|
107
|
+
"T2Retrieval",
|
|
108
|
+
"DuRetrieval",
|
|
109
|
+
"MMarcoReranking",
|
|
110
|
+
"CMedQAv2-reranking",
|
|
111
|
+
"NQ",
|
|
112
|
+
"MSMARCO",
|
|
113
|
+
"HotpotQA",
|
|
114
|
+
"FEVER",
|
|
115
|
+
"MrTidyRetrieval",
|
|
116
|
+
"MIRACLRetrieval",
|
|
117
|
+
"CodeSearchNet",
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
# Predefined prompts for various RTEB tasks
|
|
121
|
+
_PREDEFINED_PROMPTS = {
|
|
122
|
+
# ========== Open Datasets ==========
|
|
123
|
+
# Legal domain
|
|
124
|
+
"AILACasedocs": "Given a legal case scenario, retrieve the most relevant case documents",
|
|
125
|
+
"AILAStatutes": "Given a legal scenario, retrieve the most relevant statute documents",
|
|
126
|
+
"LegalQuAD": "Given a legal question, retrieve relevant legal documents that answer the question",
|
|
127
|
+
"LegalSummarization": "Given a query, retrieve relevant legal documents for summarization",
|
|
128
|
+
# Code domain
|
|
129
|
+
"AppsRetrieval": "Given a query about mobile applications, retrieve relevant app information",
|
|
130
|
+
"HumanEvalRetrieval": "Given a code problem description, retrieve relevant code examples",
|
|
131
|
+
"MBPPRetrieval": "Given a programming problem description, retrieve relevant code solutions",
|
|
132
|
+
"DS1000Retrieval": "Given a data science problem, retrieve relevant code snippets",
|
|
133
|
+
"FreshStackRetrieval": "Given a programming question, retrieve relevant Stack Overflow posts",
|
|
134
|
+
# Finance domain
|
|
135
|
+
"FinQARetrieval": "Given a financial question, retrieve relevant financial documents",
|
|
136
|
+
"FinanceBenchRetrieval": "Given a financial query, retrieve relevant financial information",
|
|
137
|
+
"HC3FinanceRetrieval": "Given a finance-related query, retrieve relevant documents",
|
|
138
|
+
# Medical domain
|
|
139
|
+
"CUREv1": "Given a medical query, retrieve relevant clinical documents",
|
|
140
|
+
"ChatDoctorRetrieval": "Given a medical question, retrieve relevant medical information",
|
|
141
|
+
# SQL domain
|
|
142
|
+
"WikiSQLRetrieval": "Given a natural language query, retrieve relevant SQL examples",
|
|
143
|
+
# Multilingual
|
|
144
|
+
"MIRACLRetrievalHardNegatives": "Given a question, retrieve Wikipedia passages that answer the question",
|
|
145
|
+
# ========== Private/Closed Datasets ==========
|
|
146
|
+
# Code domain (Private)
|
|
147
|
+
"Code1Retrieval": "Given a code problem description, retrieve relevant code examples",
|
|
148
|
+
"JapaneseCode1Retrieval": "Given a code problem description, retrieve relevant code examples",
|
|
149
|
+
# Finance domain (Private)
|
|
150
|
+
"EnglishFinance1Retrieval": "Given a financial query, retrieve relevant financial documents",
|
|
151
|
+
"EnglishFinance2Retrieval": "Given a financial query, retrieve relevant financial documents",
|
|
152
|
+
"EnglishFinance3Retrieval": "Given a financial query, retrieve relevant financial documents",
|
|
153
|
+
"EnglishFinance4Retrieval": "Given a financial query, retrieve relevant financial documents",
|
|
154
|
+
# Healthcare domain (Private)
|
|
155
|
+
"EnglishHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
|
|
156
|
+
"GermanHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
|
|
157
|
+
# Legal domain (Private)
|
|
158
|
+
"FrenchLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
|
|
159
|
+
"GermanLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
|
|
160
|
+
"JapaneseLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
|
|
161
|
+
# General/Multilingual (Private)
|
|
162
|
+
"French1Retrieval": "Given a query, retrieve relevant passages",
|
|
163
|
+
"German1Retrieval": "Given a query, retrieve relevant passages",
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
Octen_Embedding_8B = ModelMeta(
|
|
168
|
+
loader=InstructSentenceTransformerModel,
|
|
169
|
+
loader_kwargs=dict(
|
|
170
|
+
instruction_template=instruction_template,
|
|
171
|
+
apply_instruction_to_passages=True,
|
|
172
|
+
prompts_dict=_PREDEFINED_PROMPTS,
|
|
173
|
+
max_seq_length=18480,
|
|
174
|
+
model_kwargs={"torch_dtype": "bfloat16"},
|
|
175
|
+
),
|
|
176
|
+
name="bflhc/Octen-Embedding-8B",
|
|
177
|
+
languages=multilingual_langs,
|
|
178
|
+
open_weights=True,
|
|
179
|
+
revision="2030603c2926ab005fafd824fac5911e271be21f",
|
|
180
|
+
release_date="2025-12-23",
|
|
181
|
+
n_parameters=7567295488,
|
|
182
|
+
memory_usage_mb=14433,
|
|
183
|
+
embed_dim=4096,
|
|
184
|
+
max_tokens=32768,
|
|
185
|
+
license="apache-2.0",
|
|
186
|
+
reference="https://huggingface.co/bflhc/Octen-Embedding-8B",
|
|
187
|
+
similarity_fn_name="cosine",
|
|
188
|
+
framework=["Sentence Transformers", "PyTorch"],
|
|
189
|
+
use_instructions=True,
|
|
190
|
+
public_training_code=None,
|
|
191
|
+
public_training_data=None,
|
|
192
|
+
training_datasets=training_data,
|
|
193
|
+
citation=OCTEN_CITATION,
|
|
194
|
+
adapted_from="Qwen/Qwen3-Embedding-8B",
|
|
195
|
+
)
|
|
@@ -167,6 +167,7 @@ class OpenAIModel(AbsEncoder):
|
|
|
167
167
|
|
|
168
168
|
text_embedding_3_small = ModelMeta(
|
|
169
169
|
name="openai/text-embedding-3-small",
|
|
170
|
+
model_type=["dense"],
|
|
170
171
|
revision="3",
|
|
171
172
|
release_date="2024-01-25",
|
|
172
173
|
languages=None, # supported languages not specified
|
|
@@ -191,6 +192,7 @@ text_embedding_3_small = ModelMeta(
|
|
|
191
192
|
)
|
|
192
193
|
text_embedding_3_large = ModelMeta(
|
|
193
194
|
name="openai/text-embedding-3-large",
|
|
195
|
+
model_type=["dense"],
|
|
194
196
|
revision="3",
|
|
195
197
|
release_date="2024-01-25",
|
|
196
198
|
languages=None, # supported languages not specified
|
|
@@ -215,6 +217,7 @@ text_embedding_3_large = ModelMeta(
|
|
|
215
217
|
)
|
|
216
218
|
text_embedding_ada_002 = ModelMeta(
|
|
217
219
|
name="openai/text-embedding-ada-002",
|
|
220
|
+
model_type=["dense"],
|
|
218
221
|
revision="3",
|
|
219
222
|
release_date="2022-12-15",
|
|
220
223
|
languages=None, # supported languages not specified
|
|
@@ -240,6 +243,7 @@ text_embedding_ada_002 = ModelMeta(
|
|
|
240
243
|
|
|
241
244
|
text_embedding_3_small_512 = ModelMeta(
|
|
242
245
|
name="openai/text-embedding-3-small (embed_dim=512)",
|
|
246
|
+
model_type=["dense"],
|
|
243
247
|
revision="3",
|
|
244
248
|
release_date="2024-01-25",
|
|
245
249
|
languages=None, # supported languages not specified
|
|
@@ -266,6 +270,7 @@ text_embedding_3_small_512 = ModelMeta(
|
|
|
266
270
|
|
|
267
271
|
text_embedding_3_large_512 = ModelMeta(
|
|
268
272
|
name="openai/text-embedding-3-large (embed_dim=512)",
|
|
273
|
+
model_type=["dense"],
|
|
269
274
|
revision="3",
|
|
270
275
|
release_date="2024-01-25",
|
|
271
276
|
languages=None, # supported languages not specified
|
|
@@ -122,6 +122,7 @@ def openclip_loader(model_name, **kwargs):
|
|
|
122
122
|
CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
|
|
123
123
|
loader=openclip_loader, # type: ignore
|
|
124
124
|
name="laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K",
|
|
125
|
+
model_type=["dense"],
|
|
125
126
|
languages=["eng-Latn"],
|
|
126
127
|
revision="84c9828e63dc9a9351d1fe637c346d4c1c4db341",
|
|
127
128
|
release_date="2023-04-26",
|
|
@@ -147,6 +148,7 @@ CLIP_ViT_L_14_DataComp_XL_s13B_b90K = ModelMeta(
|
|
|
147
148
|
CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
|
|
148
149
|
loader=openclip_loader, # type: ignore
|
|
149
150
|
name="laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
|
|
151
|
+
model_type=["dense"],
|
|
150
152
|
languages=["eng-Latn"],
|
|
151
153
|
revision="f0e2ffa09cbadab3db6a261ec1ec56407ce42912",
|
|
152
154
|
release_date="2023-04-26",
|
|
@@ -172,6 +174,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
|
|
|
172
174
|
CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
|
|
173
175
|
loader=openclip_loader, # type: ignore
|
|
174
176
|
name="laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K",
|
|
177
|
+
model_type=["dense"],
|
|
175
178
|
languages=["eng-Latn"],
|
|
176
179
|
revision="d110532e8d4ff91c574ee60a342323f28468b287",
|
|
177
180
|
release_date="2023-04-26",
|
|
@@ -197,6 +200,7 @@ CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
|
|
|
197
200
|
CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
|
|
198
201
|
loader=openclip_loader, # type: ignore
|
|
199
202
|
name="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
|
|
203
|
+
model_type=["dense"],
|
|
200
204
|
languages=["eng-Latn"],
|
|
201
205
|
revision="bc7788f151930d91b58474715fdce5524ad9a189",
|
|
202
206
|
release_date="2023-01-23",
|
|
@@ -222,6 +226,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
|
|
|
222
226
|
CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
|
|
223
227
|
loader=openclip_loader, # type: ignore
|
|
224
228
|
name="laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
|
|
229
|
+
model_type=["dense"],
|
|
225
230
|
languages=["eng-Latn"],
|
|
226
231
|
revision="15efd0f6ac0c40c0f9da7becca03c974d7012604",
|
|
227
232
|
release_date="2023-03-06",
|
|
@@ -247,6 +252,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
|
|
|
247
252
|
CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
|
|
248
253
|
loader=openclip_loader, # type: ignore
|
|
249
254
|
name="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
|
|
255
|
+
model_type=["dense"],
|
|
250
256
|
languages=["eng-Latn"],
|
|
251
257
|
revision="de081ac0a0ca8dc9d1533eed1ae884bb8ae1404b",
|
|
252
258
|
release_date="2022-09-15",
|
|
@@ -272,6 +278,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
|
|
|
272
278
|
CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
|
|
273
279
|
loader=openclip_loader, # type: ignore
|
|
274
280
|
name="laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
|
|
281
|
+
model_type=["dense"],
|
|
275
282
|
languages=["eng-Latn"],
|
|
276
283
|
revision="1627032197142fbe2a7cfec626f4ced3ae60d07a",
|
|
277
284
|
release_date="2022-09-15",
|
|
@@ -297,6 +304,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
|
|
|
297
304
|
CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
|
|
298
305
|
loader=openclip_loader,
|
|
299
306
|
name="laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
|
|
307
|
+
model_type=["dense"],
|
|
300
308
|
languages=["eng-Latn"],
|
|
301
309
|
revision="08f73555f1b2fb7c82058aebbd492887a94968ef",
|
|
302
310
|
release_date="2022-09-15",
|
|
@@ -128,6 +128,7 @@ class SparseEncoderWrapper(AbsEncoder):
|
|
|
128
128
|
|
|
129
129
|
opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
|
|
130
130
|
name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
|
|
131
|
+
model_type=["dense"],
|
|
131
132
|
languages=["eng-Latn"],
|
|
132
133
|
open_weights=True,
|
|
133
134
|
revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
|
|
@@ -153,6 +154,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
|
|
|
153
154
|
|
|
154
155
|
opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
|
|
155
156
|
name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
|
|
157
|
+
model_type=["dense"],
|
|
156
158
|
languages=["eng-Latn"],
|
|
157
159
|
open_weights=True,
|
|
158
160
|
revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
|
|
@@ -174,6 +176,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
|
|
|
174
176
|
|
|
175
177
|
opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
|
|
176
178
|
name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
|
|
179
|
+
model_type=["dense"],
|
|
177
180
|
languages=["eng-Latn"],
|
|
178
181
|
open_weights=True,
|
|
179
182
|
revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
|
|
@@ -196,6 +199,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
|
|
|
196
199
|
|
|
197
200
|
opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
|
|
198
201
|
name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
|
|
202
|
+
model_type=["dense"],
|
|
199
203
|
languages=["eng-Latn"],
|
|
200
204
|
open_weights=True,
|
|
201
205
|
revision="4af867a426867dfdd744097531046f4289a32fdd",
|
|
@@ -217,6 +221,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
|
|
|
217
221
|
|
|
218
222
|
opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
|
|
219
223
|
name="opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
|
|
224
|
+
model_type=["dense"],
|
|
220
225
|
languages=["eng-Latn"],
|
|
221
226
|
open_weights=True,
|
|
222
227
|
revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
|
|
@@ -22,6 +22,7 @@ class OPSWrapper(AbsEncoder):
|
|
|
22
22
|
|
|
23
23
|
ops_moa_conan_embedding = ModelMeta(
|
|
24
24
|
name="OpenSearch-AI/Ops-MoA-Conan-embedding-v1",
|
|
25
|
+
model_type=["dense"],
|
|
25
26
|
revision="46dcd58753f3daa920c66f89e47086a534089350",
|
|
26
27
|
release_date="2025-03-26",
|
|
27
28
|
languages=["zho-Hans"],
|
|
@@ -53,6 +54,7 @@ ops_moa_conan_embedding = ModelMeta(
|
|
|
53
54
|
|
|
54
55
|
ops_moa_yuan_embedding = ModelMeta(
|
|
55
56
|
name="OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0",
|
|
57
|
+
model_type=["dense"],
|
|
56
58
|
revision="23712d0766417b0eb88a2513c6e212a58b543268",
|
|
57
59
|
release_date="2025-03-26",
|
|
58
60
|
languages=["zho-Hans"],
|
|
@@ -14,6 +14,7 @@ PAWAN_EMBD_CITATION = """@misc{medhi2025pawanembd,
|
|
|
14
14
|
pawan_embd_68m = ModelMeta(
|
|
15
15
|
loader=sentence_transformers_loader,
|
|
16
16
|
name="dmedhi/PawanEmbd-68M",
|
|
17
|
+
model_type=["dense"],
|
|
17
18
|
languages=["eng-Latn"],
|
|
18
19
|
open_weights=True,
|
|
19
20
|
revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
|
|
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
|
|
|
6
6
|
piccolo_base_zh = ModelMeta(
|
|
7
7
|
loader=sentence_transformers_loader,
|
|
8
8
|
name="sensenova/piccolo-base-zh",
|
|
9
|
+
model_type=["dense"],
|
|
9
10
|
languages=["zho-Hans"],
|
|
10
11
|
open_weights=True,
|
|
11
12
|
revision="47c0a63b8f667c3482e05b2fd45577bb19252196",
|
|
@@ -29,6 +30,7 @@ piccolo_base_zh = ModelMeta(
|
|
|
29
30
|
piccolo_large_zh_v2 = ModelMeta(
|
|
30
31
|
loader=sentence_transformers_loader,
|
|
31
32
|
name="sensenova/piccolo-large-zh-v2",
|
|
33
|
+
model_type=["dense"],
|
|
32
34
|
languages=["zho-Hans"],
|
|
33
35
|
open_weights=False, # They "temporarily" removed it in may last year
|
|
34
36
|
# "Due to certain internal company considerations"
|
|
@@ -75,6 +75,7 @@ promptriever_llama2 = ModelMeta(
|
|
|
75
75
|
model_prompts=model_prompts,
|
|
76
76
|
),
|
|
77
77
|
name="samaya-ai/promptriever-llama2-7b-v1",
|
|
78
|
+
model_type=["dense"],
|
|
78
79
|
languages=["eng-Latn"],
|
|
79
80
|
open_weights=True,
|
|
80
81
|
revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
|
|
@@ -106,6 +107,7 @@ promptriever_llama3 = ModelMeta(
|
|
|
106
107
|
model_prompts=model_prompts,
|
|
107
108
|
),
|
|
108
109
|
name="samaya-ai/promptriever-llama3.1-8b-v1",
|
|
110
|
+
model_type=["dense"],
|
|
109
111
|
languages=["eng-Latn"],
|
|
110
112
|
open_weights=True,
|
|
111
113
|
revision="48d6d0fc4e02fb1269b36940650a1b7233035cbb-2ead22cfb1b0e0c519c371c63c2ab90ffc511b8a", # base-peft revision
|
|
@@ -138,6 +140,7 @@ promptriever_llama3_instruct = ModelMeta(
|
|
|
138
140
|
model_prompts=model_prompts,
|
|
139
141
|
),
|
|
140
142
|
name="samaya-ai/promptriever-llama3.1-8b-instruct-v1",
|
|
143
|
+
model_type=["dense"],
|
|
141
144
|
languages=["eng-Latn"],
|
|
142
145
|
open_weights=True,
|
|
143
146
|
revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
|
|
@@ -170,6 +173,7 @@ promptriever_mistral_v1 = ModelMeta(
|
|
|
170
173
|
model_prompts=model_prompts,
|
|
171
174
|
),
|
|
172
175
|
name="samaya-ai/promptriever-mistral-v0.1-7b-v1",
|
|
176
|
+
model_type=["dense"],
|
|
173
177
|
languages=["eng-Latn"],
|
|
174
178
|
open_weights=True,
|
|
175
179
|
revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
|
|
@@ -337,6 +337,7 @@ class MultiVectorModel(AbsEncoder, PylateSearchEncoder):
|
|
|
337
337
|
colbert_v2 = ModelMeta(
|
|
338
338
|
loader=MultiVectorModel,
|
|
339
339
|
name="colbert-ir/colbertv2.0",
|
|
340
|
+
model_type=["late-interaction"],
|
|
340
341
|
languages=["eng-Latn"],
|
|
341
342
|
open_weights=True,
|
|
342
343
|
revision="c1e84128e85ef755c096a95bdb06b47793b13acf",
|
|
@@ -369,6 +370,7 @@ jina_colbert_v2 = ModelMeta(
|
|
|
369
370
|
trust_remote_code=True,
|
|
370
371
|
),
|
|
371
372
|
name="jinaai/jina-colbert-v2",
|
|
373
|
+
model_type=["late-interaction"],
|
|
372
374
|
languages=[
|
|
373
375
|
"ara-Arab",
|
|
374
376
|
"ben-Beng",
|
|
@@ -445,6 +447,7 @@ jina_colbert_v2 = ModelMeta(
|
|
|
445
447
|
lightonai__gte_moderncolbert_v1 = ModelMeta(
|
|
446
448
|
loader=MultiVectorModel,
|
|
447
449
|
name="lightonai/GTE-ModernColBERT-v1",
|
|
450
|
+
model_type=["late-interaction"],
|
|
448
451
|
languages=[
|
|
449
452
|
"eng-Latn",
|
|
450
453
|
],
|
|
@@ -30,6 +30,7 @@ qodo_languages = [
|
|
|
30
30
|
Qodo_Embed_1_1_5B = ModelMeta(
|
|
31
31
|
loader=sentence_transformers_loader,
|
|
32
32
|
name="Qodo/Qodo-Embed-1-1.5B",
|
|
33
|
+
model_type=["dense"],
|
|
33
34
|
languages=qodo_languages,
|
|
34
35
|
open_weights=True,
|
|
35
36
|
revision="84bbef079b32e8823ec226d4e9e92902706b0eb6",
|
|
@@ -52,6 +53,7 @@ Qodo_Embed_1_1_5B = ModelMeta(
|
|
|
52
53
|
Qodo_Embed_1_7B = ModelMeta(
|
|
53
54
|
loader=sentence_transformers_loader,
|
|
54
55
|
name="Qodo/Qodo-Embed-1-7B",
|
|
56
|
+
model_type=["dense"],
|
|
55
57
|
languages=qodo_languages,
|
|
56
58
|
open_weights=True,
|
|
57
59
|
revision="f9edd9bf7f687c0e832424058e265120f603cd81",
|
|
@@ -134,6 +134,7 @@ def q3e_instruct_loader(
|
|
|
134
134
|
Qwen3_Embedding_0B6 = ModelMeta(
|
|
135
135
|
loader=q3e_instruct_loader,
|
|
136
136
|
name="Qwen/Qwen3-Embedding-0.6B",
|
|
137
|
+
model_type=["dense"],
|
|
137
138
|
languages=multilingual_langs,
|
|
138
139
|
open_weights=True,
|
|
139
140
|
revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
|
|
@@ -156,6 +157,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
|
|
|
156
157
|
Qwen3_Embedding_4B = ModelMeta(
|
|
157
158
|
loader=q3e_instruct_loader,
|
|
158
159
|
name="Qwen/Qwen3-Embedding-4B",
|
|
160
|
+
model_type=["dense"],
|
|
159
161
|
languages=multilingual_langs,
|
|
160
162
|
open_weights=True,
|
|
161
163
|
revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
|
|
@@ -178,6 +180,7 @@ Qwen3_Embedding_4B = ModelMeta(
|
|
|
178
180
|
Qwen3_Embedding_8B = ModelMeta(
|
|
179
181
|
loader=q3e_instruct_loader,
|
|
180
182
|
name="Qwen/Qwen3-Embedding-8B",
|
|
183
|
+
model_type=["dense"],
|
|
181
184
|
languages=multilingual_langs,
|
|
182
185
|
open_weights=True,
|
|
183
186
|
revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
|
|
@@ -58,6 +58,7 @@ QZhou_Embedding = ModelMeta(
|
|
|
58
58
|
apply_instruction_to_passages=False,
|
|
59
59
|
),
|
|
60
60
|
name="Kingsoft-LLM/QZhou-Embedding",
|
|
61
|
+
model_type=["dense"],
|
|
61
62
|
languages=["eng-Latn", "zho-Hans"],
|
|
62
63
|
open_weights=True,
|
|
63
64
|
revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
|
|
@@ -91,6 +92,7 @@ QZhou_Embedding_Zh = ModelMeta(
|
|
|
91
92
|
apply_instruction_to_passages=False,
|
|
92
93
|
),
|
|
93
94
|
name="Kingsoft-LLM/QZhou-Embedding-Zh",
|
|
95
|
+
model_type=["dense"],
|
|
94
96
|
languages=["zho-Hans"],
|
|
95
97
|
open_weights=True,
|
|
96
98
|
revision="0321ccb126413d1e49c5ce908e802b63d35f18e2",
|
|
@@ -189,6 +189,7 @@ class RandomEncoderBaseline:
|
|
|
189
189
|
random_encoder_baseline = ModelMeta(
|
|
190
190
|
loader=RandomEncoderBaseline, # type: ignore
|
|
191
191
|
name="baseline/random-encoder-baseline",
|
|
192
|
+
model_type=["dense"],
|
|
192
193
|
modalities=["text", "image"],
|
|
193
194
|
**_common_mock_metadata,
|
|
194
195
|
)
|
|
@@ -233,7 +234,7 @@ class RandomCrossEncoderBaseline:
|
|
|
233
234
|
random_cross_encoder_baseline = ModelMeta(
|
|
234
235
|
loader=RandomCrossEncoderBaseline, # type: ignore
|
|
235
236
|
name="baseline/random-cross-encoder-baseline",
|
|
237
|
+
model_type=["cross-encoder"],
|
|
236
238
|
modalities=["text", "image"],
|
|
237
|
-
is_cross_encoder=True,
|
|
238
239
|
**_common_mock_metadata,
|
|
239
240
|
)
|
|
@@ -6,6 +6,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
|
6
6
|
potion_base_8m = ModelMeta(
|
|
7
7
|
loader=Model2VecModel, # type: ignore
|
|
8
8
|
name="rasgaard/m2v-dfm-large",
|
|
9
|
+
model_type=["dense"],
|
|
9
10
|
languages=["dan-Latn"],
|
|
10
11
|
open_weights=True,
|
|
11
12
|
revision="387897cfb09992e6d45ea9cd7b28b9fcf119e23a",
|
|
@@ -162,6 +162,7 @@ repllama_llama2_original = ModelMeta(
|
|
|
162
162
|
model_prompts=model_prompts,
|
|
163
163
|
),
|
|
164
164
|
name="castorini/repllama-v1-7b-lora-passage",
|
|
165
|
+
model_type=["dense"],
|
|
165
166
|
languages=["eng-Latn"],
|
|
166
167
|
open_weights=True,
|
|
167
168
|
revision="01c7f73d771dfac7d292323805ebc428287df4f9-6097554dfe6e7d93e92f55010b678bcca1e233a8", # base-peft revision
|
|
@@ -194,6 +195,7 @@ repllama_llama2_reproduced = ModelMeta(
|
|
|
194
195
|
model_prompts=model_prompts,
|
|
195
196
|
),
|
|
196
197
|
name="samaya-ai/RepLLaMA-reproduced",
|
|
198
|
+
model_type=["dense"],
|
|
197
199
|
languages=["eng-Latn"],
|
|
198
200
|
open_weights=True,
|
|
199
201
|
revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision
|