mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/abstasks/abstask.py +6 -6
- mteb/abstasks/aggregated_task.py +4 -10
- mteb/abstasks/clustering_legacy.py +3 -2
- mteb/abstasks/task_metadata.py +2 -3
- mteb/cache.py +7 -4
- mteb/cli/build_cli.py +10 -5
- mteb/cli/generate_model_card.py +4 -3
- mteb/deprecated_evaluator.py +4 -3
- mteb/evaluate.py +4 -1
- mteb/get_tasks.py +4 -3
- mteb/leaderboard/app.py +70 -3
- mteb/models/abs_encoder.py +5 -3
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
- mteb/models/model_implementations/align_models.py +1 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +17 -0
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +2 -0
- mteb/models/model_implementations/blip_models.py +8 -0
- mteb/models/model_implementations/bm25.py +1 -0
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +2 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +3 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +4 -0
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +3 -0
- mteb/models/model_implementations/colqwen_models.py +9 -0
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +19 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +1 -0
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +8 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +6 -3
- mteb/models/model_implementations/google_models.py +5 -0
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +2 -0
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +7 -1
- mteb/models/model_implementations/kalm_models.py +6 -0
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +1 -0
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mme5_models.py +1 -0
- mteb/models/model_implementations/moco_models.py +2 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/mxbai_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +6 -0
- mteb/models/model_implementations/nomic_models_vision.py +1 -0
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
- mteb/models/model_implementations/nvidia_models.py +3 -0
- mteb/models/model_implementations/octen_models.py +195 -0
- mteb/models/model_implementations/openai_models.py +5 -0
- mteb/models/model_implementations/openclip_models.py +8 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +2 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +4 -0
- mteb/models/model_implementations/pylate_models.py +3 -0
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +3 -0
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +2 -1
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +1 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +1 -0
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +1 -0
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +1 -0
- mteb/models/model_meta.py +49 -4
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
- mteb/models/search_wrappers.py +4 -2
- mteb/models/sentence_transformer_wrapper.py +10 -10
- mteb/results/benchmark_results.py +67 -43
- mteb/results/model_result.py +3 -1
- mteb/results/task_result.py +22 -17
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
|
@@ -113,6 +113,7 @@ sent_trf_training_dataset = {
|
|
|
113
113
|
all_minilm_l6_v2 = ModelMeta(
|
|
114
114
|
loader=sentence_transformers_loader,
|
|
115
115
|
name="sentence-transformers/all-MiniLM-L6-v2",
|
|
116
|
+
model_type=["dense"],
|
|
116
117
|
languages=["eng-Latn"],
|
|
117
118
|
open_weights=True,
|
|
118
119
|
revision="8b3219a92973c328a8e22fadcfa821b5dc75636a",
|
|
@@ -137,6 +138,7 @@ all_minilm_l6_v2 = ModelMeta(
|
|
|
137
138
|
all_minilm_l12_v2 = ModelMeta(
|
|
138
139
|
loader=sentence_transformers_loader,
|
|
139
140
|
name="sentence-transformers/all-MiniLM-L12-v2",
|
|
141
|
+
model_type=["dense"],
|
|
140
142
|
languages=["eng-Latn"],
|
|
141
143
|
open_weights=True,
|
|
142
144
|
revision="364dd28d28dcd3359b537f3cf1f5348ba679da62",
|
|
@@ -161,6 +163,7 @@ all_minilm_l12_v2 = ModelMeta(
|
|
|
161
163
|
paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
|
|
162
164
|
loader=sentence_transformers_loader,
|
|
163
165
|
name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
|
166
|
+
model_type=["dense"],
|
|
164
167
|
languages=paraphrase_langs,
|
|
165
168
|
open_weights=True,
|
|
166
169
|
revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb",
|
|
@@ -185,6 +188,7 @@ paraphrase_multilingual_minilm_l12_v2 = ModelMeta(
|
|
|
185
188
|
paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
|
|
186
189
|
loader=sentence_transformers_loader,
|
|
187
190
|
name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
|
191
|
+
model_type=["dense"],
|
|
188
192
|
languages=paraphrase_langs,
|
|
189
193
|
open_weights=True,
|
|
190
194
|
revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6",
|
|
@@ -220,6 +224,7 @@ paraphrase_multilingual_mpnet_base_v2 = ModelMeta(
|
|
|
220
224
|
labse = ModelMeta(
|
|
221
225
|
loader=sentence_transformers_loader,
|
|
222
226
|
name="sentence-transformers/LaBSE",
|
|
227
|
+
model_type=["dense"],
|
|
223
228
|
languages=paraphrase_langs,
|
|
224
229
|
open_weights=True,
|
|
225
230
|
revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7",
|
|
@@ -257,6 +262,7 @@ labse = ModelMeta(
|
|
|
257
262
|
multi_qa_minilm_l6_cos_v1 = ModelMeta(
|
|
258
263
|
loader=sentence_transformers_loader,
|
|
259
264
|
name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
|
265
|
+
model_type=["dense"],
|
|
260
266
|
languages=["eng-Latn"],
|
|
261
267
|
open_weights=True,
|
|
262
268
|
revision="b207367332321f8e44f96e224ef15bc607f4dbf0",
|
|
@@ -281,6 +287,7 @@ multi_qa_minilm_l6_cos_v1 = ModelMeta(
|
|
|
281
287
|
all_mpnet_base_v2 = ModelMeta(
|
|
282
288
|
loader=sentence_transformers_loader,
|
|
283
289
|
name="sentence-transformers/all-mpnet-base-v2",
|
|
290
|
+
model_type=["dense"],
|
|
284
291
|
languages=["eng-Latn"],
|
|
285
292
|
open_weights=True,
|
|
286
293
|
revision="9a3225965996d404b775526de6dbfe85d3368642",
|
|
@@ -380,6 +387,7 @@ static_multi_languages = [
|
|
|
380
387
|
|
|
381
388
|
static_similarity_mrl_multilingual_v1 = ModelMeta(
|
|
382
389
|
name="sentence-transformers/static-similarity-mrl-multilingual-v1",
|
|
390
|
+
model_type=["dense"],
|
|
383
391
|
loader=SentenceTransformerEncoderWrapper,
|
|
384
392
|
loader_kwargs=dict(
|
|
385
393
|
device="cpu", # CPU is just as quick, if not quicker
|
|
@@ -416,6 +424,7 @@ static_similarity_mrl_multilingual_v1 = ModelMeta(
|
|
|
416
424
|
contriever = ModelMeta(
|
|
417
425
|
loader=SentenceTransformerEncoderWrapper,
|
|
418
426
|
name="facebook/contriever-msmarco",
|
|
427
|
+
model_type=["dense"],
|
|
419
428
|
languages=["eng-Latn"],
|
|
420
429
|
open_weights=True,
|
|
421
430
|
revision="abe8c1493371369031bcb1e02acb754cf4e162fa",
|
|
@@ -445,6 +454,7 @@ contriever = ModelMeta(
|
|
|
445
454
|
microllama_text_embedding = ModelMeta(
|
|
446
455
|
loader=sentence_transformers_loader,
|
|
447
456
|
name="keeeeenw/MicroLlama-text-embedding",
|
|
457
|
+
model_type=["dense"],
|
|
448
458
|
languages=["eng-Latn"],
|
|
449
459
|
open_weights=True,
|
|
450
460
|
revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e",
|
|
@@ -490,6 +500,7 @@ SENTENCE_T5_CITATION = """
|
|
|
490
500
|
sentence_t5_base = ModelMeta(
|
|
491
501
|
loader=sentence_transformers_loader,
|
|
492
502
|
name="sentence-transformers/sentence-t5-base",
|
|
503
|
+
model_type=["dense"],
|
|
493
504
|
languages=["eng-Latn"],
|
|
494
505
|
open_weights=True,
|
|
495
506
|
revision="50c53e206f8b01c9621484a3c0aafce4e55efebf",
|
|
@@ -512,6 +523,7 @@ sentence_t5_base = ModelMeta(
|
|
|
512
523
|
sentence_t5_large = ModelMeta(
|
|
513
524
|
loader=sentence_transformers_loader,
|
|
514
525
|
name="sentence-transformers/sentence-t5-large",
|
|
526
|
+
model_type=["dense"],
|
|
515
527
|
languages=["eng-Latn"],
|
|
516
528
|
open_weights=True,
|
|
517
529
|
revision="1fc08ea477205aa54a3e5b13f0971ae16b86410a",
|
|
@@ -534,6 +546,7 @@ sentence_t5_large = ModelMeta(
|
|
|
534
546
|
sentence_t5_xl = ModelMeta(
|
|
535
547
|
loader=sentence_transformers_loader,
|
|
536
548
|
name="sentence-transformers/sentence-t5-xl",
|
|
549
|
+
model_type=["dense"],
|
|
537
550
|
languages=["eng-Latn"],
|
|
538
551
|
open_weights=True,
|
|
539
552
|
revision="2965d31b368fb14117688e0bde77cbd720e91f53",
|
|
@@ -556,6 +569,7 @@ sentence_t5_xl = ModelMeta(
|
|
|
556
569
|
sentence_t5_xxl = ModelMeta(
|
|
557
570
|
loader=sentence_transformers_loader,
|
|
558
571
|
name="sentence-transformers/sentence-t5-xxl",
|
|
572
|
+
model_type=["dense"],
|
|
559
573
|
languages=["eng-Latn"],
|
|
560
574
|
open_weights=True,
|
|
561
575
|
revision="4d122282ba80e807e9e6eb8c358269e92796365d",
|
|
@@ -588,6 +602,7 @@ GTR_CITATION = """
|
|
|
588
602
|
gtr_t5_large = ModelMeta(
|
|
589
603
|
loader=sentence_transformers_loader,
|
|
590
604
|
name="sentence-transformers/gtr-t5-large",
|
|
605
|
+
model_type=["dense"],
|
|
591
606
|
languages=["eng-Latn"], # in format eng-Latn
|
|
592
607
|
open_weights=True,
|
|
593
608
|
revision="a2c8ac47f998531948d4cbe32a0b577a7037a5e3",
|
|
@@ -622,6 +637,7 @@ gtr_t5_large = ModelMeta(
|
|
|
622
637
|
gtr_t5_xl = ModelMeta(
|
|
623
638
|
loader=sentence_transformers_loader,
|
|
624
639
|
name="sentence-transformers/gtr-t5-xl",
|
|
640
|
+
model_type=["dense"],
|
|
625
641
|
languages=["eng-Latn"], # in format eng-Latn
|
|
626
642
|
open_weights=True,
|
|
627
643
|
revision="23a8d667a1ad2578af181ce762867003c498d1bf",
|
|
@@ -655,6 +671,7 @@ gtr_t5_xl = ModelMeta(
|
|
|
655
671
|
gtr_t5_xxl = ModelMeta(
|
|
656
672
|
loader=sentence_transformers_loader,
|
|
657
673
|
name="sentence-transformers/gtr-t5-xxl",
|
|
674
|
+
model_type=["dense"],
|
|
658
675
|
languages=["eng-Latn"], # in format eng-Latn
|
|
659
676
|
open_weights=True,
|
|
660
677
|
revision="73f2a9156a3dcc2194dfdb2bf201cd7d17e17884",
|
|
@@ -689,6 +706,7 @@ gtr_t5_xxl = ModelMeta(
|
|
|
689
706
|
gtr_t5_base = ModelMeta(
|
|
690
707
|
loader=sentence_transformers_loader,
|
|
691
708
|
name="sentence-transformers/gtr-t5-base",
|
|
709
|
+
model_type=["dense"],
|
|
692
710
|
languages=["eng-Latn"], # in format eng-Latn
|
|
693
711
|
open_weights=True,
|
|
694
712
|
revision="7027e9594267928589816394bdd295273ddc0739",
|
|
@@ -1,31 +1,32 @@
|
|
|
1
|
-
from mteb.models.model_meta import ModelMeta
|
|
2
|
-
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
|
|
3
|
-
|
|
4
|
-
codemodernbert_crow_meta = ModelMeta(
|
|
5
|
-
loader=sentence_transformers_loader,
|
|
6
|
-
name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# "
|
|
26
|
-
# "Shuu12121/
|
|
27
|
-
# "Shuu12121/
|
|
28
|
-
# "Shuu12121/
|
|
29
|
-
# "Shuu12121/
|
|
30
|
-
|
|
31
|
-
|
|
1
|
+
from mteb.models.model_meta import ModelMeta
|
|
2
|
+
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
|
|
3
|
+
|
|
4
|
+
codemodernbert_crow_meta = ModelMeta(
|
|
5
|
+
loader=sentence_transformers_loader,
|
|
6
|
+
name="Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
|
|
7
|
+
model_type=["dense"],
|
|
8
|
+
languages=["eng-Latn"],
|
|
9
|
+
open_weights=True,
|
|
10
|
+
revision="044a7a4b552f86e284817234c336bccf16f895ce",
|
|
11
|
+
release_date="2025-04-21",
|
|
12
|
+
n_parameters=151668480,
|
|
13
|
+
memory_usage_mb=607,
|
|
14
|
+
embed_dim=768,
|
|
15
|
+
license="apache-2.0",
|
|
16
|
+
max_tokens=1024,
|
|
17
|
+
reference="https://huggingface.co/Shuu12121/CodeSearch-ModernBERT-Crow-Plus",
|
|
18
|
+
similarity_fn_name="cosine",
|
|
19
|
+
framework=["Sentence Transformers", "PyTorch"],
|
|
20
|
+
use_instructions=False,
|
|
21
|
+
public_training_code=None,
|
|
22
|
+
public_training_data=None,
|
|
23
|
+
training_datasets={
|
|
24
|
+
"CodeSearchNetRetrieval",
|
|
25
|
+
# "code-search-net/code_search_net",
|
|
26
|
+
# "Shuu12121/python-codesearch-filtered",
|
|
27
|
+
# "Shuu12121/java-codesearch-filtered",
|
|
28
|
+
# "Shuu12121/javascript-codesearch-filtered",
|
|
29
|
+
# "Shuu12121/ruby-codesearch-filtered",
|
|
30
|
+
# "Shuu12121/rust-codesearch-filtered",
|
|
31
|
+
},
|
|
32
|
+
)
|
|
@@ -125,6 +125,7 @@ siglip_training_datasets = set(
|
|
|
125
125
|
siglip_so400m_patch14_224 = ModelMeta(
|
|
126
126
|
loader=SiglipModelWrapper, # type: ignore
|
|
127
127
|
name="google/siglip-so400m-patch14-224",
|
|
128
|
+
model_type=["dense"],
|
|
128
129
|
languages=["eng-Latn"],
|
|
129
130
|
revision="d04cf29fca7b6374f74d8bea1969314492266b5e",
|
|
130
131
|
release_date="2024-01-08",
|
|
@@ -148,6 +149,7 @@ siglip_so400m_patch14_224 = ModelMeta(
|
|
|
148
149
|
siglip_so400m_patch14_384 = ModelMeta(
|
|
149
150
|
loader=SiglipModelWrapper, # type: ignore
|
|
150
151
|
name="google/siglip-so400m-patch14-384",
|
|
152
|
+
model_type=["dense"],
|
|
151
153
|
languages=["eng-Latn"],
|
|
152
154
|
revision="9fdffc58afc957d1a03a25b10dba0329ab15c2a3",
|
|
153
155
|
release_date="2024-01-08",
|
|
@@ -171,6 +173,7 @@ siglip_so400m_patch14_384 = ModelMeta(
|
|
|
171
173
|
siglip_so400m_patch16_256_i18n = ModelMeta(
|
|
172
174
|
loader=SiglipModelWrapper, # type: ignore
|
|
173
175
|
name="google/siglip-so400m-patch16-256-i18n",
|
|
176
|
+
model_type=["dense"],
|
|
174
177
|
languages=["eng-Latn"],
|
|
175
178
|
revision="365d321c0cfdea96bc28e3a29787a11a062681a1",
|
|
176
179
|
release_date="2024-01-08",
|
|
@@ -194,6 +197,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
|
|
|
194
197
|
siglip_base_patch16_256_multilingual = ModelMeta(
|
|
195
198
|
loader=SiglipModelWrapper, # type: ignore
|
|
196
199
|
name="google/siglip-base-patch16-256-multilingual",
|
|
200
|
+
model_type=["dense"],
|
|
197
201
|
languages=["eng-Latn"],
|
|
198
202
|
revision="8952a4eafcde3cb7ab46b1dd629b33f8784ca9c6",
|
|
199
203
|
release_date="2024-01-08",
|
|
@@ -217,6 +221,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
|
|
|
217
221
|
siglip_base_patch16_256 = ModelMeta(
|
|
218
222
|
loader=SiglipModelWrapper, # type: ignore
|
|
219
223
|
name="google/siglip-base-patch16-256",
|
|
224
|
+
model_type=["dense"],
|
|
220
225
|
languages=["eng-Latn"],
|
|
221
226
|
revision="b078df89e446d623010d890864d4207fe6399f61",
|
|
222
227
|
release_date="2024-01-08",
|
|
@@ -240,6 +245,7 @@ siglip_base_patch16_256 = ModelMeta(
|
|
|
240
245
|
siglip_base_patch16_512 = ModelMeta(
|
|
241
246
|
loader=SiglipModelWrapper, # type: ignore
|
|
242
247
|
name="google/siglip-base-patch16-512",
|
|
248
|
+
model_type=["dense"],
|
|
243
249
|
languages=["eng-Latn"],
|
|
244
250
|
revision="753a949581523b60257d93e18391e8c27f72eb22",
|
|
245
251
|
release_date="2024-01-08",
|
|
@@ -263,6 +269,7 @@ siglip_base_patch16_512 = ModelMeta(
|
|
|
263
269
|
siglip_base_patch16_384 = ModelMeta(
|
|
264
270
|
loader=SiglipModelWrapper, # type: ignore
|
|
265
271
|
name="google/siglip-base-patch16-384",
|
|
272
|
+
model_type=["dense"],
|
|
266
273
|
languages=["eng-Latn"],
|
|
267
274
|
revision="41aec1c83b32e0a6fca20ad88ba058aa5b5ea394",
|
|
268
275
|
release_date="2024-01-08",
|
|
@@ -286,6 +293,7 @@ siglip_base_patch16_384 = ModelMeta(
|
|
|
286
293
|
siglip_base_patch16_224 = ModelMeta(
|
|
287
294
|
loader=SiglipModelWrapper, # type: ignore
|
|
288
295
|
name="google/siglip-base-patch16-224",
|
|
296
|
+
model_type=["dense"],
|
|
289
297
|
languages=["eng-Latn"],
|
|
290
298
|
revision="7fd15f0689c79d79e38b1c2e2e2370a7bf2761ed",
|
|
291
299
|
release_date="2024-01-08",
|
|
@@ -309,6 +317,7 @@ siglip_base_patch16_224 = ModelMeta(
|
|
|
309
317
|
siglip_large_patch16_256 = ModelMeta(
|
|
310
318
|
loader=SiglipModelWrapper, # type: ignore
|
|
311
319
|
name="google/siglip-large-patch16-256",
|
|
320
|
+
model_type=["dense"],
|
|
312
321
|
languages=["eng-Latn"],
|
|
313
322
|
revision="d0da9f876e7d66b4e250cd2450c3ba2ce735e447",
|
|
314
323
|
release_date="2024-01-08",
|
|
@@ -332,6 +341,7 @@ siglip_large_patch16_256 = ModelMeta(
|
|
|
332
341
|
siglip_large_patch16_384 = ModelMeta(
|
|
333
342
|
loader=SiglipModelWrapper, # type: ignore
|
|
334
343
|
name="google/siglip-large-patch16-384",
|
|
344
|
+
model_type=["dense"],
|
|
335
345
|
languages=["eng-Latn"],
|
|
336
346
|
revision="ce005573a40965dfd21fd937fbdeeebf2439fc35",
|
|
337
347
|
release_date="2024-01-08",
|
|
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
|
|
|
6
6
|
spartan8806_atles_champion_embedding = ModelMeta(
|
|
7
7
|
loader=sentence_transformers_loader,
|
|
8
8
|
name="spartan8806/atles-champion-embedding",
|
|
9
|
+
model_type=["dense"],
|
|
9
10
|
languages=["eng-Latn"],
|
|
10
11
|
open_weights=True,
|
|
11
12
|
revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
|
|
@@ -59,6 +59,7 @@ stella_en_400m = ModelMeta(
|
|
|
59
59
|
torch_dtype="auto",
|
|
60
60
|
),
|
|
61
61
|
name="NovaSearch/stella_en_400M_v5",
|
|
62
|
+
model_type=["dense"],
|
|
62
63
|
languages=["eng-Latn"],
|
|
63
64
|
open_weights=True,
|
|
64
65
|
use_instructions=True,
|
|
@@ -87,6 +88,7 @@ stella_en_1_5b = ModelMeta(
|
|
|
87
88
|
torch_dtype="auto",
|
|
88
89
|
),
|
|
89
90
|
name="NovaSearch/stella_en_1.5B_v5",
|
|
91
|
+
model_type=["dense"],
|
|
90
92
|
languages=["eng-Latn"],
|
|
91
93
|
open_weights=True,
|
|
92
94
|
use_instructions=True,
|
|
@@ -109,6 +111,7 @@ stella_en_1_5b = ModelMeta(
|
|
|
109
111
|
stella_large_zh_v3_1792d = ModelMeta(
|
|
110
112
|
loader=sentence_transformers_loader,
|
|
111
113
|
name="dunzhang/stella-large-zh-v3-1792d",
|
|
114
|
+
model_type=["dense"],
|
|
112
115
|
languages=["zho-Hans"],
|
|
113
116
|
open_weights=True,
|
|
114
117
|
revision="d5d39eb8cd11c80a63df53314e59997074469f09",
|
|
@@ -135,6 +138,7 @@ stella_large_zh_v3_1792d = ModelMeta(
|
|
|
135
138
|
stella_base_zh_v3_1792d = ModelMeta(
|
|
136
139
|
loader=sentence_transformers_loader,
|
|
137
140
|
name="infgrad/stella-base-zh-v3-1792d",
|
|
141
|
+
model_type=["dense"],
|
|
138
142
|
languages=["zho-Hans"],
|
|
139
143
|
open_weights=True,
|
|
140
144
|
revision="82254892a0fba125aa2abf3a4800d2dd12821343",
|
|
@@ -162,6 +166,7 @@ stella_base_zh_v3_1792d = ModelMeta(
|
|
|
162
166
|
stella_mrl_large_zh_v3_5_1792d = ModelMeta(
|
|
163
167
|
loader=sentence_transformers_loader,
|
|
164
168
|
name="dunzhang/stella-mrl-large-zh-v3.5-1792d",
|
|
169
|
+
model_type=["dense"],
|
|
165
170
|
languages=["zho-Hans"],
|
|
166
171
|
open_weights=True,
|
|
167
172
|
revision="17bb1c32a93a8fc5f6fc9e91d5ea86da99983cfe",
|
|
@@ -185,6 +190,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
|
|
|
185
190
|
zpoint_large_embedding_zh = ModelMeta(
|
|
186
191
|
loader=sentence_transformers_loader,
|
|
187
192
|
name="iampanda/zpoint_large_embedding_zh",
|
|
193
|
+
model_type=["dense"],
|
|
188
194
|
languages=["zho-Hans"],
|
|
189
195
|
open_weights=True,
|
|
190
196
|
revision="b1075144f440ab4409c05622c1179130ebd57d03",
|
|
@@ -321,6 +321,7 @@ training_data = {
|
|
|
321
321
|
tarka_embedding_150m_v1 = ModelMeta(
|
|
322
322
|
loader=gemma_embedding_loader,
|
|
323
323
|
name="Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
324
|
+
model_type=["dense"],
|
|
324
325
|
languages=MULTILINGUAL_EVALUATED_LANGUAGES,
|
|
325
326
|
open_weights=True,
|
|
326
327
|
revision="b0ffecc4ef0d873e517507ed080e43b88b2704b9",
|
|
@@ -354,6 +355,7 @@ tarka_embedding_350m_v1 = ModelMeta(
|
|
|
354
355
|
loader=InstructSentenceTransformerModel,
|
|
355
356
|
loader_kwargs=tark_embedding_350_v1_kwargs,
|
|
356
357
|
name="Tarka-AIR/Tarka-Embedding-350M-V1",
|
|
358
|
+
model_type=["dense"],
|
|
357
359
|
languages=MULTILINGUAL_EVALUATED_LANGUAGES,
|
|
358
360
|
open_weights=True,
|
|
359
361
|
revision="a850d6a329145474727424fed6b12b62096b8ba3",
|
|
@@ -5,6 +5,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
|
|
|
5
5
|
|
|
6
6
|
xlm_roberta_ua_distilled = ModelMeta(
|
|
7
7
|
name="panalexeu/xlm-roberta-ua-distilled",
|
|
8
|
+
model_type=["dense"],
|
|
8
9
|
loader=sentence_transformers_loader,
|
|
9
10
|
n_parameters=278_000_000,
|
|
10
11
|
memory_usage_mb=1061,
|
|
@@ -7,6 +7,7 @@ greennode_embedding_large_vn_v1_training_data = {
|
|
|
7
7
|
|
|
8
8
|
greennode_embedding_large_vn_v1 = ModelMeta(
|
|
9
9
|
name="GreenNode/GreenNode-Embedding-Large-VN-V1",
|
|
10
|
+
model_type=["dense"],
|
|
10
11
|
revision="660def1f6e1c8ecdf39f6f9c95829e3cf0cef837",
|
|
11
12
|
release_date="2024-04-11",
|
|
12
13
|
languages=[
|
|
@@ -31,6 +32,7 @@ greennode_embedding_large_vn_v1 = ModelMeta(
|
|
|
31
32
|
|
|
32
33
|
greennode_embedding_large_vn_mixed_v1 = ModelMeta(
|
|
33
34
|
name="GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1",
|
|
35
|
+
model_type=["dense"],
|
|
34
36
|
revision="1d3dddb3862292dab4bd3eddf0664c0335ad5843",
|
|
35
37
|
release_date="2024-04-11",
|
|
36
38
|
languages=[
|
|
@@ -55,6 +57,7 @@ greennode_embedding_large_vn_mixed_v1 = ModelMeta(
|
|
|
55
57
|
|
|
56
58
|
aiteamvn_vietnamese_embeddings = ModelMeta(
|
|
57
59
|
name="AITeamVN/Vietnamese_Embedding",
|
|
60
|
+
model_type=["dense"],
|
|
58
61
|
revision="fcbbb905e6c3757d421aaa5db6fd7c53d038f6fb",
|
|
59
62
|
release_date="2024-03-17",
|
|
60
63
|
languages=[
|
|
@@ -85,6 +88,7 @@ aiteamvn_vietnamese_embeddings = ModelMeta(
|
|
|
85
88
|
|
|
86
89
|
hiieu_halong_embedding = ModelMeta(
|
|
87
90
|
name="hiieu/halong_embedding",
|
|
91
|
+
model_type=["dense"],
|
|
88
92
|
revision="b57776031035f70ed2030d2e35ecc533eb0f8f71",
|
|
89
93
|
release_date="2024-07-06",
|
|
90
94
|
languages=[
|
|
@@ -115,6 +119,7 @@ hiieu_halong_embedding = ModelMeta(
|
|
|
115
119
|
|
|
116
120
|
sup_simcse_vietnamese_phobert_base_ = ModelMeta(
|
|
117
121
|
name="VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
|
|
122
|
+
model_type=["dense"],
|
|
118
123
|
revision="608779b86741a8acd8c8d38132974ff04086b138",
|
|
119
124
|
release_date="2021-05-26",
|
|
120
125
|
languages=[
|
|
@@ -152,6 +157,7 @@ pages = {1037--1042}
|
|
|
152
157
|
|
|
153
158
|
bkai_foundation_models_vietnamese_bi_encoder = ModelMeta(
|
|
154
159
|
name="bkai-foundation-models/vietnamese-bi-encoder",
|
|
160
|
+
model_type=["dense"],
|
|
155
161
|
revision="84f9d9ada0d1a3c37557398b9ae9fcedcdf40be0",
|
|
156
162
|
release_date="2023-09-09",
|
|
157
163
|
languages=[
|
|
@@ -247,6 +247,7 @@ visualized_bge_base = ModelMeta(
|
|
|
247
247
|
image_tokens_num=196,
|
|
248
248
|
),
|
|
249
249
|
name="BAAI/bge-visualized-base",
|
|
250
|
+
model_type=["dense"],
|
|
250
251
|
languages=["eng-Latn"],
|
|
251
252
|
revision="98db10b10d22620010d06f11733346e1c98c34aa",
|
|
252
253
|
release_date="2024-06-06",
|
|
@@ -274,6 +275,7 @@ visualized_bge_m3 = ModelMeta(
|
|
|
274
275
|
image_tokens_num=256,
|
|
275
276
|
),
|
|
276
277
|
name="BAAI/bge-visualized-m3",
|
|
278
|
+
model_type=["dense"],
|
|
277
279
|
languages=["eng-Latn"],
|
|
278
280
|
revision="98db10b10d22620010d06f11733346e1c98c34aa",
|
|
279
281
|
release_date="2024-06-06",
|
|
@@ -269,6 +269,7 @@ vlm2vec_training_datasets = set(
|
|
|
269
269
|
vlm2vec_lora = ModelMeta(
|
|
270
270
|
loader=VLM2VecWrapper,
|
|
271
271
|
name="TIGER-Lab/VLM2Vec-LoRA",
|
|
272
|
+
model_type=["dense"],
|
|
272
273
|
languages=["eng-Latn"],
|
|
273
274
|
revision="7403b6327958071c1e33c822c7453adadccc7298",
|
|
274
275
|
release_date="2024-10-08",
|
|
@@ -292,6 +293,7 @@ vlm2vec_lora = ModelMeta(
|
|
|
292
293
|
vlm2vec_full = ModelMeta(
|
|
293
294
|
loader=VLM2VecWrapper,
|
|
294
295
|
name="TIGER-Lab/VLM2Vec-Full",
|
|
296
|
+
model_type=["dense"],
|
|
295
297
|
languages=["eng-Latn"],
|
|
296
298
|
revision="e9afa98002097ac2471827ba23ea1f2ddd229480",
|
|
297
299
|
release_date="2024-10-08",
|
|
@@ -208,6 +208,7 @@ model_prompts = {
|
|
|
208
208
|
|
|
209
209
|
voyage_3_large = ModelMeta(
|
|
210
210
|
name="voyageai/voyage-3-large", # Date of publication of this post https://blog.voyageai.com/2025/01/07/voyage-3-large/
|
|
211
|
+
model_type=["dense"],
|
|
211
212
|
revision="1",
|
|
212
213
|
release_date="2025-01-07",
|
|
213
214
|
languages=None, # supported languages not specified
|
|
@@ -234,6 +235,7 @@ voyage_3_large = ModelMeta(
|
|
|
234
235
|
|
|
235
236
|
voyage_3_5 = ModelMeta(
|
|
236
237
|
name="voyageai/voyage-3.5",
|
|
238
|
+
model_type=["dense"],
|
|
237
239
|
revision="1",
|
|
238
240
|
release_date="2025-01-21",
|
|
239
241
|
languages=None, # supported languages not specified
|
|
@@ -259,6 +261,7 @@ voyage_3_5 = ModelMeta(
|
|
|
259
261
|
|
|
260
262
|
voyage_3_5_int8 = ModelMeta(
|
|
261
263
|
name="voyageai/voyage-3.5 (output_dtype=int8)",
|
|
264
|
+
model_type=["dense"],
|
|
262
265
|
revision="1",
|
|
263
266
|
release_date="2025-01-21",
|
|
264
267
|
languages=None, # supported languages not specified
|
|
@@ -285,6 +288,7 @@ voyage_3_5_int8 = ModelMeta(
|
|
|
285
288
|
|
|
286
289
|
voyage_3_5_binary = ModelMeta(
|
|
287
290
|
name="voyageai/voyage-3.5 (output_dtype=binary)",
|
|
291
|
+
model_type=["dense"],
|
|
288
292
|
revision="1",
|
|
289
293
|
release_date="2025-01-21",
|
|
290
294
|
languages=None, # supported languages not specified
|
|
@@ -311,6 +315,7 @@ voyage_3_5_binary = ModelMeta(
|
|
|
311
315
|
|
|
312
316
|
voyage_large_2_instruct = ModelMeta(
|
|
313
317
|
name="voyageai/voyage-large-2-instruct",
|
|
318
|
+
model_type=["dense"],
|
|
314
319
|
revision="1",
|
|
315
320
|
release_date="2024-05-05",
|
|
316
321
|
languages=None, # supported languages not specified
|
|
@@ -336,6 +341,7 @@ voyage_large_2_instruct = ModelMeta(
|
|
|
336
341
|
|
|
337
342
|
voyage_finance_2 = ModelMeta(
|
|
338
343
|
name="voyageai/voyage-finance-2",
|
|
344
|
+
model_type=["dense"],
|
|
339
345
|
revision="1",
|
|
340
346
|
release_date="2024-05-30",
|
|
341
347
|
languages=None, # supported languages not specified
|
|
@@ -361,6 +367,7 @@ voyage_finance_2 = ModelMeta(
|
|
|
361
367
|
|
|
362
368
|
voyage_law_2 = ModelMeta(
|
|
363
369
|
name="voyageai/voyage-law-2",
|
|
370
|
+
model_type=["dense"],
|
|
364
371
|
revision="1",
|
|
365
372
|
release_date="2024-04-15",
|
|
366
373
|
languages=None, # supported languages not specified
|
|
@@ -386,6 +393,7 @@ voyage_law_2 = ModelMeta(
|
|
|
386
393
|
|
|
387
394
|
voyage_code_2 = ModelMeta(
|
|
388
395
|
name="voyageai/voyage-code-2",
|
|
396
|
+
model_type=["dense"],
|
|
389
397
|
revision="1",
|
|
390
398
|
release_date="2024-01-23",
|
|
391
399
|
languages=None, # supported languages not specified
|
|
@@ -411,6 +419,7 @@ voyage_code_2 = ModelMeta(
|
|
|
411
419
|
|
|
412
420
|
voyage_code_3 = ModelMeta(
|
|
413
421
|
name="voyageai/voyage-code-3",
|
|
422
|
+
model_type=["dense"],
|
|
414
423
|
revision="1",
|
|
415
424
|
release_date="2024-12-04",
|
|
416
425
|
languages=None, # supported languages not specified
|
|
@@ -437,6 +446,7 @@ voyage_code_3 = ModelMeta(
|
|
|
437
446
|
|
|
438
447
|
voyage_large_2 = ModelMeta(
|
|
439
448
|
name="voyageai/voyage-large-2", # Date of publication of this post https://blog.voyageai.com/2023/10/29/voyage-embeddings/
|
|
449
|
+
model_type=["dense"],
|
|
440
450
|
revision="1",
|
|
441
451
|
release_date="2023-10-29",
|
|
442
452
|
languages=None, # supported languages not specified
|
|
@@ -462,6 +472,7 @@ voyage_large_2 = ModelMeta(
|
|
|
462
472
|
|
|
463
473
|
voyage_2 = ModelMeta(
|
|
464
474
|
name="voyageai/voyage-2",
|
|
475
|
+
model_type=["dense"],
|
|
465
476
|
revision="1",
|
|
466
477
|
release_date="2023-10-29",
|
|
467
478
|
languages=None, # supported languages not specified
|
|
@@ -486,6 +497,7 @@ voyage_2 = ModelMeta(
|
|
|
486
497
|
)
|
|
487
498
|
voyage_multilingual_2 = ModelMeta(
|
|
488
499
|
name="voyageai/voyage-multilingual-2",
|
|
500
|
+
model_type=["dense"],
|
|
489
501
|
revision="1",
|
|
490
502
|
release_date="2024-06-10",
|
|
491
503
|
languages=None, # supported languages not specified
|
|
@@ -511,6 +523,7 @@ voyage_multilingual_2 = ModelMeta(
|
|
|
511
523
|
|
|
512
524
|
voyage_3 = ModelMeta(
|
|
513
525
|
name="voyageai/voyage-3",
|
|
526
|
+
model_type=["dense"],
|
|
514
527
|
revision="1",
|
|
515
528
|
release_date="2024-09-18",
|
|
516
529
|
languages=None, # supported languages not specified
|
|
@@ -536,6 +549,7 @@ voyage_3 = ModelMeta(
|
|
|
536
549
|
|
|
537
550
|
voyage_3_lite = ModelMeta(
|
|
538
551
|
name="voyageai/voyage-3-lite",
|
|
552
|
+
model_type=["dense"],
|
|
539
553
|
revision="1",
|
|
540
554
|
release_date="2024-09-18",
|
|
541
555
|
languages=None, # supported languages not specified
|
|
@@ -561,6 +575,7 @@ voyage_3_lite = ModelMeta(
|
|
|
561
575
|
|
|
562
576
|
voyage_3_exp = ModelMeta(
|
|
563
577
|
name="voyageai/voyage-3-m-exp",
|
|
578
|
+
model_type=["dense"],
|
|
564
579
|
revision="1",
|
|
565
580
|
release_date="2025-01-08",
|
|
566
581
|
languages=["eng-Latn"],
|
|
@@ -204,6 +204,7 @@ def voyage_v_loader(model_name, **kwargs):
|
|
|
204
204
|
voyage_v = ModelMeta(
|
|
205
205
|
loader=voyage_v_loader, # type: ignore
|
|
206
206
|
name="voyageai/voyage-multimodal-3",
|
|
207
|
+
model_type=["dense"],
|
|
207
208
|
languages=[], # Unknown
|
|
208
209
|
revision="1",
|
|
209
210
|
release_date="2024-11-10",
|