mteb 2.6.4__py3-none-any.whl → 2.6.6__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in the public registry.
- mteb/abstasks/classification.py +2 -3
- mteb/abstasks/multilabel_classification.py +3 -3
- mteb/abstasks/regression.py +1 -1
- mteb/abstasks/retrieval.py +1 -1
- mteb/abstasks/task_metadata.py +9 -14
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +2 -2
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +8 -8
- mteb/models/model_implementations/bmretriever_models.py +4 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +2 -2
- mteb/models/model_implementations/clip_models.py +3 -3
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +4 -4
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +19 -19
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +1 -1
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +3 -3
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +1 -1
- mteb/models/model_implementations/jina_models.py +11 -5
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -2
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -3
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mdbr_models.py +14 -2
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +2 -2
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +13 -13
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/mxbai_models.py +16 -3
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -1
- mteb/models/model_implementations/nomic_models.py +18 -6
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -2
- mteb/models/model_implementations/nvidia_models.py +3 -3
- mteb/models/model_implementations/octen_models.py +3 -3
- mteb/models/model_implementations/openclip_models.py +6 -6
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +5 -5
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/rasgaard_models.py +1 -1
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +1 -1
- mteb/models/model_implementations/rerankers_custom.py +9 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/sentence_transformers_models.py +58 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +10 -10
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +2 -2
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +46 -17
- mteb/results/benchmark_results.py +2 -2
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/METADATA +3 -3
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/RECORD +142 -133
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/WHEEL +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/top_level.txt +0 -0
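Nearly every model_implementations change below follows one pattern: the framework list on a ModelMeta entry gains entries such as "ONNX", "safetensors", "Transformers", and "GGUF". As a minimal sketch of how that metadata is consumed, assuming mteb's public get_model_meta helper (the ModelMeta objects it returns are the ones edited in the hunks below):

import mteb

# Fetch the registry entry for one of the models touched in this release;
# .framework is the list extended throughout the hunks below.
meta = mteb.get_model_meta("minishlab/potion-base-8M")
print(meta.framework)  # in 2.6.6 this includes "ONNX" and "safetensors"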
@@ -172,7 +172,7 @@ m2v_base_glove_subword = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/M2V_base_glove_subword",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -198,7 +198,7 @@ m2v_base_glove = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "safetensors"],
     reference="https://huggingface.co/minishlab/M2V_base_glove",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -223,7 +223,7 @@ m2v_base_output = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/M2V_base_output",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -248,7 +248,7 @@ m2v_multilingual_output = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/M2V_multilingual_output",
     use_instructions=False,
     adapted_from="sentence-transformers/LaBSE",
@@ -273,7 +273,7 @@ potion_base_2m = ModelMeta(
     embed_dim=64,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/potion-base-2M",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -298,7 +298,7 @@ potion_base_4m = ModelMeta(
     embed_dim=128,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/potion-base-4M",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -323,7 +323,7 @@ potion_base_8m = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["NumPy", "Sentence Transformers"],
+    framework=["NumPy", "Sentence Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/minishlab/potion-base-8M",
     use_instructions=False,
     adapted_from="BAAI/bge-base-en-v1.5",
@@ -348,7 +348,7 @@ potion_multilingual_128m = ModelMeta(
     embed_dim=256,
     license="mit",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "ONNX", "safetensors", "Sentence Transformers"],
     reference="https://huggingface.co/minishlab/potion-multilingual-128M",
     use_instructions=False,
     adapted_from="BAAI/bge-m3",
@@ -373,7 +373,7 @@ pubmed_bert_100k = ModelMeta(
     embed_dim=64,
     license="apache-2.0",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
     reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-100K",
     use_instructions=False,
     adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -397,7 +397,7 @@ pubmed_bert_500k = ModelMeta(
     embed_dim=64,
     license="apache-2.0",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
     reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-500K",
     use_instructions=False,
     adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -421,7 +421,7 @@ pubmed_bert_1m = ModelMeta(
     embed_dim=64,
     license="apache-2.0",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
     reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-1M",
     use_instructions=False,
     adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -445,7 +445,7 @@ pubmed_bert_2m = ModelMeta(
     embed_dim=64,
     license="apache-2.0",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
     reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-2M",
     use_instructions=False,
     adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -469,7 +469,7 @@ pubmed_bert_8m = ModelMeta(
     embed_dim=256,
     license="apache-2.0",
     similarity_fn_name="cosine",
-    framework=["NumPy"],
+    framework=["NumPy", "Sentence Transformers", "safetensors", "Transformers"],
     reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-8M",
     use_instructions=False,
     adapted_from="NeuML/pubmedbert-base-embeddings",
@@ -104,7 +104,7 @@ m3e_base = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/moka-ai/m3e-base",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=False,
     superseded_by=None,
     adapted_from=None,
@@ -33,7 +33,14 @@ mxbai_embed_large_v1 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=[
+    framework=[
+        "Sentence Transformers",
+        "PyTorch",
+        "ONNX",
+        "safetensors",
+        "GGUF",
+        "Transformers",
+    ],
     use_instructions=True,
     citation="""
 @online{emb2024mxbai,
@@ -70,7 +77,13 @@ mxbai_embed_2d_large_v1 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/mixedbread-ai/mxbai-embed-2d-large-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=[
+    framework=[
+        "Sentence Transformers",
+        "PyTorch",
+        "ONNX",
+        "safetensors",
+        "Transformers",
+    ],
     use_instructions=True,
     adapted_from=None,
     superseded_by=None,
@@ -95,7 +108,7 @@ mxbai_embed_xsmall_v1 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
     use_instructions=True,
     adapted_from="sentence-transformers/all-MiniLM-L6-v2",
     superseded_by=None,
@@ -95,7 +108,7 @@ mxbai_embed_xsmall_v1 = ModelMeta(
|
|
|
95
108
|
license="apache-2.0",
|
|
96
109
|
reference="https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1",
|
|
97
110
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
98
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
111
|
+
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors", "GGUF"],
|
|
99
112
|
use_instructions=True,
|
|
100
113
|
adapted_from="sentence-transformers/all-MiniLM-L6-v2",
|
|
101
114
|
superseded_by=None,
|
|
@@ -18,7 +18,7 @@ nb_sbert = ModelMeta(
     max_tokens=75,
     reference="https://huggingface.co/NbAiLab/nb-sbert-base",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=False,
     public_training_code=None,
     public_training_data="https://huggingface.co/datasets/NbAiLab/mnli-norwegian",
@@ -40,7 +40,7 @@ nb_bert_large = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/NbAiLab/nb-bert-large",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
     use_instructions=False,
     public_training_code=None,
     public_training_data="https://huggingface.co/NbAiLab/nb-bert-large#training-data",
@@ -62,7 +62,7 @@ nb_bert_base = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/NbAiLab/nb-bert-base",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
     use_instructions=False,
     public_training_code=None,
     public_training_data="https://huggingface.co/NbAiLab/nb-bert-base#training-data",
@@ -109,7 +109,7 @@ no_instruct_small_v0 = ModelMeta(
     license="mit",
     reference="https://huggingface.co/avsolatorio/NoInstruct-small-Embedding-v0",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["PyTorch"],
+    framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
     use_instructions=False,
     adapted_from=None,
     superseded_by=None,
@@ -215,7 +215,13 @@ nomic_embed_v1_5 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=[
+    framework=[
+        "Sentence Transformers",
+        "PyTorch",
+        "ONNX",
+        "safetensors",
+        "Transformers",
+    ],
     use_instructions=True,
     adapted_from=None,
     superseded_by=None,
@@ -243,7 +249,13 @@ nomic_embed_v1 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=[
+    framework=[
+        "Sentence Transformers",
+        "PyTorch",
+        "ONNX",
+        "safetensors",
+        "Transformers",
+    ],
     use_instructions=True,
     citation=NOMIC_CITATION,
     adapted_from=None,
@@ -272,7 +284,7 @@ nomic_embed_v1_ablated = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-ablated",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "ONNX"],
     use_instructions=True,
     adapted_from=None,
     superseded_by=None,
@@ -300,7 +312,7 @@ nomic_embed_v1_unsupervised = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-unsupervised",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "ONNX", "Transformers"],
     use_instructions=True,
     adapted_from=None,
     superseded_by=None,
@@ -328,7 +340,7 @@ nomic_modern_bert_embed = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/modernbert-embed-base",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
     use_instructions=True,
     adapted_from="answerdotai/ModernBERT-base",
     public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_pretrain_modernbert.yaml",
@@ -467,7 +479,7 @@ nomic_embed_text_v2_moe = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=True,
     adapted_from="nomic-ai/nomic-xlm-2048",
     public_training_data="https://github.com/nomic-ai/contrastors?tab=readme-ov-file#data-access",
@@ -181,7 +181,7 @@ nomic_embed_vision_v1_5 = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/nomic-ai/contrastors",
     public_training_data=None,
-    framework=["PyTorch"],
+    framework=["PyTorch", "Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=True,
@@ -159,7 +159,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
     open_weights=True,
     public_training_code="Proprietary Code",
     public_training_data="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1#training-dataset",
-    framework=["PyTorch"],
+    framework=["PyTorch", "Transformers", "safetensors"],
     reference="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1",
     similarity_fn_name="MaxSim",
     use_instructions=True,
@@ -186,7 +186,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
     open_weights=True,
     public_training_code="Proprietary Code",
     public_training_data="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1#training-dataset",
-    framework=["PyTorch"],
+    framework=["PyTorch", "Transformers", "safetensors"],
     reference="https://huggingface.co/nvidia/llama-nemoretriever-colembed-3b-v1",
     similarity_fn_name="MaxSim",
     use_instructions=True,
@@ -123,7 +123,7 @@ NV_embed_v2 = ModelMeta(
     max_tokens=32768,
     reference="https://huggingface.co/nvidia/NV-Embed-v2",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
     use_instructions=True,
     training_datasets=nvidia_training_datasets,
     public_training_code=None,
@@ -154,7 +154,7 @@ NV_embed_v1 = ModelMeta(
     max_tokens=32768,
     reference="https://huggingface.co/nvidia/NV-Embed-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=True,
     training_datasets=nvidia_training_datasets,
     public_training_code=None,
@@ -543,7 +543,7 @@ llama_embed_nemotron_8b = ModelMeta(
     max_tokens=32768,
     reference="https://huggingface.co/nvidia/llama-embed-nemotron-8b",
     similarity_fn_name="cosine",
-    framework=["PyTorch"],
+    framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
     use_instructions=True,
     training_datasets=llama_embed_nemotron_training_datasets,
     public_training_code=None, # Will be released later
@@ -184,7 +184,7 @@ Octen_Embedding_4B = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/bflhc/Octen-Embedding-4B",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=True,
     public_training_code=None,
     public_training_data=None,
@@ -205,7 +205,7 @@ Octen_Embedding_8B = ModelMeta(
     name="bflhc/Octen-Embedding-8B",
     languages=multilingual_langs,
     open_weights=True,
-    revision="
+    revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
     release_date="2025-12-23",
     n_parameters=7567295488,
     memory_usage_mb=14433,
@@ -214,7 +214,7 @@ Octen_Embedding_8B = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/bflhc/Octen-Embedding-8B",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=True,
     public_training_code=None,
     public_training_data=None,
@@ -161,7 +161,7 @@ CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://huggingface.co/datasets/mlfoundations/datacomp_1b",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -213,7 +213,7 @@ CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://laion.ai/blog/laion-5b/",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -239,7 +239,7 @@ CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://laion.ai/blog/laion-5b/",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -265,7 +265,7 @@ CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://laion.ai/blog/laion-5b/",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -291,7 +291,7 @@ CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://laion.ai/blog/laion-5b/",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -317,7 +317,7 @@ CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
     open_weights=True,
     public_training_code="https://github.com/mlfoundations/open_clip",
     public_training_data="https://laion.ai/blog/laion-5b/",
-    framework=["PyTorch"],
+    framework=["PyTorch", "safetensors"],
     reference="https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
@@ -140,7 +140,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
     max_tokens=8192,
     reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
     similarity_fn_name="dot",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
     public_training_data=True,
     use_instructions=True,
@@ -166,7 +166,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
     similarity_fn_name="dot",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
     public_training_data=True,
     use_instructions=True,
@@ -188,7 +188,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
     similarity_fn_name="dot",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
     public_training_data=True,
     use_instructions=True,
@@ -211,7 +211,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
     similarity_fn_name="dot",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
     public_training_data=True,
     use_instructions=True,
@@ -233,7 +233,7 @@ opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
     similarity_fn_name="dot",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     public_training_code="https://github.com/zhichao-aws/opensearch-sparse-model-tuning-sample",
     public_training_data=True,
     use_instructions=True,
@@ -67,7 +67,7 @@ ops_moa_yuan_embedding = ModelMeta(
     open_weights=True,
     public_training_code=None,
     public_training_data=None,
-    framework=["PyTorch", "Sentence Transformers"],
+    framework=["PyTorch", "Sentence Transformers", "safetensors"],
     reference="https://huggingface.co/OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0",
     similarity_fn_name="cosine",
     use_instructions=False,
@@ -14,7 +14,7 @@ solon_embeddings_1_1 = ModelMeta(
     max_tokens=8192,
     reference="https://huggingface.co/OrdalieTech/Solon-embeddings-mini-beta-1.1",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=False,
     public_training_data=(
         "https://huggingface.co/datasets/PleIAs/common_corpus; "
@@ -26,7 +26,7 @@ pawan_embd_68m = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/dmedhi/PawanEmbd-68M",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     adapted_from="ibm-granite/granite-embedding-278m-multilingual",
     superseded_by=None,
     public_training_code=None,
@@ -18,7 +18,7 @@ piccolo_base_zh = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/sensenova/piccolo-base-zh",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "Transformers"],
     use_instructions=False,
     superseded_by=None,
     adapted_from=None,
@@ -90,7 +90,7 @@ promptriever_llama2 = ModelMeta(
     ),
     reference="https://huggingface.co/samaya-ai/promptriever-llama2-7b-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["PyTorch", "Tevatron"],
+    framework=["PyTorch", "Tevatron", "safetensors"],
     use_instructions=True,
     citation=PROMPTRIEVER_CITATION,
     public_training_code=None,
@@ -123,7 +123,7 @@ promptriever_llama3 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/samaya-ai/promptriever-llama3.1-8b-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["PyTorch", "Tevatron"],
+    framework=["PyTorch", "Tevatron", "safetensors"],
     use_instructions=True,
     citation=PROMPTRIEVER_CITATION,
     public_training_code=None,
@@ -156,7 +156,7 @@ promptriever_llama3_instruct = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/samaya-ai/promptriever-llama3.1-8b-instruct-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["PyTorch", "Tevatron"],
+    framework=["PyTorch", "Tevatron", "safetensors"],
     use_instructions=True,
     citation=PROMPTRIEVER_CITATION,
     public_training_code=None,
@@ -189,7 +189,7 @@ promptriever_mistral_v1 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/samaya-ai/promptriever-mistral-v0.1-7b-v1",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["PyTorch", "Tevatron"],
+    framework=["PyTorch", "Tevatron", "safetensors"],
     use_instructions=True,
     citation=PROMPTRIEVER_CITATION,
     public_training_code=None,
@@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)
 
 
 class PylateSearchEncoder:
-    """Mixin class to add PyLate-based indexing and search to an encoder. Implements
+    """Mixin class to add PyLate-based indexing and search to an encoder. Implements [SearchProtocol][mteb.models.SearchProtocol]"""
 
     base_index_dir: Path | None = None
     _index_dir: Path | None = None
@@ -350,7 +350,7 @@ colbert_v2 = ModelMeta(
     embed_dim=None,
     license="mit",
     similarity_fn_name=ScoringFunction.MAX_SIM,
-    framework=["PyLate", "ColBERT"],
+    framework=["PyLate", "ColBERT", "Transformers", "ONNX", "safetensors"],
     reference="https://huggingface.co/colbert-ir/colbertv2.0",
     use_instructions=False,
     adapted_from=None,
@@ -406,7 +406,7 @@ jina_colbert_v2 = ModelMeta(
     embed_dim=None,
     license="cc-by-nc-4.0",
     similarity_fn_name=ScoringFunction.MAX_SIM,
-    framework=["PyLate", "ColBERT"],
+    framework=["PyLate", "ColBERT", "ONNX", "safetensors"],
     reference="https://huggingface.co/jinaai/jina-colbert-v2",
     use_instructions=False,
     adapted_from=None,
@@ -439,7 +439,7 @@ jina_colbert_v2 = ModelMeta(
     url = "https://aclanthology.org/2024.mrl-1.11/",
     doi = "10.18653/v1/2024.mrl-1.11",
     pages = "159--166",
-    abstract = "Multi-vector dense models, such as ColBERT, have proven highly effective in information retrieval. ColBERT`s late interaction scoring approximates the joint query-document attention seen in cross-encoders while maintaining inference efficiency closer to traditional dense retrieval models, thanks to its bi-encoder architecture and recent optimizations in indexing and search. In this paper, we introduce a novel architecture and a training framework to support long context window and multilingual retrieval. Leveraging Matryoshka Representation Loss, we further demonstrate that the reducing the embedding dimensionality from 128 to 64 has insignificant impact on the model`s retrieval performance and cut storage requirements by up to 50{
+    abstract = "Multi-vector dense models, such as ColBERT, have proven highly effective in information retrieval. ColBERT`s late interaction scoring approximates the joint query-document attention seen in cross-encoders while maintaining inference efficiency closer to traditional dense retrieval models, thanks to its bi-encoder architecture and recent optimizations in indexing and search. In this paper, we introduce a novel architecture and a training framework to support long context window and multilingual retrieval. Leveraging Matryoshka Representation Loss, we further demonstrate that the reducing the embedding dimensionality from 128 to 64 has insignificant impact on the model`s retrieval performance and cut storage requirements by up to 50{\\%}. Our new model, Jina-ColBERT-v2, demonstrates strong performance across a range of English and multilingual retrieval tasks,"
 }""",
 )
 
@@ -462,7 +462,7 @@ lightonai__gte_moderncolbert_v1 = ModelMeta(
     embed_dim=None,
     license="apache-2.0",
     similarity_fn_name="MaxSim",
-    framework=["PyLate", "ColBERT"],
+    framework=["PyLate", "ColBERT", "safetensors", "Sentence Transformers"],
     reference="https://huggingface.co/lightonai/GTE-ModernColBERT-v1",
     use_instructions=False,
     adapted_from="Alibaba-NLP/gte-modernbert-base",
@@ -42,7 +42,7 @@ Qodo_Embed_1_1_5B = ModelMeta(
     max_tokens=32768,
     reference="https://huggingface.co/Qodo/Qodo-Embed-1-1.5B",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=False,
     public_training_code=None,
     public_training_data=None,
@@ -65,7 +65,7 @@ Qodo_Embed_1_7B = ModelMeta(
     max_tokens=32768,
     reference="https://huggingface.co/Qodo/Qodo-Embed-1-7B",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=False,
     public_training_code=None,
     public_training_data=None,
@@ -37,7 +37,7 @@ mini_gte = ModelMeta(
     max_tokens=512,
     reference="https://huggingface.co/prdev/mini-gte",
     similarity_fn_name=ScoringFunction.COSINE,
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
     use_instructions=False,
     public_training_code=None,
     public_training_data=None,
@@ -146,7 +146,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=True,
     public_training_code=None,
     public_training_data=None,
@@ -169,7 +169,7 @@ Qwen3_Embedding_4B = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/Qwen/Qwen3-Embedding-4B",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=True,
     public_training_code=None,
     public_training_data=None,
@@ -192,7 +192,7 @@ Qwen3_Embedding_8B = ModelMeta(
     license="apache-2.0",
     reference="https://huggingface.co/Qwen/Qwen3-Embedding-8B",
     similarity_fn_name="cosine",
-    framework=["Sentence Transformers", "PyTorch"],
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
     use_instructions=True,
     public_training_code=None,
     public_training_data=None,
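Besides the metadata expansion, this release adds seven Vietnamese retrieval tasks (the Nano*-VN family and TVPLRetrieval) together with their descriptive stats, visible in the file list above. A short sketch of loading and running one of them; the task names are inferred from the new descriptive_stats/Retrieval/*.json filenames, and mteb.evaluate is assumed to be the v2 evaluation entry point:

import mteb

# Task names inferred from the new descriptive_stats/Retrieval/*.json files.
tasks = mteb.get_tasks(tasks=["NanoMSMARCO-VN", "TVPLRetrieval"])

# Any registered model works here; multilingual-e5-small is just an example.
model = mteb.get_model("intfloat/multilingual-e5-small")
results = mteb.evaluate(model, tasks)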