mteb 2.6.4__py3-none-any.whl → 2.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/abstasks/classification.py +2 -3
- mteb/abstasks/multilabel_classification.py +3 -3
- mteb/abstasks/regression.py +1 -1
- mteb/abstasks/retrieval.py +1 -1
- mteb/abstasks/task_metadata.py +9 -14
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +2 -2
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +8 -8
- mteb/models/model_implementations/bmretriever_models.py +4 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +2 -2
- mteb/models/model_implementations/clip_models.py +3 -3
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +4 -4
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +19 -19
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +1 -1
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +3 -3
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +1 -1
- mteb/models/model_implementations/jina_models.py +11 -5
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -2
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -3
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mdbr_models.py +14 -2
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +2 -2
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +13 -13
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/mxbai_models.py +16 -3
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -1
- mteb/models/model_implementations/nomic_models.py +18 -6
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -2
- mteb/models/model_implementations/nvidia_models.py +3 -3
- mteb/models/model_implementations/octen_models.py +3 -3
- mteb/models/model_implementations/openclip_models.py +6 -6
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +5 -5
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/rasgaard_models.py +1 -1
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +1 -1
- mteb/models/model_implementations/rerankers_custom.py +9 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/sentence_transformers_models.py +58 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +10 -10
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +2 -2
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +46 -17
- mteb/results/benchmark_results.py +2 -2
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/METADATA +3 -3
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/RECORD +142 -133
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/WHEEL +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SauerkrautLM Visual Document Retrieval Models - MTEB Integration
|
|
3
|
+
|
|
4
|
+
This module provides MTEB wrappers for SauerkrautLM ColPali-style models:
|
|
5
|
+
- SLM-ColQwen3 (Qwen3-VL backbone)
|
|
6
|
+
- SLM-ColLFM2 (LFM2 backbone)
|
|
7
|
+
- SLM-ColMinistral3 (Ministral3 backbone)
|
|
8
|
+
|
|
9
|
+
Based on:
|
|
10
|
+
- MTEB ColPali implementation: mteb/models/model_implementations/colpali_models.py
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import torch
|
|
19
|
+
from torch.utils.data import DataLoader
|
|
20
|
+
from tqdm.auto import tqdm
|
|
21
|
+
|
|
22
|
+
from mteb._requires_package import (
|
|
23
|
+
requires_image_dependencies,
|
|
24
|
+
requires_package,
|
|
25
|
+
)
|
|
26
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
27
|
+
from mteb.models.abs_encoder import AbsEncoder
|
|
28
|
+
from mteb.models.model_implementations.colpali_models import (
|
|
29
|
+
COLPALI_CITATION,
|
|
30
|
+
COLPALI_TRAINING_DATA,
|
|
31
|
+
)
|
|
32
|
+
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
33
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
SUPPORTED_LANGUAGES = [
|
|
39
|
+
"eng-Latn", # English
|
|
40
|
+
"deu-Latn", # German
|
|
41
|
+
"fra-Latn", # French
|
|
42
|
+
"spa-Latn", # Spanish
|
|
43
|
+
"ita-Latn", # Italian
|
|
44
|
+
"por-Latn", # Portuguese
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SLMBaseWrapper(AbsEncoder):
|
|
49
|
+
"""
|
|
50
|
+
Base wrapper for SauerkrautLM multi-vector embedding models.
|
|
51
|
+
|
|
52
|
+
All our models use late interaction (MaxSim) for retrieval scoring.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
model_class = None
|
|
56
|
+
processor_class = None
|
|
57
|
+
model_name_prefix = "SLM"
|
|
58
|
+
|
|
59
|
+
def __init__(
|
|
60
|
+
self,
|
|
61
|
+
model_name: str,
|
|
62
|
+
revision: str | None = None,
|
|
63
|
+
device: str | None = None,
|
|
64
|
+
use_flash_attn: bool = True,
|
|
65
|
+
**kwargs,
|
|
66
|
+
):
|
|
67
|
+
requires_image_dependencies()
|
|
68
|
+
requires_package(
|
|
69
|
+
self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
|
|
73
|
+
self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
|
|
74
|
+
self.mdl = self.mdl.to(self.device)
|
|
75
|
+
self.mdl.eval()
|
|
76
|
+
|
|
77
|
+
def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
|
|
78
|
+
"""Override in subclasses to load specific model/processor."""
|
|
79
|
+
raise NotImplementedError
|
|
80
|
+
|
|
81
|
+
def encode(
|
|
82
|
+
self,
|
|
83
|
+
inputs: DataLoader[BatchedInput],
|
|
84
|
+
*,
|
|
85
|
+
task_metadata: TaskMetadata,
|
|
86
|
+
hf_split: str,
|
|
87
|
+
hf_subset: str,
|
|
88
|
+
prompt_type: PromptType | None = None,
|
|
89
|
+
**kwargs: Any,
|
|
90
|
+
) -> Array:
|
|
91
|
+
text_embeddings = None
|
|
92
|
+
image_embeddings = None
|
|
93
|
+
|
|
94
|
+
if "text" in inputs.dataset.features:
|
|
95
|
+
text_embeddings = self.get_text_embeddings(inputs, **kwargs)
|
|
96
|
+
if "image" in inputs.dataset.features:
|
|
97
|
+
image_embeddings = self.get_image_embeddings(inputs, **kwargs)
|
|
98
|
+
|
|
99
|
+
if text_embeddings is not None and image_embeddings is not None:
|
|
100
|
+
if len(text_embeddings) != len(image_embeddings):
|
|
101
|
+
raise ValueError(
|
|
102
|
+
"The number of texts and images must have the same length"
|
|
103
|
+
)
|
|
104
|
+
fused_embeddings = text_embeddings + image_embeddings
|
|
105
|
+
return fused_embeddings
|
|
106
|
+
elif text_embeddings is not None:
|
|
107
|
+
return text_embeddings
|
|
108
|
+
elif image_embeddings is not None:
|
|
109
|
+
return image_embeddings
|
|
110
|
+
raise ValueError("No text or image features found in inputs")
|
|
111
|
+
|
|
112
|
+
def encode_input(self, inputs):
|
|
113
|
+
"""Forward pass through the model."""
|
|
114
|
+
return self.mdl(**inputs)
|
|
115
|
+
|
|
116
|
+
def _move_to_device(self, inputs: dict) -> dict:
|
|
117
|
+
"""Move all tensor inputs to the model's device."""
|
|
118
|
+
result = {}
|
|
119
|
+
for k, v in inputs.items():
|
|
120
|
+
if isinstance(v, torch.Tensor):
|
|
121
|
+
result[k] = v.to(self.device)
|
|
122
|
+
else:
|
|
123
|
+
result[k] = v
|
|
124
|
+
return result
|
|
125
|
+
|
|
126
|
+
def get_image_embeddings(
|
|
127
|
+
self,
|
|
128
|
+
images: DataLoader,
|
|
129
|
+
batch_size: int = 32,
|
|
130
|
+
**kwargs,
|
|
131
|
+
) -> torch.Tensor:
|
|
132
|
+
import torchvision.transforms.functional as F
|
|
133
|
+
|
|
134
|
+
all_embeds = []
|
|
135
|
+
|
|
136
|
+
with torch.no_grad():
|
|
137
|
+
for batch in tqdm(images, desc="Encoding images"):
|
|
138
|
+
from PIL import Image
|
|
139
|
+
|
|
140
|
+
imgs = [
|
|
141
|
+
F.to_pil_image(b) if not isinstance(b, Image.Image) else b
|
|
142
|
+
for b in batch["image"]
|
|
143
|
+
]
|
|
144
|
+
inputs = self.processor.process_images(imgs)
|
|
145
|
+
inputs = self._move_to_device(inputs)
|
|
146
|
+
outs = self.encode_input(inputs)
|
|
147
|
+
all_embeds.extend(outs.cpu().to(torch.float32))
|
|
148
|
+
|
|
149
|
+
padded = torch.nn.utils.rnn.pad_sequence(
|
|
150
|
+
all_embeds, batch_first=True, padding_value=0
|
|
151
|
+
)
|
|
152
|
+
return padded
|
|
153
|
+
|
|
154
|
+
def get_text_embeddings(
|
|
155
|
+
self,
|
|
156
|
+
texts: DataLoader,
|
|
157
|
+
batch_size: int = 32,
|
|
158
|
+
**kwargs,
|
|
159
|
+
) -> torch.Tensor:
|
|
160
|
+
all_embeds = []
|
|
161
|
+
|
|
162
|
+
with torch.no_grad():
|
|
163
|
+
for batch in tqdm(texts, desc="Encoding texts"):
|
|
164
|
+
inputs = self.processor.process_queries(batch["text"])
|
|
165
|
+
inputs = self._move_to_device(inputs)
|
|
166
|
+
outs = self.encode_input(inputs)
|
|
167
|
+
all_embeds.extend(outs.cpu().to(torch.float32))
|
|
168
|
+
|
|
169
|
+
padded = torch.nn.utils.rnn.pad_sequence(
|
|
170
|
+
all_embeds, batch_first=True, padding_value=0
|
|
171
|
+
)
|
|
172
|
+
return padded
|
|
173
|
+
|
|
174
|
+
def calculate_probs(
|
|
175
|
+
self,
|
|
176
|
+
text_embeddings: torch.Tensor,
|
|
177
|
+
image_embeddings: torch.Tensor,
|
|
178
|
+
) -> torch.Tensor:
|
|
179
|
+
scores = self.similarity(text_embeddings, image_embeddings).T
|
|
180
|
+
return scores.softmax(dim=-1)
|
|
181
|
+
|
|
182
|
+
def similarity(
|
|
183
|
+
self,
|
|
184
|
+
a: torch.Tensor | list,
|
|
185
|
+
b: torch.Tensor | list,
|
|
186
|
+
) -> torch.Tensor:
|
|
187
|
+
return self.processor.score(a, b, device=self.device)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class SLMColQwen3Wrapper(SLMBaseWrapper):
|
|
191
|
+
"""Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""
|
|
192
|
+
|
|
193
|
+
def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
|
|
194
|
+
from sauerkrautlm_colpali.models.qwen3.colqwen3 import (
|
|
195
|
+
ColQwen3,
|
|
196
|
+
ColQwen3Processor,
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
self.mdl = ColQwen3.from_pretrained(
|
|
200
|
+
model_name,
|
|
201
|
+
torch_dtype=torch.bfloat16,
|
|
202
|
+
attn_implementation="flash_attention_2" if use_flash_attn else "eager",
|
|
203
|
+
revision=revision,
|
|
204
|
+
**kwargs,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
self.processor = ColQwen3Processor.from_pretrained(
|
|
208
|
+
model_name,
|
|
209
|
+
revision=revision,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class SLMColLFM2Wrapper(SLMBaseWrapper):
|
|
216
|
+
"""Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""
|
|
217
|
+
|
|
218
|
+
def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
|
|
219
|
+
from sauerkrautlm_colpali.models.lfm2.collfm2 import ColLFM2, ColLFM2Processor
|
|
220
|
+
|
|
221
|
+
self.mdl = ColLFM2.from_pretrained(
|
|
222
|
+
model_name,
|
|
223
|
+
torch_dtype=torch.bfloat16,
|
|
224
|
+
revision=revision,
|
|
225
|
+
**kwargs,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
self.processor = ColLFM2Processor.from_pretrained(
|
|
229
|
+
model_name,
|
|
230
|
+
revision=revision,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class SLMColMinistral3Wrapper(SLMBaseWrapper):
|
|
237
|
+
"""Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""
|
|
238
|
+
|
|
239
|
+
def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
|
|
240
|
+
from sauerkrautlm_colpali.models.ministral3.colministral3 import (
|
|
241
|
+
ColMinistral3,
|
|
242
|
+
ColMinistral3Processor,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
self.mdl = ColMinistral3.from_pretrained(
|
|
246
|
+
model_name,
|
|
247
|
+
torch_dtype=torch.bfloat16,
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
self.processor = ColMinistral3Processor.from_pretrained(model_name)
|
|
251
|
+
|
|
252
|
+
logger.info(
|
|
253
|
+
f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}"
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
SAUERKRAUTLM_CITATION = """
|
|
258
|
+
@misc{sauerkrautlm-colpali-2025,
|
|
259
|
+
title={SauerkrautLM-ColPali: Multi-Vector Vision Retrieval Models},
|
|
260
|
+
author={David Golchinfar},
|
|
261
|
+
organization={VAGO Solutions},
|
|
262
|
+
year={2025},
|
|
263
|
+
url={https://github.com/VAGOsolutions/sauerkrautlm-colpali}
|
|
264
|
+
}
|
|
265
|
+
"""
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
slm_colqwen3_1_7b_turbo = ModelMeta(
|
|
269
|
+
loader=SLMColQwen3Wrapper,
|
|
270
|
+
name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
|
|
271
|
+
languages=SUPPORTED_LANGUAGES,
|
|
272
|
+
revision="19c295a18e057d6d82754f627c09408117ffdb66",
|
|
273
|
+
release_date="2025-12-20",
|
|
274
|
+
modalities=["image", "text"],
|
|
275
|
+
model_type=["late-interaction"],
|
|
276
|
+
n_parameters=1_756_572_288,
|
|
277
|
+
memory_usage_mb=3350,
|
|
278
|
+
max_tokens=262144,
|
|
279
|
+
embed_dim=128,
|
|
280
|
+
license="apache-2.0",
|
|
281
|
+
open_weights=True,
|
|
282
|
+
public_training_code=None,
|
|
283
|
+
public_training_data=None,
|
|
284
|
+
framework=["ColPali"],
|
|
285
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
|
|
286
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
287
|
+
use_instructions=True,
|
|
288
|
+
adapted_from="Qwen/Qwen3-VL-2B-Instruct",
|
|
289
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
290
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
slm_colqwen3_2b = ModelMeta(
|
|
294
|
+
loader=SLMColQwen3Wrapper,
|
|
295
|
+
name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
|
|
296
|
+
languages=SUPPORTED_LANGUAGES,
|
|
297
|
+
revision="48f699713c10af754684e12060a2af9266462cc9",
|
|
298
|
+
release_date="2025-12-20",
|
|
299
|
+
modalities=["image", "text"],
|
|
300
|
+
model_type=["late-interaction"],
|
|
301
|
+
n_parameters=2_127_794_304,
|
|
302
|
+
memory_usage_mb=4058,
|
|
303
|
+
max_tokens=262144,
|
|
304
|
+
embed_dim=128,
|
|
305
|
+
license="apache-2.0",
|
|
306
|
+
open_weights=True,
|
|
307
|
+
public_training_code=None,
|
|
308
|
+
public_training_data=None,
|
|
309
|
+
framework=["ColPali"],
|
|
310
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
|
|
311
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
312
|
+
use_instructions=True,
|
|
313
|
+
adapted_from="Qwen/Qwen3-VL-2B-Instruct",
|
|
314
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
315
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
slm_colqwen3_4b = ModelMeta(
|
|
319
|
+
loader=SLMColQwen3Wrapper,
|
|
320
|
+
name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
|
|
321
|
+
languages=SUPPORTED_LANGUAGES,
|
|
322
|
+
revision="b635fbb3ab145f07608ed10a85def33544de1723",
|
|
323
|
+
release_date="2025-12-20",
|
|
324
|
+
modalities=["image", "text"],
|
|
325
|
+
model_type=["late-interaction"],
|
|
326
|
+
n_parameters=4_438_143_616,
|
|
327
|
+
memory_usage_mb=8465,
|
|
328
|
+
max_tokens=262144,
|
|
329
|
+
embed_dim=128,
|
|
330
|
+
license="apache-2.0",
|
|
331
|
+
open_weights=True,
|
|
332
|
+
public_training_code=None,
|
|
333
|
+
public_training_data=None,
|
|
334
|
+
framework=["ColPali"],
|
|
335
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
|
|
336
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
337
|
+
use_instructions=True,
|
|
338
|
+
adapted_from="Qwen/Qwen3-VL-4B-Instruct",
|
|
339
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
340
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
slm_colqwen3_8b = ModelMeta(
|
|
344
|
+
loader=SLMColQwen3Wrapper,
|
|
345
|
+
name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
|
|
346
|
+
languages=SUPPORTED_LANGUAGES,
|
|
347
|
+
revision="36ac136e451a7b8d8229725d69d4ec23aa4f03c8",
|
|
348
|
+
release_date="2025-12-20",
|
|
349
|
+
modalities=["image", "text"],
|
|
350
|
+
model_type=["late-interaction"],
|
|
351
|
+
n_parameters=8_145_318_256,
|
|
352
|
+
memory_usage_mb=15536,
|
|
353
|
+
max_tokens=262144,
|
|
354
|
+
embed_dim=128,
|
|
355
|
+
license="apache-2.0",
|
|
356
|
+
open_weights=True,
|
|
357
|
+
public_training_code=None,
|
|
358
|
+
public_training_data=None,
|
|
359
|
+
framework=["ColPali"],
|
|
360
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
|
|
361
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
362
|
+
use_instructions=True,
|
|
363
|
+
adapted_from="Qwen/Qwen3-VL-8B-Instruct",
|
|
364
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
365
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
slm_collfm2_450m = ModelMeta(
|
|
369
|
+
loader=SLMColLFM2Wrapper,
|
|
370
|
+
name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
|
|
371
|
+
languages=SUPPORTED_LANGUAGES,
|
|
372
|
+
revision="a65223fd6633f331ccff4483e47575c3c620dc60",
|
|
373
|
+
release_date="2025-12-20",
|
|
374
|
+
modalities=["image", "text"],
|
|
375
|
+
model_type=["late-interaction"],
|
|
376
|
+
n_parameters=450_953_856,
|
|
377
|
+
memory_usage_mb=860,
|
|
378
|
+
max_tokens=32768,
|
|
379
|
+
embed_dim=128,
|
|
380
|
+
license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",
|
|
381
|
+
open_weights=True,
|
|
382
|
+
public_training_code=None,
|
|
383
|
+
public_training_data=None,
|
|
384
|
+
framework=["ColPali"],
|
|
385
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
|
|
386
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
387
|
+
use_instructions=True,
|
|
388
|
+
adapted_from="LiquidAI/LFM2-VL-450M",
|
|
389
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
390
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
slm_colministral3_3b = ModelMeta(
|
|
394
|
+
loader=SLMColMinistral3Wrapper,
|
|
395
|
+
name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
|
|
396
|
+
languages=SUPPORTED_LANGUAGES,
|
|
397
|
+
revision="54aa3ffbbce20471fdcc4afc07d13989c65e71b8",
|
|
398
|
+
release_date="2025-12-20",
|
|
399
|
+
modalities=["image", "text"],
|
|
400
|
+
model_type=["late-interaction"],
|
|
401
|
+
n_parameters=4_252_136_448,
|
|
402
|
+
memory_usage_mb=8110,
|
|
403
|
+
max_tokens=262144,
|
|
404
|
+
embed_dim=128,
|
|
405
|
+
license="apache-2.0",
|
|
406
|
+
open_weights=True,
|
|
407
|
+
public_training_code=None,
|
|
408
|
+
public_training_data=None,
|
|
409
|
+
framework=["ColPali"],
|
|
410
|
+
reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
|
|
411
|
+
similarity_fn_name=ScoringFunction.MAX_SIM,
|
|
412
|
+
use_instructions=True,
|
|
413
|
+
adapted_from="mistralai/Ministral-3B-Instruct-2410",
|
|
414
|
+
training_datasets=COLPALI_TRAINING_DATA,
|
|
415
|
+
citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
|
|
416
|
+
)
|
|
@@ -17,7 +17,7 @@ spartan8806_atles_champion_embedding = ModelMeta(
|
|
|
17
17
|
embed_dim=768,
|
|
18
18
|
license="apache-2.0",
|
|
19
19
|
similarity_fn_name="cosine",
|
|
20
|
-
framework=["Sentence Transformers"],
|
|
20
|
+
framework=["Sentence Transformers", "safetensors", "Transformers"],
|
|
21
21
|
reference="https://huggingface.co/spartan8806/atles-champion-embedding",
|
|
22
22
|
use_instructions=False,
|
|
23
23
|
training_datasets={"STSBenchmark"},
|
|
@@ -71,7 +71,13 @@ stella_en_400m = ModelMeta(
|
|
|
71
71
|
embed_dim=4096,
|
|
72
72
|
license="mit",
|
|
73
73
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
74
|
-
framework=[
|
|
74
|
+
framework=[
|
|
75
|
+
"Sentence Transformers",
|
|
76
|
+
"PyTorch",
|
|
77
|
+
"GritLM",
|
|
78
|
+
"safetensors",
|
|
79
|
+
"Transformers",
|
|
80
|
+
],
|
|
75
81
|
reference="https://huggingface.co/NovaSearch/stella_en_400M_v5",
|
|
76
82
|
training_datasets=nvidia_training_datasets, # also distilled from gte-qwen (but training data is unknown) #2164
|
|
77
83
|
public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
|
|
@@ -100,7 +106,14 @@ stella_en_1_5b = ModelMeta(
|
|
|
100
106
|
embed_dim=8960,
|
|
101
107
|
license="mit",
|
|
102
108
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
103
|
-
framework=[
|
|
109
|
+
framework=[
|
|
110
|
+
"Sentence Transformers",
|
|
111
|
+
"PyTorch",
|
|
112
|
+
"GritLM",
|
|
113
|
+
"ONNX",
|
|
114
|
+
"safetensors",
|
|
115
|
+
"Transformers",
|
|
116
|
+
],
|
|
104
117
|
reference="https://huggingface.co/NovaSearch/stella_en_1.5B_v5",
|
|
105
118
|
training_datasets=nvidia_training_datasets, # also distilled from gte-qwen (but training data is unknown) #2164
|
|
106
119
|
public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
|
|
@@ -178,7 +191,7 @@ stella_mrl_large_zh_v3_5_1792d = ModelMeta(
|
|
|
178
191
|
max_tokens=512,
|
|
179
192
|
reference="https://huggingface.co/dunzhang/stella-large-zh-v3-1792d",
|
|
180
193
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
181
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
194
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
182
195
|
use_instructions=False,
|
|
183
196
|
superseded_by=None,
|
|
184
197
|
adapted_from="dunzhang/stella-large-zh-v3-1792d",
|
|
@@ -202,7 +215,7 @@ zpoint_large_embedding_zh = ModelMeta(
|
|
|
202
215
|
max_tokens=512,
|
|
203
216
|
reference="https://huggingface.co/iampanda/zpoint_large_embedding_zh",
|
|
204
217
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
205
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
218
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
206
219
|
use_instructions=False,
|
|
207
220
|
superseded_by=None,
|
|
208
221
|
adapted_from="dunzhang/stella-mrl-large-zh-v3.5-1792d",
|
|
@@ -331,7 +331,7 @@ tarka_embedding_150m_v1 = ModelMeta(
|
|
|
331
331
|
max_tokens=2048,
|
|
332
332
|
license="gemma",
|
|
333
333
|
reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
334
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
334
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
335
335
|
use_instructions=True,
|
|
336
336
|
public_training_code=None,
|
|
337
337
|
public_training_data=None,
|
|
@@ -367,7 +367,7 @@ tarka_embedding_350m_v1 = ModelMeta(
|
|
|
367
367
|
license=None,
|
|
368
368
|
reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-350M-V1",
|
|
369
369
|
similarity_fn_name="cosine",
|
|
370
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
370
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
371
371
|
use_instructions=True,
|
|
372
372
|
public_training_code=None,
|
|
373
373
|
public_training_data=None,
|
|
@@ -27,7 +27,7 @@ text2vec_base_chinese = ModelMeta(
|
|
|
27
27
|
max_tokens=512,
|
|
28
28
|
reference="https://huggingface.co/shibing624/text2vec-base-chinese",
|
|
29
29
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
30
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
30
|
+
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
31
31
|
use_instructions=False,
|
|
32
32
|
superseded_by=None,
|
|
33
33
|
adapted_from=None,
|
|
@@ -57,7 +57,7 @@ text2vec_base_chinese_paraphrase = ModelMeta(
|
|
|
57
57
|
max_tokens=512,
|
|
58
58
|
reference="https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase",
|
|
59
59
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
60
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
60
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
|
|
61
61
|
use_instructions=False,
|
|
62
62
|
superseded_by=None,
|
|
63
63
|
adapted_from=None,
|
|
@@ -101,7 +101,13 @@ text2vec_base_multilingual = ModelMeta(
|
|
|
101
101
|
max_tokens=256,
|
|
102
102
|
reference="https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase",
|
|
103
103
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
104
|
-
framework=[
|
|
104
|
+
framework=[
|
|
105
|
+
"Sentence Transformers",
|
|
106
|
+
"PyTorch",
|
|
107
|
+
"ONNX",
|
|
108
|
+
"safetensors",
|
|
109
|
+
"Transformers",
|
|
110
|
+
],
|
|
105
111
|
use_instructions=False,
|
|
106
112
|
superseded_by=None,
|
|
107
113
|
adapted_from="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
|
@@ -17,7 +17,7 @@ xlm_roberta_ua_distilled = ModelMeta(
|
|
|
17
17
|
open_weights=True,
|
|
18
18
|
public_training_code="https://github.com/panalexeu/xlm-roberta-ua-distilled/blob/main/researches/research_final.ipynb",
|
|
19
19
|
similarity_fn_name="cosine",
|
|
20
|
-
framework=["Sentence Transformers"],
|
|
20
|
+
framework=["Sentence Transformers", "safetensors"],
|
|
21
21
|
reference="https://github.com/panalexeu/xlm-roberta-ua-distilled/tree/main",
|
|
22
22
|
languages=["eng-Latn", "ukr-Cyrl"],
|
|
23
23
|
training_datasets=set(
|
|
@@ -72,7 +72,13 @@ uae_large_v1 = ModelMeta(
|
|
|
72
72
|
embed_dim=1024,
|
|
73
73
|
license="mit",
|
|
74
74
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
75
|
-
framework=[
|
|
75
|
+
framework=[
|
|
76
|
+
"Sentence Transformers",
|
|
77
|
+
"PyTorch",
|
|
78
|
+
"ONNX",
|
|
79
|
+
"safetensors",
|
|
80
|
+
"Transformers",
|
|
81
|
+
],
|
|
76
82
|
reference="https://huggingface.co/WhereIsAI/UAE-Large-V1",
|
|
77
83
|
use_instructions=True,
|
|
78
84
|
citation="""
|
|
@@ -38,7 +38,7 @@ vdr_2b_multi_v1 = ModelMeta(
|
|
|
38
38
|
license="apache-2.0",
|
|
39
39
|
reference="https://huggingface.co/llamaindex/vdr-2b-multi-v1",
|
|
40
40
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
41
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
41
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
42
42
|
use_instructions=True,
|
|
43
43
|
public_training_code=None,
|
|
44
44
|
public_training_data="https://huggingface.co/datasets/llamaindex/vdr-multilingual-train",
|
|
@@ -22,7 +22,7 @@ greennode_embedding_large_vn_v1 = ModelMeta(
|
|
|
22
22
|
max_tokens=8194,
|
|
23
23
|
reference="https://huggingface.co/GreenNode/GreenNode-Embedding-Large-VN-V1",
|
|
24
24
|
similarity_fn_name="cosine",
|
|
25
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
25
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
26
26
|
use_instructions=False,
|
|
27
27
|
public_training_code=None,
|
|
28
28
|
public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
|
|
@@ -47,7 +47,7 @@ greennode_embedding_large_vn_mixed_v1 = ModelMeta(
|
|
|
47
47
|
max_tokens=8194,
|
|
48
48
|
reference="https://huggingface.co/GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1",
|
|
49
49
|
similarity_fn_name="cosine",
|
|
50
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
50
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
51
51
|
use_instructions=False,
|
|
52
52
|
public_training_code=None,
|
|
53
53
|
public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
|
|
@@ -72,7 +72,7 @@ aiteamvn_vietnamese_embeddings = ModelMeta(
|
|
|
72
72
|
max_tokens=8194,
|
|
73
73
|
reference="https://huggingface.co/AITeamVN/Vietnamese_Embedding",
|
|
74
74
|
similarity_fn_name="cosine",
|
|
75
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
75
|
+
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
76
76
|
use_instructions=False,
|
|
77
77
|
public_training_code=None,
|
|
78
78
|
public_training_data=None,
|
|
@@ -104,7 +104,7 @@ hiieu_halong_embedding = ModelMeta(
|
|
|
104
104
|
max_tokens=514,
|
|
105
105
|
reference="https://huggingface.co/hiieu/halong_embedding",
|
|
106
106
|
similarity_fn_name="cosine",
|
|
107
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
107
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
108
108
|
public_training_code=None,
|
|
109
109
|
public_training_data=None,
|
|
110
110
|
training_datasets=None,
|
|
@@ -135,7 +135,7 @@ sup_simcse_vietnamese_phobert_base_ = ModelMeta(
|
|
|
135
135
|
license="apache-2.0",
|
|
136
136
|
public_training_code=None,
|
|
137
137
|
public_training_data=None,
|
|
138
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
138
|
+
framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
|
|
139
139
|
reference="https://huggingface.co/VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
|
|
140
140
|
similarity_fn_name="cosine",
|
|
141
141
|
training_datasets=None,
|
|
@@ -173,7 +173,7 @@ bkai_foundation_models_vietnamese_bi_encoder = ModelMeta(
|
|
|
173
173
|
license="apache-2.0",
|
|
174
174
|
public_training_code=None,
|
|
175
175
|
public_training_data=None,
|
|
176
|
-
framework=["PyTorch", "Sentence Transformers"],
|
|
176
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
177
177
|
reference="https://huggingface.co/bkai-foundation-models/vietnamese-bi-encoder",
|
|
178
178
|
similarity_fn_name="cosine",
|
|
179
179
|
training_datasets=None,
|
|
@@ -282,7 +282,7 @@ vlm2vec_lora = ModelMeta(
|
|
|
282
282
|
open_weights=True,
|
|
283
283
|
public_training_code="https://github.com/TIGER-AI-Lab/VLM2Vec",
|
|
284
284
|
public_training_data="https://huggingface.co/datasets/TIGER-Lab/MMEB-train",
|
|
285
|
-
framework=["PyTorch"],
|
|
285
|
+
framework=["PyTorch", "Transformers"],
|
|
286
286
|
reference="https://huggingface.co/TIGER-Lab/VLM2Vec-LoRA",
|
|
287
287
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
288
288
|
use_instructions=True,
|
|
@@ -306,7 +306,7 @@ vlm2vec_full = ModelMeta(
|
|
|
306
306
|
open_weights=True,
|
|
307
307
|
public_training_code="https://github.com/TIGER-AI-Lab/VLM2Vec",
|
|
308
308
|
public_training_data="https://huggingface.co/TIGER-Lab/VLM2Vec-Full",
|
|
309
|
-
framework=["PyTorch"],
|
|
309
|
+
framework=["PyTorch", "Transformers", "safetensors"],
|
|
310
310
|
reference="https://huggingface.co/TIGER-Lab/VLM2Vec-Full",
|
|
311
311
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
312
312
|
use_instructions=True,
|
|
@@ -127,7 +127,7 @@ Youtu_Embedding_V1 = ModelMeta(
|
|
|
127
127
|
max_tokens=8192,
|
|
128
128
|
reference="https://huggingface.co/tencent/Youtu-Embedding",
|
|
129
129
|
similarity_fn_name="cosine",
|
|
130
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
130
|
+
framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
|
|
131
131
|
use_instructions=True,
|
|
132
132
|
public_training_code=None,
|
|
133
133
|
public_training_data=None,
|
|
@@ -26,7 +26,7 @@ yuan_embedding_2_zh = ModelMeta(
|
|
|
26
26
|
max_tokens=512,
|
|
27
27
|
reference="https://huggingface.co/IEITYuan/Yuan-embedding-2.0-zh",
|
|
28
28
|
similarity_fn_name="cosine",
|
|
29
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
29
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
30
30
|
use_instructions=False,
|
|
31
31
|
public_training_code=None,
|
|
32
32
|
public_training_data=None,
|
|
@@ -49,7 +49,7 @@ yuan_embedding_2_en = ModelMeta(
|
|
|
49
49
|
license="apache-2.0",
|
|
50
50
|
reference="https://huggingface.co/IEITYuan/Yuan-embedding-2.0-en",
|
|
51
51
|
similarity_fn_name="cosine",
|
|
52
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
52
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
53
53
|
use_instructions=True,
|
|
54
54
|
public_training_code=None,
|
|
55
55
|
public_training_data=None,
|