mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
|
@@ -298,7 +298,7 @@ jasper_en_v1 = ModelMeta(
|
|
|
298
298
|
license="apache-2.0",
|
|
299
299
|
reference="https://huggingface.co/infgrad/jasper_en_vision_language_v1",
|
|
300
300
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
301
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
301
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
302
302
|
use_instructions=True,
|
|
303
303
|
adapted_from=None,
|
|
304
304
|
superseded_by=None,
|
|
@@ -345,7 +345,7 @@ Jasper_Token_Compression_600M = ModelMeta(
|
|
|
345
345
|
max_tokens=32768,
|
|
346
346
|
reference="https://huggingface.co/infgrad/Jasper-Token-Compression-600M",
|
|
347
347
|
similarity_fn_name="cosine",
|
|
348
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
348
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
349
349
|
use_instructions=True,
|
|
350
350
|
public_training_code="https://github.com/DunZhang/Jasper-Token-Compression-Training",
|
|
351
351
|
# public_training_data: unsupervised data for distillation
|
|
@@ -7,6 +7,7 @@ from tqdm.auto import tqdm
|
|
|
7
7
|
from mteb._requires_package import requires_image_dependencies
|
|
8
8
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
|
+
from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
|
|
10
11
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
12
|
from mteb.types import Array, BatchedInput, PromptType
|
|
12
13
|
|
|
@@ -120,8 +121,17 @@ class JinaCLIPModel(AbsEncoder):
|
|
|
120
121
|
raise ValueError
|
|
121
122
|
|
|
122
123
|
|
|
124
|
+
_JINA_CLIP_TRAIN_DATASETS_V1 = {
|
|
125
|
+
# LAION400M
|
|
126
|
+
# ShareGPT4V
|
|
127
|
+
"MSMARCO",
|
|
128
|
+
"NQ",
|
|
129
|
+
"HotpotQA",
|
|
130
|
+
# Natural Language Inference (NLI) dataset (Bowman et al., 2015)
|
|
131
|
+
}
|
|
132
|
+
|
|
123
133
|
jina_clip_v1 = ModelMeta(
|
|
124
|
-
loader=JinaCLIPModel,
|
|
134
|
+
loader=JinaCLIPModel,
|
|
125
135
|
name="jinaai/jina-clip-v1",
|
|
126
136
|
model_type=["dense"],
|
|
127
137
|
languages=["eng-Latn"],
|
|
@@ -136,17 +146,45 @@ jina_clip_v1 = ModelMeta(
|
|
|
136
146
|
open_weights=True,
|
|
137
147
|
public_training_code=None,
|
|
138
148
|
public_training_data=None,
|
|
139
|
-
framework=["PyTorch"],
|
|
149
|
+
framework=["PyTorch", "Transformers", "ONNX", "safetensors"],
|
|
140
150
|
reference="https://huggingface.co/jinaai/jina-clip-v1",
|
|
141
151
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
142
152
|
use_instructions=True,
|
|
143
|
-
training_datasets=
|
|
144
|
-
# LAION400M
|
|
145
|
-
# ShareGPT4V
|
|
146
|
-
"MSMARCO",
|
|
147
|
-
# NQ
|
|
148
|
-
# HotpotQA
|
|
149
|
-
# Natural Language Inference (NLI) dataset (Bowman et al., 2015)
|
|
150
|
-
},
|
|
153
|
+
training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1,
|
|
151
154
|
citation=JINA_CLIP_CITATION,
|
|
155
|
+
superseded_by="jinaai/jina-clip-v2",
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
jina_clip_v2 = ModelMeta(
|
|
159
|
+
loader=JinaCLIPModel,
|
|
160
|
+
name="jinaai/jina-clip-v2",
|
|
161
|
+
revision="344d954da76eb8ad47a7aaff42d012e30c15b8fe",
|
|
162
|
+
release_date="2024-10-09",
|
|
163
|
+
languages=["eng-Latn"],
|
|
164
|
+
n_parameters=865278477,
|
|
165
|
+
memory_usage_mb=1650.0,
|
|
166
|
+
max_tokens=8192,
|
|
167
|
+
embed_dim=1024,
|
|
168
|
+
license="cc-by-nc-4.0",
|
|
169
|
+
open_weights=True,
|
|
170
|
+
public_training_code=None,
|
|
171
|
+
public_training_data=None,
|
|
172
|
+
framework=["PyTorch", "Sentence Transformers"],
|
|
173
|
+
reference="https://huggingface.co/jinaai/jina-clip-v2",
|
|
174
|
+
similarity_fn_name=ScoringFunction.COSINE,
|
|
175
|
+
use_instructions=False,
|
|
176
|
+
training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1 | COLPALI_TRAINING_DATA,
|
|
177
|
+
modalities=["text", "image"],
|
|
178
|
+
model_type=["dense"],
|
|
179
|
+
citation="""
|
|
180
|
+
@misc{koukounas2024jinaclipv2multilingualmultimodalembeddings,
|
|
181
|
+
title={jina-clip-v2: Multilingual Multimodal Embeddings for Text and Images},
|
|
182
|
+
author={Andreas Koukounas and Georgios Mastrapas and Bo Wang and Mohammad Kalim Akram and Sedigheh Eslami and Michael Günther and Isabelle Mohr and Saba Sturua and Scott Martens and Nan Wang and Han Xiao},
|
|
183
|
+
year={2024},
|
|
184
|
+
eprint={2412.08802},
|
|
185
|
+
archivePrefix={arXiv},
|
|
186
|
+
primaryClass={cs.CL},
|
|
187
|
+
url={https://arxiv.org/abs/2412.08802},
|
|
188
|
+
}
|
|
189
|
+
""",
|
|
152
190
|
)
|
|
@@ -257,6 +257,7 @@ class JinaRerankerV3Wrapper(CrossEncoderWrapper):
|
|
|
257
257
|
self,
|
|
258
258
|
model: CrossEncoder | str,
|
|
259
259
|
revision: str | None = None,
|
|
260
|
+
device: str | None = None,
|
|
260
261
|
trust_remote_code: bool = True,
|
|
261
262
|
**kwargs: Any,
|
|
262
263
|
) -> None:
|
|
@@ -267,10 +268,7 @@ class JinaRerankerV3Wrapper(CrossEncoderWrapper):
|
|
|
267
268
|
model, trust_remote_code=trust_remote_code, dtype="auto"
|
|
268
269
|
)
|
|
269
270
|
|
|
270
|
-
device =
|
|
271
|
-
if device is None:
|
|
272
|
-
device = get_device_name()
|
|
273
|
-
logger.info(f"Use pytorch device: {device}")
|
|
271
|
+
device = device or get_device_name()
|
|
274
272
|
|
|
275
273
|
self.model.to(device)
|
|
276
274
|
self.model.eval()
|
|
@@ -320,6 +318,7 @@ class JinaWrapper(SentenceTransformerEncoderWrapper):
|
|
|
320
318
|
self,
|
|
321
319
|
model: str,
|
|
322
320
|
revision: str,
|
|
321
|
+
device: str | None = None,
|
|
323
322
|
model_prompts: dict[str, str] | None = None,
|
|
324
323
|
**kwargs,
|
|
325
324
|
) -> None:
|
|
@@ -339,7 +338,9 @@ class JinaWrapper(SentenceTransformerEncoderWrapper):
|
|
|
339
338
|
)
|
|
340
339
|
import flash_attn # noqa: F401
|
|
341
340
|
|
|
342
|
-
super().__init__(
|
|
341
|
+
super().__init__(
|
|
342
|
+
model, revision, device=device, model_prompts=model_prompts, **kwargs
|
|
343
|
+
)
|
|
343
344
|
|
|
344
345
|
def encode(
|
|
345
346
|
self,
|
|
@@ -732,7 +733,7 @@ jina_reranker_v3 = ModelMeta(
|
|
|
732
733
|
embed_dim=None,
|
|
733
734
|
license="cc-by-nc-4.0",
|
|
734
735
|
similarity_fn_name=None,
|
|
735
|
-
framework=["PyTorch"],
|
|
736
|
+
framework=["PyTorch", "Transformers", "safetensors"],
|
|
736
737
|
use_instructions=None,
|
|
737
738
|
reference="https://huggingface.co/jinaai/jina-reranker-v3",
|
|
738
739
|
public_training_code=None,
|
|
@@ -775,7 +776,7 @@ jina_embeddings_v4 = ModelMeta(
|
|
|
775
776
|
embed_dim=2048,
|
|
776
777
|
license="cc-by-nc-4.0",
|
|
777
778
|
similarity_fn_name="cosine",
|
|
778
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
779
|
+
framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
|
|
779
780
|
use_instructions=True,
|
|
780
781
|
reference="https://huggingface.co/jinaai/jina-embeddings-v4",
|
|
781
782
|
public_training_code=None,
|
|
@@ -795,7 +796,7 @@ jina_embeddings_v4 = ModelMeta(
|
|
|
795
796
|
|
|
796
797
|
|
|
797
798
|
jina_embeddings_v3 = ModelMeta(
|
|
798
|
-
loader=JinaWrapper,
|
|
799
|
+
loader=JinaWrapper,
|
|
799
800
|
loader_kwargs=dict(
|
|
800
801
|
trust_remote_code=True,
|
|
801
802
|
model_prompts={
|
|
@@ -823,7 +824,13 @@ jina_embeddings_v3 = ModelMeta(
|
|
|
823
824
|
embed_dim=1024,
|
|
824
825
|
license="cc-by-nc-4.0",
|
|
825
826
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
826
|
-
framework=[
|
|
827
|
+
framework=[
|
|
828
|
+
"Sentence Transformers",
|
|
829
|
+
"PyTorch",
|
|
830
|
+
"Transformers",
|
|
831
|
+
"ONNX",
|
|
832
|
+
"safetensors",
|
|
833
|
+
],
|
|
827
834
|
use_instructions=True,
|
|
828
835
|
reference="https://huggingface.co/jinaai/jina-embeddings-v3",
|
|
829
836
|
public_training_code=None,
|
|
@@ -878,7 +885,7 @@ jina_embeddings_v2_base_en = ModelMeta(
|
|
|
878
885
|
max_tokens=8192,
|
|
879
886
|
reference="https://huggingface.co/jinaai/jina-embeddings-v2-base-en",
|
|
880
887
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
881
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
888
|
+
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
882
889
|
use_instructions=False,
|
|
883
890
|
superseded_by=None,
|
|
884
891
|
adapted_from="jina-bert-base-en-v1", # pretrained on C4 with Alibi to support longer context.
|
|
@@ -942,7 +949,7 @@ jina_embeddings_v2_small_en = ModelMeta(
|
|
|
942
949
|
max_tokens=8192,
|
|
943
950
|
reference="https://huggingface.co/jinaai/jina-embeddings-v2-small-en",
|
|
944
951
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
945
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
952
|
+
framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
|
|
946
953
|
use_instructions=False,
|
|
947
954
|
superseded_by=None,
|
|
948
955
|
adapted_from="jina-bert-smalll-en-v1", # pretrained on C4 with Alibi to support longer context
|
|
@@ -16,16 +16,22 @@ sbert_swedish = ModelMeta(
|
|
|
16
16
|
max_tokens=384,
|
|
17
17
|
reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
|
|
18
18
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
19
|
-
framework=[
|
|
19
|
+
framework=[
|
|
20
|
+
"Sentence Transformers",
|
|
21
|
+
"PyTorch",
|
|
22
|
+
"safetensors",
|
|
23
|
+
"GGUF",
|
|
24
|
+
"Transformers",
|
|
25
|
+
],
|
|
20
26
|
use_instructions=False,
|
|
21
27
|
public_training_code=None,
|
|
22
28
|
public_training_data=None,
|
|
23
29
|
training_datasets=None,
|
|
24
30
|
adapted_from="sentence-transformers/all-mpnet-base-v2",
|
|
25
|
-
citation="""@misc{rekathati2021introducing,
|
|
26
|
-
author = {Rekathati, Faton},
|
|
27
|
-
title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
|
|
28
|
-
url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
|
|
29
|
-
year = {2021}
|
|
31
|
+
citation="""@misc{rekathati2021introducing,
|
|
32
|
+
author = {Rekathati, Faton},
|
|
33
|
+
title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
|
|
34
|
+
url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
|
|
35
|
+
year = {2021}
|
|
30
36
|
}""",
|
|
31
37
|
)
|
|
@@ -4,7 +4,7 @@ from mteb.models.sentence_transformer_wrapper import (
|
|
|
4
4
|
)
|
|
5
5
|
|
|
6
6
|
dfm_enc_large = ModelMeta(
|
|
7
|
-
loader=sentence_transformers_loader,
|
|
7
|
+
loader=sentence_transformers_loader,
|
|
8
8
|
name="KennethEnevoldsen/dfm-sentence-encoder-large",
|
|
9
9
|
model_type=["dense"],
|
|
10
10
|
languages=["dan-Latn"],
|
|
@@ -18,7 +18,7 @@ dfm_enc_large = ModelMeta(
|
|
|
18
18
|
max_tokens=512,
|
|
19
19
|
reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large",
|
|
20
20
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
21
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
21
|
+
framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
|
|
22
22
|
use_instructions=False,
|
|
23
23
|
superseded_by=None,
|
|
24
24
|
adapted_from="chcaa/dfm-encoder-large-v1",
|
|
@@ -39,7 +39,7 @@ dfm_enc_large = ModelMeta(
|
|
|
39
39
|
)
|
|
40
40
|
|
|
41
41
|
dfm_enc_med = ModelMeta(
|
|
42
|
-
loader=sentence_transformers_loader,
|
|
42
|
+
loader=sentence_transformers_loader,
|
|
43
43
|
name="KennethEnevoldsen/dfm-sentence-encoder-medium",
|
|
44
44
|
model_type=["dense"],
|
|
45
45
|
languages=["dan-Latn"],
|
|
@@ -53,7 +53,7 @@ dfm_enc_med = ModelMeta(
|
|
|
53
53
|
max_tokens=512,
|
|
54
54
|
reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-medium",
|
|
55
55
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
56
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
56
|
+
framework=["Sentence Transformers", "PyTorch", "Transformers"],
|
|
57
57
|
use_instructions=False,
|
|
58
58
|
superseded_by=None,
|
|
59
59
|
adapted_from=None,
|
|
@@ -16,7 +16,7 @@ xlmr_scandi = ModelMeta(
|
|
|
16
16
|
max_tokens=512,
|
|
17
17
|
reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
|
|
18
18
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
19
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
19
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
|
|
20
20
|
use_instructions=False,
|
|
21
21
|
public_training_code=None,
|
|
22
22
|
public_training_data=None,
|
|
@@ -15,7 +15,7 @@ kowshik24_bangla_embedding_model = ModelMeta(
|
|
|
15
15
|
max_tokens=128,
|
|
16
16
|
reference="https://huggingface.co/Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
|
|
17
17
|
similarity_fn_name="cosine",
|
|
18
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
18
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
19
19
|
use_instructions=False,
|
|
20
20
|
public_training_code="https://github.com/kowshik24/Bangla-Embedding",
|
|
21
21
|
public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
|
|
@@ -58,7 +58,7 @@ lgai_embedding_en = ModelMeta(
|
|
|
58
58
|
max_tokens=32768,
|
|
59
59
|
reference="https://huggingface.co/annamodels/LGAI-Embedding-Preview",
|
|
60
60
|
similarity_fn_name="cosine",
|
|
61
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
61
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
|
|
62
62
|
use_instructions=True,
|
|
63
63
|
public_training_code=None,
|
|
64
64
|
public_training_data=None,
|
|
@@ -44,7 +44,7 @@ Linq_Embed_Mistral = ModelMeta(
|
|
|
44
44
|
max_tokens=32768,
|
|
45
45
|
reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral",
|
|
46
46
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
47
|
-
framework=["Sentence Transformers", "PyTorch"],
|
|
47
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
|
|
48
48
|
use_instructions=True,
|
|
49
49
|
public_training_code=None,
|
|
50
50
|
public_training_data=None,
|
|
@@ -125,7 +125,7 @@ listconranker = ModelMeta(
|
|
|
125
125
|
license="mit",
|
|
126
126
|
max_tokens=512,
|
|
127
127
|
reference="https://huggingface.co/ByteDance/ListConRanker",
|
|
128
|
-
framework=["PyTorch"],
|
|
128
|
+
framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
|
|
129
129
|
use_instructions=False,
|
|
130
130
|
public_training_code=None,
|
|
131
131
|
public_training_data=None,
|
|
@@ -181,7 +181,7 @@ llm2clip_training_sets = set(
|
|
|
181
181
|
)
|
|
182
182
|
|
|
183
183
|
llm2clip_openai_l_14_336 = ModelMeta(
|
|
184
|
-
loader=llm2clip_loader,
|
|
184
|
+
loader=llm2clip_loader,
|
|
185
185
|
name="microsoft/LLM2CLIP-Openai-L-14-336",
|
|
186
186
|
model_type=["dense"],
|
|
187
187
|
languages=["eng-Latn"],
|
|
@@ -196,7 +196,7 @@ llm2clip_openai_l_14_336 = ModelMeta(
|
|
|
196
196
|
open_weights=True,
|
|
197
197
|
public_training_code="https://github.com/microsoft/LLM2CLIP",
|
|
198
198
|
public_training_data=None,
|
|
199
|
-
framework=["PyTorch"],
|
|
199
|
+
framework=["PyTorch", "safetensors"],
|
|
200
200
|
reference="https://huggingface.co/microsoft/LLM2CLIP-Openai-L-14-336",
|
|
201
201
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
202
202
|
use_instructions=True,
|
|
@@ -206,7 +206,7 @@ llm2clip_openai_l_14_336 = ModelMeta(
|
|
|
206
206
|
|
|
207
207
|
# NOTE: https://huggingface.co/microsoft/LLM2CLIP-Openai-L-14-224/discussions/1
|
|
208
208
|
llm2clip_openai_l_14_224 = ModelMeta(
|
|
209
|
-
loader=llm2clip_loader,
|
|
209
|
+
loader=llm2clip_loader,
|
|
210
210
|
name="microsoft/LLM2CLIP-Openai-L-14-224",
|
|
211
211
|
model_type=["dense"],
|
|
212
212
|
languages=["eng-Latn"],
|
|
@@ -221,7 +221,7 @@ llm2clip_openai_l_14_224 = ModelMeta(
|
|
|
221
221
|
open_weights=True,
|
|
222
222
|
public_training_code="https://github.com/microsoft/LLM2CLIP",
|
|
223
223
|
public_training_data=None,
|
|
224
|
-
framework=["PyTorch"],
|
|
224
|
+
framework=["PyTorch", "safetensors"],
|
|
225
225
|
reference="https://huggingface.co/microsoft/LLM2CLIP-Openai-L-14-224",
|
|
226
226
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
227
227
|
use_instructions=True,
|
|
@@ -230,7 +230,7 @@ llm2clip_openai_l_14_224 = ModelMeta(
|
|
|
230
230
|
)
|
|
231
231
|
|
|
232
232
|
llm2clip_openai_b_16 = ModelMeta(
|
|
233
|
-
loader=llm2clip_loader,
|
|
233
|
+
loader=llm2clip_loader,
|
|
234
234
|
name="microsoft/LLM2CLIP-Openai-B-16",
|
|
235
235
|
model_type=["dense"],
|
|
236
236
|
languages=["eng-Latn"],
|
|
@@ -245,7 +245,7 @@ llm2clip_openai_b_16 = ModelMeta(
|
|
|
245
245
|
open_weights=True,
|
|
246
246
|
public_training_code="https://github.com/microsoft/LLM2CLIP",
|
|
247
247
|
public_training_data=None,
|
|
248
|
-
framework=["PyTorch"],
|
|
248
|
+
framework=["PyTorch", "safetensors"],
|
|
249
249
|
reference="https://huggingface.co/microsoft/LLM2CLIP-Openai-B-16",
|
|
250
250
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
251
251
|
use_instructions=True,
|
|
@@ -145,7 +145,7 @@ llm2vec_llama3_8b_supervised = ModelMeta(
|
|
|
145
145
|
license="mit",
|
|
146
146
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised",
|
|
147
147
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
148
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
148
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
149
149
|
use_instructions=True,
|
|
150
150
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
151
151
|
training_datasets=llm2vec_supervised_training_data,
|
|
@@ -174,7 +174,7 @@ llm2vec_llama3_8b_unsupervised = ModelMeta(
|
|
|
174
174
|
license="mit",
|
|
175
175
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse",
|
|
176
176
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
177
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
177
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
178
178
|
use_instructions=True,
|
|
179
179
|
citation=LLM2VEC_CITATION,
|
|
180
180
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -203,7 +203,7 @@ llm2vec_mistral7b_supervised = ModelMeta(
|
|
|
203
203
|
license="mit",
|
|
204
204
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
|
|
205
205
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
206
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
206
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
207
207
|
use_instructions=True,
|
|
208
208
|
citation=LLM2VEC_CITATION,
|
|
209
209
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -232,7 +232,7 @@ llm2vec_mistral7b_unsupervised = ModelMeta(
|
|
|
232
232
|
license="mit",
|
|
233
233
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse",
|
|
234
234
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
235
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
235
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
236
236
|
use_instructions=True,
|
|
237
237
|
citation=LLM2VEC_CITATION,
|
|
238
238
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -261,7 +261,7 @@ llm2vec_llama2_7b_supervised = ModelMeta(
|
|
|
261
261
|
license="mit",
|
|
262
262
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
|
|
263
263
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
264
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
264
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
265
265
|
use_instructions=True,
|
|
266
266
|
citation=LLM2VEC_CITATION,
|
|
267
267
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -290,7 +290,7 @@ llm2vec_llama2_7b_unsupervised = ModelMeta(
|
|
|
290
290
|
license="mit",
|
|
291
291
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse",
|
|
292
292
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
293
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
293
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
294
294
|
use_instructions=True,
|
|
295
295
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
296
296
|
training_datasets=set(),
|
|
@@ -319,7 +319,7 @@ llm2vec_sheared_llama_supervised = ModelMeta(
|
|
|
319
319
|
license="mit",
|
|
320
320
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
|
321
321
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
322
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
322
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
323
323
|
use_instructions=True,
|
|
324
324
|
citation=LLM2VEC_CITATION,
|
|
325
325
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -348,7 +348,7 @@ llm2vec_sheared_llama_unsupervised = ModelMeta(
|
|
|
348
348
|
license="mit",
|
|
349
349
|
reference="https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse",
|
|
350
350
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
351
|
-
framework=["LLM2Vec", "PyTorch"],
|
|
351
|
+
framework=["LLM2Vec", "PyTorch", "safetensors"],
|
|
352
352
|
use_instructions=True,
|
|
353
353
|
citation=LLM2VEC_CITATION,
|
|
354
354
|
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import time
|
|
4
|
+
import warnings
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
@@ -246,7 +247,9 @@ class HakimModelWrapper(AbsEncoder):
|
|
|
246
247
|
task_prompt, task_id = DATASET_TASKS.get(task_name, (None, None))
|
|
247
248
|
|
|
248
249
|
if not task_prompt:
|
|
249
|
-
|
|
250
|
+
msg = f"Unknown dataset: {task_name}, no preprocessing applied."
|
|
251
|
+
logger.warning(msg)
|
|
252
|
+
warnings.warn(msg)
|
|
250
253
|
return sample
|
|
251
254
|
|
|
252
255
|
task_prompt = f"مسئله : {task_prompt}"
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from mteb.models.model_implementations.arctic_models import arctic_v1_training_datasets
|
|
2
|
-
from mteb.models.model_implementations.
|
|
2
|
+
from mteb.models.model_implementations.mixedbread_ai_models import (
|
|
3
|
+
mixedbread_training_data,
|
|
4
|
+
)
|
|
3
5
|
from mteb.models.model_meta import ModelMeta
|
|
4
6
|
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
|
|
5
7
|
|
|
@@ -35,7 +37,13 @@ mdbr_leaf_ir = ModelMeta(
|
|
|
35
37
|
release_date="2025-08-27",
|
|
36
38
|
languages=["eng-Latn"],
|
|
37
39
|
open_weights=True,
|
|
38
|
-
framework=[
|
|
40
|
+
framework=[
|
|
41
|
+
"Sentence Transformers",
|
|
42
|
+
"PyTorch",
|
|
43
|
+
"ONNX",
|
|
44
|
+
"safetensors",
|
|
45
|
+
"Transformers",
|
|
46
|
+
],
|
|
39
47
|
n_parameters=22_861_056,
|
|
40
48
|
memory_usage_mb=86,
|
|
41
49
|
max_tokens=512,
|
|
@@ -63,7 +71,13 @@ mdbr_leaf_mt = ModelMeta(
|
|
|
63
71
|
release_date="2025-08-27",
|
|
64
72
|
languages=["eng-Latn"],
|
|
65
73
|
open_weights=True,
|
|
66
|
-
framework=[
|
|
74
|
+
framework=[
|
|
75
|
+
"Sentence Transformers",
|
|
76
|
+
"PyTorch",
|
|
77
|
+
"ONNX",
|
|
78
|
+
"safetensors",
|
|
79
|
+
"Transformers",
|
|
80
|
+
],
|
|
67
81
|
n_parameters=22_958_592,
|
|
68
82
|
memory_usage_mb=86,
|
|
69
83
|
max_tokens=512,
|