mteb-2.4.1-py3-none-any.whl → mteb-2.4.2-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- mteb/models/model_implementations/andersborges.py +12 -0
- mteb/models/model_implementations/bge_models.py +43 -0
- mteb/models/model_implementations/dino_models.py +152 -0
- mteb/models/model_implementations/emillykkejensen_models.py +18 -0
- mteb/models/model_implementations/euler_models.py +6 -0
- mteb/models/model_implementations/fa_models.py +50 -0
- mteb/models/model_implementations/facebookai.py +44 -0
- mteb/models/model_implementations/gte_models.py +69 -0
- mteb/models/model_implementations/kalm_models.py +38 -0
- mteb/models/model_implementations/kblab.py +6 -0
- mteb/models/model_implementations/kowshik24_models.py +9 -0
- mteb/models/model_implementations/misc_models.py +293 -0
- mteb/models/model_implementations/mod_models.py +7 -22
- mteb/models/model_implementations/mxbai_models.py +6 -0
- mteb/models/model_implementations/nomic_models.py +8 -0
- mteb/models/model_implementations/pylate_models.py +33 -0
- mteb/models/model_implementations/ru_sentence_models.py +22 -0
- mteb/models/model_implementations/sentence_transformers_models.py +39 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +7 -0
- mteb/models/model_implementations/ua_sentence_models.py +9 -0
- mteb/models/model_implementations/vi_vn_models.py +33 -0
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/METADATA +1 -1
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/RECORD +27 -27
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/WHEEL +0 -0
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/top_level.txt +0 -0
|
@@ -89,6 +89,12 @@ gte_qwen1_5_7b_instruct = ModelMeta(
|
|
|
89
89
|
public_training_code=None,
|
|
90
90
|
public_training_data=None,
|
|
91
91
|
training_datasets=None,
|
|
92
|
+
citation="""@article{li2023towards,
|
|
93
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
94
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
95
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
96
|
+
year={2023}
|
|
97
|
+
}""",
|
|
92
98
|
)
|
|
93
99
|
|
|
94
100
|
gte_qwen2_1_5b_instruct = ModelMeta(
|
|
@@ -119,6 +125,12 @@ gte_qwen2_1_5b_instruct = ModelMeta(
|
|
|
119
125
|
public_training_code=None,
|
|
120
126
|
public_training_data=None,
|
|
121
127
|
training_datasets=None,
|
|
128
|
+
citation="""@article{li2023towards,
|
|
129
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
130
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
131
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
132
|
+
year={2023}
|
|
133
|
+
}""",
|
|
122
134
|
)
|
|
123
135
|
|
|
124
136
|
gte_small_zh = ModelMeta(
|
|
@@ -140,6 +152,12 @@ gte_small_zh = ModelMeta(
|
|
|
140
152
|
public_training_code=None,
|
|
141
153
|
public_training_data=None,
|
|
142
154
|
training_datasets=None, # Not disclosed
|
|
155
|
+
citation="""@article{li2023towards,
|
|
156
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
157
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
158
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
159
|
+
year={2023}
|
|
160
|
+
}""",
|
|
143
161
|
)
|
|
144
162
|
|
|
145
163
|
gte_base_zh = ModelMeta(
|
|
@@ -161,6 +179,12 @@ gte_base_zh = ModelMeta(
|
|
|
161
179
|
public_training_code=None,
|
|
162
180
|
public_training_data=None,
|
|
163
181
|
training_datasets=None, # Not disclosed
|
|
182
|
+
citation="""@article{li2023towards,
|
|
183
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
184
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
185
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
186
|
+
year={2023}
|
|
187
|
+
}""",
|
|
164
188
|
)
|
|
165
189
|
|
|
166
190
|
gte_large_zh = ModelMeta(
|
|
@@ -182,6 +206,12 @@ gte_large_zh = ModelMeta(
|
|
|
182
206
|
public_training_code=None,
|
|
183
207
|
public_training_data=None,
|
|
184
208
|
training_datasets=None, # Not disclosed
|
|
209
|
+
citation="""@article{li2023towards,
|
|
210
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
211
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
212
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
213
|
+
year={2023}
|
|
214
|
+
}""",
|
|
185
215
|
)
|
|
186
216
|
|
|
187
217
|
gte_multilingual_langs = [
|
|
@@ -304,6 +334,13 @@ gte_multilingual_base = ModelMeta(
|
|
|
304
334
|
public_training_code=None,
|
|
305
335
|
public_training_data=None, # couldn't find
|
|
306
336
|
training_datasets=gte_multi_training_data,
|
|
337
|
+
citation="""@inproceedings{zhang2024mgte,
|
|
338
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
339
|
+
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
|
|
340
|
+
booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
|
|
341
|
+
pages={1393--1412},
|
|
342
|
+
year={2024}
|
|
343
|
+
}""",
|
|
307
344
|
)
|
|
308
345
|
|
|
309
346
|
gte_modernbert_base = ModelMeta(
|
|
@@ -325,6 +362,20 @@ gte_modernbert_base = ModelMeta(
|
|
|
325
362
|
public_training_code=None, # couldn't find
|
|
326
363
|
public_training_data=None,
|
|
327
364
|
training_datasets=gte_multi_training_data, # English part of gte_multi_training_data,
|
|
365
|
+
citation="""@inproceedings{zhang2024mgte,
|
|
366
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
367
|
+
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
|
|
368
|
+
booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
|
|
369
|
+
pages={1393--1412},
|
|
370
|
+
year={2024}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
@article{li2023towards,
|
|
374
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
375
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
376
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
377
|
+
year={2023}
|
|
378
|
+
}""",
|
|
328
379
|
)
|
|
329
380
|
|
|
330
381
|
|
|
@@ -349,4 +400,22 @@ gte_base_en_v15 = ModelMeta(
|
|
|
349
400
|
public_training_code=None,
|
|
350
401
|
public_training_data=None,
|
|
351
402
|
training_datasets=None,
|
|
403
|
+
citation="""@misc{zhang2024mgte,
|
|
404
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
405
|
+
author={Xin Zhang and Yanzhao Zhang and Dingkun Long and Wen Xie and Ziqi Dai and Jialong Tang and Huan Lin and Baosong Yang and Pengjun Xie and Fei Huang and Meishan Zhang and Wenjie Li and Min Zhang},
|
|
406
|
+
year={2024},
|
|
407
|
+
eprint={2407.19669},
|
|
408
|
+
archivePrefix={arXiv},
|
|
409
|
+
primaryClass={cs.CL},
|
|
410
|
+
url={https://arxiv.org/abs/2407.19669},
|
|
411
|
+
}
|
|
412
|
+
@misc{li2023gte,
|
|
413
|
+
title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
|
|
414
|
+
author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
|
|
415
|
+
year={2023},
|
|
416
|
+
eprint={2308.03281},
|
|
417
|
+
archivePrefix={arXiv},
|
|
418
|
+
primaryClass={cs.CL},
|
|
419
|
+
url={https://arxiv.org/abs/2308.03281},
|
|
420
|
+
}""",
|
|
352
421
|
)
|
|
@@ -901,6 +901,25 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
|
|
|
901
901
|
training_datasets=kalm_v2_training_data,
|
|
902
902
|
adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
|
|
903
903
|
superseded_by=None,
|
|
904
|
+
citation="""@misc{zhao2025kalmembeddingv2,
|
|
905
|
+
title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
|
|
906
|
+
author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
|
|
907
|
+
year={2025},
|
|
908
|
+
eprint={2506.20923},
|
|
909
|
+
archivePrefix={arXiv},
|
|
910
|
+
primaryClass={cs.CL},
|
|
911
|
+
url={https://arxiv.org/abs/2506.20923},
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
@misc{hu2025kalmembedding,
|
|
915
|
+
title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
|
|
916
|
+
author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
|
|
917
|
+
year={2025},
|
|
918
|
+
eprint={2501.01028},
|
|
919
|
+
archivePrefix={arXiv},
|
|
920
|
+
primaryClass={cs.CL},
|
|
921
|
+
url={https://arxiv.org/abs/2501.01028},
|
|
922
|
+
}""",
|
|
904
923
|
)
|
|
905
924
|
|
|
906
925
|
KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
|
|
@@ -928,4 +947,23 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
|
|
|
928
947
|
public_training_code="https://github.com/HITsz-TMG/KaLM-Embedding",
|
|
929
948
|
public_training_data=None,
|
|
930
949
|
training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
|
|
950
|
+
citation="""@misc{zhao2025kalmembeddingv2,
|
|
951
|
+
title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
|
|
952
|
+
author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
|
|
953
|
+
year={2025},
|
|
954
|
+
eprint={2506.20923},
|
|
955
|
+
archivePrefix={arXiv},
|
|
956
|
+
primaryClass={cs.CL},
|
|
957
|
+
url={https://arxiv.org/abs/2506.20923},
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
@misc{hu2025kalmembedding,
|
|
961
|
+
title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
|
|
962
|
+
author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
|
|
963
|
+
year={2025},
|
|
964
|
+
eprint={2501.01028},
|
|
965
|
+
archivePrefix={arXiv},
|
|
966
|
+
primaryClass={cs.CL},
|
|
967
|
+
url={https://arxiv.org/abs/2501.01028},
|
|
968
|
+
}""",
|
|
931
969
|
)
|
|
@@ -21,4 +21,10 @@ sbert_swedish = ModelMeta(
|
|
|
21
21
|
public_training_data=None,
|
|
22
22
|
training_datasets=None,
|
|
23
23
|
adapted_from="sentence-transformers/all-mpnet-base-v2",
|
|
24
|
+
citation="""@misc{rekathati2021introducing,
|
|
25
|
+
author = {Rekathati, Faton},
|
|
26
|
+
title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
|
|
27
|
+
url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
|
|
28
|
+
year = {2021}
|
|
29
|
+
}""",
|
|
24
30
|
)
|
|
@@ -19,4 +19,13 @@ kowshik24_bangla_embedding_model = ModelMeta(
|
|
|
19
19
|
public_training_code="https://github.com/kowshik24/Bangla-Embedding",
|
|
20
20
|
public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
|
|
21
21
|
training_datasets=set(),
|
|
22
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
23
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
24
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
25
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
26
|
+
month = "11",
|
|
27
|
+
year = "2019",
|
|
28
|
+
publisher = "Association for Computational Linguistics",
|
|
29
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
30
|
+
}""",
|
|
22
31
|
)
|