mteb 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (27)
  1. mteb/models/model_implementations/andersborges.py +12 -0
  2. mteb/models/model_implementations/bge_models.py +43 -0
  3. mteb/models/model_implementations/dino_models.py +152 -0
  4. mteb/models/model_implementations/emillykkejensen_models.py +18 -0
  5. mteb/models/model_implementations/euler_models.py +6 -0
  6. mteb/models/model_implementations/fa_models.py +50 -0
  7. mteb/models/model_implementations/facebookai.py +44 -0
  8. mteb/models/model_implementations/gte_models.py +69 -0
  9. mteb/models/model_implementations/kalm_models.py +38 -0
  10. mteb/models/model_implementations/kblab.py +6 -0
  11. mteb/models/model_implementations/kowshik24_models.py +9 -0
  12. mteb/models/model_implementations/misc_models.py +293 -0
  13. mteb/models/model_implementations/mod_models.py +7 -22
  14. mteb/models/model_implementations/mxbai_models.py +6 -0
  15. mteb/models/model_implementations/nomic_models.py +8 -0
  16. mteb/models/model_implementations/pylate_models.py +33 -0
  17. mteb/models/model_implementations/ru_sentence_models.py +22 -0
  18. mteb/models/model_implementations/sentence_transformers_models.py +39 -0
  19. mteb/models/model_implementations/spartan8806_atles_champion.py +7 -0
  20. mteb/models/model_implementations/ua_sentence_models.py +9 -0
  21. mteb/models/model_implementations/vi_vn_models.py +33 -0
  22. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/METADATA +1 -1
  23. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/RECORD +27 -27
  24. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/WHEEL +0 -0
  25. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/entry_points.txt +0 -0
  26. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/licenses/LICENSE +0 -0
  27. {mteb-2.4.1.dist-info → mteb-2.4.2.dist-info}/top_level.txt +0 -0
@@ -89,6 +89,12 @@ gte_qwen1_5_7b_instruct = ModelMeta(
89
89
  public_training_code=None,
90
90
  public_training_data=None,
91
91
  training_datasets=None,
92
+ citation="""@article{li2023towards,
93
+ title={Towards general text embeddings with multi-stage contrastive learning},
94
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
95
+ journal={arXiv preprint arXiv:2308.03281},
96
+ year={2023}
97
+ }""",
92
98
  )
93
99
 
94
100
  gte_qwen2_1_5b_instruct = ModelMeta(
@@ -119,6 +125,12 @@ gte_qwen2_1_5b_instruct = ModelMeta(
119
125
  public_training_code=None,
120
126
  public_training_data=None,
121
127
  training_datasets=None,
128
+ citation="""@article{li2023towards,
129
+ title={Towards general text embeddings with multi-stage contrastive learning},
130
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
131
+ journal={arXiv preprint arXiv:2308.03281},
132
+ year={2023}
133
+ }""",
122
134
  )
123
135
 
124
136
  gte_small_zh = ModelMeta(
@@ -140,6 +152,12 @@ gte_small_zh = ModelMeta(
140
152
  public_training_code=None,
141
153
  public_training_data=None,
142
154
  training_datasets=None, # Not disclosed
155
+ citation="""@article{li2023towards,
156
+ title={Towards general text embeddings with multi-stage contrastive learning},
157
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
158
+ journal={arXiv preprint arXiv:2308.03281},
159
+ year={2023}
160
+ }""",
143
161
  )
144
162
 
145
163
  gte_base_zh = ModelMeta(
@@ -161,6 +179,12 @@ gte_base_zh = ModelMeta(
161
179
  public_training_code=None,
162
180
  public_training_data=None,
163
181
  training_datasets=None, # Not disclosed
182
+ citation="""@article{li2023towards,
183
+ title={Towards general text embeddings with multi-stage contrastive learning},
184
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
185
+ journal={arXiv preprint arXiv:2308.03281},
186
+ year={2023}
187
+ }""",
164
188
  )
165
189
 
166
190
  gte_large_zh = ModelMeta(
@@ -182,6 +206,12 @@ gte_large_zh = ModelMeta(
182
206
  public_training_code=None,
183
207
  public_training_data=None,
184
208
  training_datasets=None, # Not disclosed
209
+ citation="""@article{li2023towards,
210
+ title={Towards general text embeddings with multi-stage contrastive learning},
211
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
212
+ journal={arXiv preprint arXiv:2308.03281},
213
+ year={2023}
214
+ }""",
185
215
  )
186
216
 
187
217
  gte_multilingual_langs = [
@@ -304,6 +334,13 @@ gte_multilingual_base = ModelMeta(
304
334
  public_training_code=None,
305
335
  public_training_data=None, # couldn't find
306
336
  training_datasets=gte_multi_training_data,
337
+ citation="""@inproceedings{zhang2024mgte,
338
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
339
+ author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
340
+ booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
341
+ pages={1393--1412},
342
+ year={2024}
343
+ }""",
307
344
  )
308
345
 
309
346
  gte_modernbert_base = ModelMeta(
@@ -325,6 +362,20 @@ gte_modernbert_base = ModelMeta(
325
362
  public_training_code=None, # couldn't find
326
363
  public_training_data=None,
327
364
  training_datasets=gte_multi_training_data, # English part of gte_multi_training_data,
365
+ citation="""@inproceedings{zhang2024mgte,
366
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
367
+ author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
368
+ booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
369
+ pages={1393--1412},
370
+ year={2024}
371
+ }
372
+
373
+ @article{li2023towards,
374
+ title={Towards general text embeddings with multi-stage contrastive learning},
375
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
376
+ journal={arXiv preprint arXiv:2308.03281},
377
+ year={2023}
378
+ }""",
328
379
  )
329
380
 
330
381
 
@@ -349,4 +400,22 @@ gte_base_en_v15 = ModelMeta(
349
400
  public_training_code=None,
350
401
  public_training_data=None,
351
402
  training_datasets=None,
403
+ citation="""@misc{zhang2024mgte,
404
+ title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
405
+ author={Xin Zhang and Yanzhao Zhang and Dingkun Long and Wen Xie and Ziqi Dai and Jialong Tang and Huan Lin and Baosong Yang and Pengjun Xie and Fei Huang and Meishan Zhang and Wenjie Li and Min Zhang},
406
+ year={2024},
407
+ eprint={2407.19669},
408
+ archivePrefix={arXiv},
409
+ primaryClass={cs.CL},
410
+ url={https://arxiv.org/abs/2407.19669},
411
+ }
412
+ @misc{li2023gte,
413
+ title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
414
+ author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
415
+ year={2023},
416
+ eprint={2308.03281},
417
+ archivePrefix={arXiv},
418
+ primaryClass={cs.CL},
419
+ url={https://arxiv.org/abs/2308.03281},
420
+ }""",
352
421
  )
@@ -901,6 +901,25 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
901
901
  training_datasets=kalm_v2_training_data,
902
902
  adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
903
903
  superseded_by=None,
904
+ citation="""@misc{zhao2025kalmembeddingv2,
905
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
906
+ author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
907
+ year={2025},
908
+ eprint={2506.20923},
909
+ archivePrefix={arXiv},
910
+ primaryClass={cs.CL},
911
+ url={https://arxiv.org/abs/2506.20923},
912
+ }
913
+
914
+ @misc{hu2025kalmembedding,
915
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
916
+ author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
917
+ year={2025},
918
+ eprint={2501.01028},
919
+ archivePrefix={arXiv},
920
+ primaryClass={cs.CL},
921
+ url={https://arxiv.org/abs/2501.01028},
922
+ }""",
904
923
  )
905
924
 
906
925
  KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
@@ -928,4 +947,23 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
928
947
  public_training_code="https://github.com/HITsz-TMG/KaLM-Embedding",
929
948
  public_training_data=None,
930
949
  training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
950
+ citation="""@misc{zhao2025kalmembeddingv2,
951
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
952
+ author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
953
+ year={2025},
954
+ eprint={2506.20923},
955
+ archivePrefix={arXiv},
956
+ primaryClass={cs.CL},
957
+ url={https://arxiv.org/abs/2506.20923},
958
+ }
959
+
960
+ @misc{hu2025kalmembedding,
961
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
962
+ author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
963
+ year={2025},
964
+ eprint={2501.01028},
965
+ archivePrefix={arXiv},
966
+ primaryClass={cs.CL},
967
+ url={https://arxiv.org/abs/2501.01028},
968
+ }""",
931
969
  )
@@ -21,4 +21,10 @@ sbert_swedish = ModelMeta(
21
21
  public_training_data=None,
22
22
  training_datasets=None,
23
23
  adapted_from="sentence-transformers/all-mpnet-base-v2",
24
+ citation="""@misc{rekathati2021introducing,
25
+ author = {Rekathati, Faton},
26
+ title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
27
+ url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
28
+ year = {2021}
29
+ }""",
24
30
  )
@@ -19,4 +19,13 @@ kowshik24_bangla_embedding_model = ModelMeta(
19
19
  public_training_code="https://github.com/kowshik24/Bangla-Embedding",
20
20
  public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
21
21
  training_datasets=set(),
22
+ citation="""@inproceedings{reimers-2019-sentence-bert,
23
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
24
+ author = "Reimers, Nils and Gurevych, Iryna",
25
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
26
+ month = "11",
27
+ year = "2019",
28
+ publisher = "Association for Computational Linguistics",
29
+ url = "https://arxiv.org/abs/1908.10084",
30
+ }""",
22
31
  )