PyPI - mteb - Versions diffs - 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl - Mend

mteb 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

mteb/models/model_implementations/andersborges.py CHANGED Viewed

@@ -24,6 +24,12 @@ model2vecdk = ModelMeta(
     training_datasets=set(),  # distilled
     public_training_code="https://github.com/andersborges/dkmodel2vec",
     public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    citation="""@article{minishlab2024model2vec,
+  author = {Tulkens, Stephan and {van Dongen}, Thomas},
+  title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
+  year = {2024},
+  url = {https://github.com/MinishLab/model2vec}
+}""",
 )
@@ -48,4 +54,10 @@ model2vecdk_stem = ModelMeta(
     training_datasets=set(),  # distilled
     public_training_code="https://github.com/andersborges/dkmodel2vec",
     public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    citation="""@article{minishlab2024model2vec,
+  author = {Tulkens, Stephan and {van Dongen}, Thomas},
+  title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
+  year = {2024},
+  url = {https://github.com/MinishLab/model2vec}
+}""",
 )

mteb/models/model_implementations/bge_models.py CHANGED Viewed

@@ -411,6 +411,7 @@ bge_small_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-small-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 bge_base_zh = ModelMeta(
@@ -436,6 +437,7 @@ bge_base_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-base-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 bge_large_zh = ModelMeta(
@@ -461,6 +463,7 @@ bge_large_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-large-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 bge_small_en = ModelMeta(
@@ -486,6 +489,7 @@ bge_small_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-small-en-v1.5",
+    citation=BGE_15_CITATION,
 )
 bge_base_en = ModelMeta(
@@ -511,6 +515,7 @@ bge_base_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-base-en-v1.5",
+    citation=BGE_15_CITATION,
 )
 bge_large_en = ModelMeta(
@@ -536,6 +541,7 @@ bge_large_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-large-en-v1.5",
+    citation=BGE_15_CITATION,
 )
@@ -561,6 +567,7 @@ bge_small_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 bge_base_zh_v1_5 = ModelMeta(
@@ -585,6 +592,7 @@ bge_base_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 bge_large_zh_v1_5 = ModelMeta(
@@ -609,6 +617,7 @@ bge_large_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 bge_m3 = ModelMeta(
@@ -630,6 +639,14 @@ bge_m3 = ModelMeta(
     public_training_code=None,
     public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
     training_datasets=bge_m3_training_data,
+    citation="""@misc{bge-m3,
+      title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+      author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+      year={2024},
+      eprint={2402.03216},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}""",
 )
 # Contents of cfli/bge-full-data
@@ -722,6 +739,24 @@ bge_multilingual_gemma2 = ModelMeta(
     }
     | bge_full_data
     | bge_m3_training_data,
+    citation="""@misc{bge-m3,
+      title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+      author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+      year={2024},
+      eprint={2402.03216},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+@misc{bge_embedding,
+      title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
+      author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
+      year={2023},
+      eprint={2309.07597},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}""",
 )
 bge_en_icl = ModelMeta(
@@ -778,6 +813,14 @@ bge_m3_unsupervised = ModelMeta(
     public_training_code="https://github.com/FlagOpen/FlagEmbedding",
     public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
     training_datasets=bge_m3_training_data,
+    citation="""@misc{bge-m3,
+      title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+      author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+      year={2024},
+      eprint={2402.03216},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}""",
 )
 manu__bge_m3_custom_fr = ModelMeta(

mteb/models/model_implementations/dino_models.py CHANGED Viewed

@@ -123,6 +123,14 @@ dinov2_small = ModelMeta(
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
     training_datasets=dinov2_training_datasets,
+    citation="""@misc{oquab2023dinov2,
+      title={DINOv2: Learning Robust Visual Features without Supervision},
+      author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
+      year={2023},
+      eprint={2304.07193},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}""",
 )
 dinov2_base = ModelMeta(
@@ -145,6 +153,14 @@ dinov2_base = ModelMeta(
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
     training_datasets=dinov2_training_datasets,
+    citation="""@misc{oquab2023dinov2,
+      title={DINOv2: Learning Robust Visual Features without Supervision},
+      author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
+      year={2023},
+      eprint={2304.07193},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}""",
 )
 dinov2_large = ModelMeta(
@@ -167,6 +183,14 @@ dinov2_large = ModelMeta(
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
     training_datasets=dinov2_training_datasets,
+    citation="""@misc{oquab2023dinov2,
+      title={DINOv2: Learning Robust Visual Features without Supervision},
+      author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
+      year={2023},
+      eprint={2304.07193},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}""",
 )
 dinov2_giant = ModelMeta(
@@ -189,6 +213,14 @@ dinov2_giant = ModelMeta(
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=False,
     training_datasets=dinov2_training_datasets,
+    citation="""@misc{oquab2023dinov2,
+      title={DINOv2: Learning Robust Visual Features without Supervision},
+      author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
+      year={2023},
+      eprint={2304.07193},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}""",
 )
 webssl_dino_training_datasets = set(
@@ -215,6 +247,14 @@ webssl_dino300m_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino1b_full2b = ModelMeta(
@@ -237,6 +277,14 @@ webssl_dino1b_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino2b_full2b = ModelMeta(
@@ -259,6 +307,14 @@ webssl_dino2b_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino3b_full2b = ModelMeta(
@@ -281,6 +337,14 @@ webssl_dino3b_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino5b_full2b = ModelMeta(
@@ -303,6 +367,14 @@ webssl_dino5b_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino7b_full8b_224 = ModelMeta(
@@ -325,6 +397,14 @@ webssl_dino7b_full8b_224 = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino7b_full8b_378 = ModelMeta(
@@ -347,6 +427,14 @@ webssl_dino7b_full8b_378 = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino7b_full8b_518 = ModelMeta(
@@ -369,6 +457,14 @@ webssl_dino7b_full8b_518 = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
@@ -392,6 +488,14 @@ webssl_dino2b_light2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino2b_heavy2b = ModelMeta(
@@ -414,6 +518,14 @@ webssl_dino2b_heavy2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino3b_light2b = ModelMeta(
@@ -436,6 +548,14 @@ webssl_dino3b_light2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_dino3b_heavy2b = ModelMeta(
@@ -458,6 +578,14 @@ webssl_dino3b_heavy2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_mae300m_full2b = ModelMeta(
@@ -480,6 +608,14 @@ webssl_mae300m_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_mae700m_full2b = ModelMeta(
@@ -502,6 +638,14 @@ webssl_mae700m_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )
 webssl_mae1b_full2b = ModelMeta(
@@ -524,4 +668,12 @@ webssl_mae1b_full2b = ModelMeta(
     similarity_fn_name=None,
     use_instructions=False,
     training_datasets=webssl_dino_training_datasets,
+    citation="""@article{fan2025scaling,
+  title={Scaling Language-Free Visual Representation Learning},
+  author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
+  year={2025},
+  eprint={2504.01017},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}""",
 )

mteb/models/model_implementations/emillykkejensen_models.py CHANGED Viewed

@@ -21,6 +21,15 @@ embedding_gemma_300m_scandi = ModelMeta(
     similarity_fn_name="cosine",  # type: ignore[arg-type]
     adapted_from="google/embeddinggemma-300m",
     memory_usage_mb=578,
+    citation="""@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}""",
 )
@@ -67,4 +76,13 @@ mmbert_scandi = ModelMeta(
     training_datasets=set(),
     similarity_fn_name="cosine",  # type: ignore[arg-type]
     adapted_from="jonasaise/scandmmBERT-base-scandinavian",
+    citation="""@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}""",
 )

mteb/models/model_implementations/euler_models.py CHANGED Viewed

@@ -22,4 +22,10 @@ Euler_Legal_Embedding_V1 = ModelMeta(
     training_datasets=set(),  # final-data-new-anonymized-grok4-filtered
     adapted_from="Qwen/Qwen3-Embedding-8B",
     superseded_by=None,
+    citation="""@misc{euler2025legal,
+      title={Euler-Legal-Embedding: Advanced Legal Representation Learning},
+      author={LawRank Team},
+      year={2025},
+      publisher={Hugging Face}
+}""",
 )

mteb/models/model_implementations/fa_models.py CHANGED Viewed

@@ -156,6 +156,15 @@ tooka_sbert = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=None,
+    citation="""@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}""",
 )
 fa_bert = ModelMeta(
@@ -180,6 +189,29 @@ fa_bert = ModelMeta(
         # It's just a base model
         # https://huggingface.co/datasets/sbunlp/hmblogs-v3
     ),
+    citation="""@inproceedings{masumi-etal-2025-fabert,
+    title = "{F}a{BERT}: Pre-training {BERT} on {P}ersian Blogs",
+    author = "Masumi, Mostafa  and
+      Majd, Seyed Soroush  and
+      Shamsfard, Mehrnoush  and
+      Beigy, Hamid",
+    editor = "Bak, JinYeong  and
+      Goot, Rob van der  and
+      Jang, Hyeju  and
+      Buaphet, Weerayut  and
+      Ramponi, Alan  and
+      Xu, Wei  and
+      Ritter, Alan",
+    booktitle = "Proceedings of the Tenth Workshop on Noisy and User-generated Text",
+    month = may,
+    year = "2025",
+    address = "Albuquerque, New Mexico, USA",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2025.wnut-1.10/",
+    doi = "10.18653/v1/2025.wnut-1.10",
+    pages = "85--96",
+    ISBN = "979-8-89176-232-9",
+}""",
 )
 tooka_sbert_v2_small = ModelMeta(
@@ -201,6 +233,15 @@ tooka_sbert_v2_small = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=None,
+    citation="""@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}""",
 )
 tooka_sbert_v2_large = ModelMeta(
@@ -222,4 +263,13 @@ tooka_sbert_v2_large = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=None,
+    citation="""@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}""",
 )

mteb/models/model_implementations/facebookai.py CHANGED Viewed

@@ -123,6 +123,28 @@ xlmr_base = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=set(),
+    citation="""@article{DBLP:journals/corr/abs-1911-02116,
+  author    = {Alexis Conneau and
+               Kartikay Khandelwal and
+               Naman Goyal and
+               Vishrav Chaudhary and
+               Guillaume Wenzek and
+               Francisco Guzm{\'{a}}n and
+               Edouard Grave and
+               Myle Ott and
+               Luke Zettlemoyer and
+               Veselin Stoyanov},
+  title     = {Unsupervised Cross-lingual Representation Learning at Scale},
+  journal   = {CoRR},
+  volume    = {abs/1911.02116},
+  year      = {2019},
+  url       = {http://arxiv.org/abs/1911.02116},
+  eprinttype = {arXiv},
+  eprint    = {1911.02116},
+  timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
+  biburl    = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}""",
 )
 xlmr_large = ModelMeta(
@@ -144,4 +166,26 @@ xlmr_large = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=set(),
+    citation="""@article{DBLP:journals/corr/abs-1911-02116,
+  author    = {Alexis Conneau and
+               Kartikay Khandelwal and
+               Naman Goyal and
+               Vishrav Chaudhary and
+               Guillaume Wenzek and
+               Francisco Guzm{\'{a}}n and
+               Edouard Grave and
+               Myle Ott and
+               Luke Zettlemoyer and
+               Veselin Stoyanov},
+  title     = {Unsupervised Cross-lingual Representation Learning at Scale},
+  journal   = {CoRR},
+  volume    = {abs/1911.02116},
+  year      = {2019},
+  url       = {http://arxiv.org/abs/1911.02116},
+  eprinttype = {arXiv},
+  eprint    = {1911.02116},
+  timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
+  biburl    = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}""",
 )

mteb 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl

mteb 2.4.1__py3-none-any.whl → 2.4.2__py3-none-any.whl