mteb 2.3.8__py3-none-any.whl → 2.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
5
5
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
6
  from mteb.types import PromptType
7
7
 
8
+ from .facebookai import XLMR_LANGUAGES
9
+
8
10
  E5_PAPER_RELEASE_DATE = "2024-02-08"
9
- XLMR_LANGUAGES = [
10
- "afr-Latn",
11
- "amh-Latn",
12
- "ara-Latn",
13
- "asm-Latn",
14
- "aze-Latn",
15
- "bel-Latn",
16
- "bul-Latn",
17
- "ben-Latn",
18
- "ben-Beng",
19
- "bre-Latn",
20
- "bos-Latn",
21
- "cat-Latn",
22
- "ces-Latn",
23
- "cym-Latn",
24
- "dan-Latn",
25
- "deu-Latn",
26
- "ell-Latn",
27
- "eng-Latn",
28
- "epo-Latn",
29
- "spa-Latn",
30
- "est-Latn",
31
- "eus-Latn",
32
- "fas-Latn",
33
- "fin-Latn",
34
- "fra-Latn",
35
- "fry-Latn",
36
- "gle-Latn",
37
- "gla-Latn",
38
- "glg-Latn",
39
- "guj-Latn",
40
- "hau-Latn",
41
- "heb-Latn",
42
- "hin-Latn",
43
- "hin-Deva",
44
- "hrv-Latn",
45
- "hun-Latn",
46
- "hye-Latn",
47
- "ind-Latn",
48
- "isl-Latn",
49
- "ita-Latn",
50
- "jpn-Latn",
51
- "jav-Latn",
52
- "kat-Latn",
53
- "kaz-Latn",
54
- "khm-Latn",
55
- "kan-Latn",
56
- "kor-Latn",
57
- "kur-Latn",
58
- "kir-Latn",
59
- "lat-Latn",
60
- "lao-Latn",
61
- "lit-Latn",
62
- "lav-Latn",
63
- "mlg-Latn",
64
- "mkd-Latn",
65
- "mal-Latn",
66
- "mon-Latn",
67
- "mar-Latn",
68
- "msa-Latn",
69
- "mya-Latn",
70
- "nep-Latn",
71
- "nld-Latn",
72
- "nob-Latn",
73
- "orm-Latn",
74
- "ori-Latn",
75
- "pan-Latn",
76
- "pol-Latn",
77
- "pus-Latn",
78
- "por-Latn",
79
- "ron-Latn",
80
- "rus-Latn",
81
- "san-Latn",
82
- "snd-Latn",
83
- "sin-Latn",
84
- "slk-Latn",
85
- "slv-Latn",
86
- "som-Latn",
87
- "sqi-Latn",
88
- "srp-Latn",
89
- "sun-Latn",
90
- "swe-Latn",
91
- "swa-Latn",
92
- "tam-Latn",
93
- "tam-Taml",
94
- "tel-Latn",
95
- "tel-Telu",
96
- "tha-Latn",
97
- "tgl-Latn",
98
- "tur-Latn",
99
- "uig-Latn",
100
- "ukr-Latn",
101
- "urd-Latn",
102
- "urd-Arab",
103
- "uzb-Latn",
104
- "vie-Latn",
105
- "xho-Latn",
106
- "yid-Latn",
107
- "zho-Hant",
108
- "zho-Hans",
109
- ]
11
+
110
12
 
111
13
  MULTILINGUAL_E5_CITATION = """
112
14
  @article{wang2024multilingual,
@@ -0,0 +1,147 @@
1
+ from mteb.models import sentence_transformers_loader
2
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
+
4
+ XLMR_LANGUAGES = [
5
+ "afr-Latn",
6
+ "amh-Latn",
7
+ "ara-Latn",
8
+ "asm-Latn",
9
+ "aze-Latn",
10
+ "bel-Latn",
11
+ "bul-Latn",
12
+ "ben-Latn",
13
+ "ben-Beng",
14
+ "bre-Latn",
15
+ "bos-Latn",
16
+ "cat-Latn",
17
+ "ces-Latn",
18
+ "cym-Latn",
19
+ "dan-Latn",
20
+ "deu-Latn",
21
+ "ell-Latn",
22
+ "eng-Latn",
23
+ "epo-Latn",
24
+ "spa-Latn",
25
+ "est-Latn",
26
+ "eus-Latn",
27
+ "fas-Latn",
28
+ "fin-Latn",
29
+ "fra-Latn",
30
+ "fry-Latn",
31
+ "gle-Latn",
32
+ "gla-Latn",
33
+ "glg-Latn",
34
+ "guj-Latn",
35
+ "hau-Latn",
36
+ "heb-Latn",
37
+ "hin-Latn",
38
+ "hin-Deva",
39
+ "hrv-Latn",
40
+ "hun-Latn",
41
+ "hye-Latn",
42
+ "ind-Latn",
43
+ "isl-Latn",
44
+ "ita-Latn",
45
+ "jpn-Latn",
46
+ "jav-Latn",
47
+ "kat-Latn",
48
+ "kaz-Latn",
49
+ "khm-Latn",
50
+ "kan-Latn",
51
+ "kor-Latn",
52
+ "kur-Latn",
53
+ "kir-Latn",
54
+ "lat-Latn",
55
+ "lao-Latn",
56
+ "lit-Latn",
57
+ "lav-Latn",
58
+ "mlg-Latn",
59
+ "mkd-Latn",
60
+ "mal-Latn",
61
+ "mon-Latn",
62
+ "mar-Latn",
63
+ "msa-Latn",
64
+ "mya-Latn",
65
+ "nep-Latn",
66
+ "nld-Latn",
67
+ "nob-Latn",
68
+ "orm-Latn",
69
+ "ori-Latn",
70
+ "pan-Latn",
71
+ "pol-Latn",
72
+ "pus-Latn",
73
+ "por-Latn",
74
+ "ron-Latn",
75
+ "rus-Latn",
76
+ "san-Latn",
77
+ "snd-Latn",
78
+ "sin-Latn",
79
+ "slk-Latn",
80
+ "slv-Latn",
81
+ "som-Latn",
82
+ "sqi-Latn",
83
+ "srp-Latn",
84
+ "sun-Latn",
85
+ "swe-Latn",
86
+ "swa-Latn",
87
+ "tam-Latn",
88
+ "tam-Taml",
89
+ "tel-Latn",
90
+ "tel-Telu",
91
+ "tha-Latn",
92
+ "tgl-Latn",
93
+ "tur-Latn",
94
+ "uig-Latn",
95
+ "ukr-Latn",
96
+ "urd-Latn",
97
+ "urd-Arab",
98
+ "uzb-Latn",
99
+ "vie-Latn",
100
+ "xho-Latn",
101
+ "yid-Latn",
102
+ "zho-Hant",
103
+ "zho-Hans",
104
+ ]
105
+
106
+
107
+ xlmr_base = ModelMeta(
108
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
109
+ name="FacebookAI/xlm-roberta-base",
110
+ languages=XLMR_LANGUAGES,
111
+ open_weights=True,
112
+ revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
113
+ release_date="2019-11-05", # arxiv paper release
114
+ n_parameters=278043648,
115
+ memory_usage_mb=1064,
116
+ embed_dim=768,
117
+ license="mit",
118
+ max_tokens=512,
119
+ reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
120
+ similarity_fn_name=ScoringFunction.COSINE,
121
+ framework=["Sentence Transformers", "PyTorch"],
122
+ use_instructions=False,
123
+ public_training_code=None,
124
+ public_training_data=None,
125
+ training_datasets=set(),
126
+ )
127
+
128
+ xlmr_large = ModelMeta(
129
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
130
+ name="FacebookAI/xlm-roberta-large",
131
+ languages=XLMR_LANGUAGES,
132
+ open_weights=True,
133
+ revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
134
+ release_date="2019-11-05", # arxiv paper release
135
+ n_parameters=559890432,
136
+ memory_usage_mb=2141,
137
+ embed_dim=1024,
138
+ license="mit",
139
+ max_tokens=512,
140
+ reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
141
+ similarity_fn_name=ScoringFunction.COSINE,
142
+ framework=["Sentence Transformers", "PyTorch"],
143
+ use_instructions=False,
144
+ public_training_code=None,
145
+ public_training_data=None,
146
+ training_datasets=set(),
147
+ )
@@ -0,0 +1,24 @@
1
+ from mteb.models import sentence_transformers_loader
2
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
+
4
+ sbert_swedish = ModelMeta(
5
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
6
+ name="KBLab/sentence-bert-swedish-cased",
7
+ languages=["swe-Latn"],
8
+ open_weights=True,
9
+ revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
10
+ release_date="2023-01-11",
11
+ n_parameters=124690944,
12
+ memory_usage_mb=476,
13
+ embed_dim=768,
14
+ license="apache-2.0",
15
+ max_tokens=384,
16
+ reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
17
+ similarity_fn_name=ScoringFunction.COSINE,
18
+ framework=["Sentence Transformers", "PyTorch"],
19
+ use_instructions=False,
20
+ public_training_code=None,
21
+ public_training_data=None,
22
+ training_datasets=None,
23
+ adapted_from="sentence-transformers/all-mpnet-base-v2",
24
+ )
@@ -0,0 +1,24 @@
1
+ from mteb.models import sentence_transformers_loader
2
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
+
4
+ xlmr_scandi = ModelMeta(
5
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
6
+ name="KFST/XLMRoberta-en-da-sv-nb",
7
+ languages=["swe-Latn", "nob-Latn", "nno-Latn", "dan-Latn", "eng-Latn"],
8
+ open_weights=True,
9
+ revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
10
+ release_date="2022-02-22",
11
+ n_parameters=278043648,
12
+ memory_usage_mb=1061,
13
+ embed_dim=768,
14
+ license="not specified",
15
+ max_tokens=512,
16
+ reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
17
+ similarity_fn_name=ScoringFunction.COSINE,
18
+ framework=["Sentence Transformers", "PyTorch"],
19
+ use_instructions=False,
20
+ public_training_code=None,
21
+ public_training_data=None,
22
+ training_datasets=None,
23
+ adapted_from="FacebookAI/xlm-roberta-base",
24
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.3.8
3
+ Version: 2.3.9
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -1477,7 +1477,7 @@ mteb/models/model_implementations/colsmol_models.py,sha256=O2M7Ksydh94M_Iax4KytH
1477
1477
  mteb/models/model_implementations/conan_models.py,sha256=G-s7xo9VtNX-f7lWKtYVGHHiMMN0Xp44PlNIp7E0LAo,6502
1478
1478
  mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2W7qHhmF6KdXkvHrQNEI,16380
1479
1479
  mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
1480
- mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
1480
+ mteb/models/model_implementations/e5_models.py,sha256=ZLRgzx2uEBc_yWY6DwcJFUNKG6RHpWSEVp1_jaEURhs,9373
1481
1481
  mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
1482
1482
  mteb/models/model_implementations/eagerworks_models.py,sha256=NOQkCUqn9jLSpf9p6KyaIHnJxYV1MNlr2z7hO2AcRSc,5744
1483
1483
  mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
@@ -1485,6 +1485,7 @@ mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQB
1485
1485
  mteb/models/model_implementations/euler_models.py,sha256=fZoXYeDjSRN2Qj1Pf-ROi8xok03PjhYi4FLEZKjMPkk,905
1486
1486
  mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
1487
1487
  mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
1488
+ mteb/models/model_implementations/facebookai.py,sha256=uhE6rB1YgxE0SIc7u8heE1U62qRFFA23IMgpjxBq_Ok,3116
1488
1489
  mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
1489
1490
  mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
1490
1491
  mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
@@ -1499,7 +1500,9 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
1499
1500
  mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
1500
1501
  mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
1501
1502
  mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
1503
+ mteb/models/model_implementations/kblab.py,sha256=DDh8gDEI6YPjS4_yGYWC4HatE0mFf7vhGDU83zzV7V0,866
1502
1504
  mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
1505
+ mteb/models/model_implementations/kfst.py,sha256=BQj0fxMJwyA6NOdK26NDYVL3z2PW1_F-lTTVImxEWZQ,892
1503
1506
  mteb/models/model_implementations/kowshik24_models.py,sha256=HoQpybjhquK2XSnawlq0aiSWFI5M7l6N4DNY4MQ-P10,976
1504
1507
  mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
1505
1508
  mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
@@ -2578,9 +2581,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2578
2581
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2579
2582
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2580
2583
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2581
- mteb-2.3.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2582
- mteb-2.3.8.dist-info/METADATA,sha256=QMpRmhMLXi45L0d29kCoNcEugCwDl8IWCc3wE_r-fb4,13923
2583
- mteb-2.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2584
- mteb-2.3.8.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2585
- mteb-2.3.8.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2586
- mteb-2.3.8.dist-info/RECORD,,
2584
+ mteb-2.3.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2585
+ mteb-2.3.9.dist-info/METADATA,sha256=da_FgK7mGK2HivEwQfKDyIPYzDVMFaz-lTeVQVvp2q8,13923
2586
+ mteb-2.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2587
+ mteb-2.3.9.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2588
+ mteb-2.3.9.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2589
+ mteb-2.3.9.dist-info/RECORD,,
File without changes