mteb 2.1.3__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
- mteb/models/model_implementations/tarka_models.py +58 -0
- mteb/models/model_implementations/voyage_models.py +6 -7
- mteb/models/search_wrappers.py +1 -0
- mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/METADATA +1 -1
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/RECORD +11 -10
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/WHEEL +0 -0
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.3.dist-info → mteb-2.1.5.dist-info}/top_level.txt +0 -0
|
@@ -1,29 +1,29 @@
|
|
|
1
1
|
{
|
|
2
2
|
"test": {
|
|
3
|
-
"num_samples":
|
|
4
|
-
"number_of_characters":
|
|
3
|
+
"num_samples": 6362,
|
|
4
|
+
"number_of_characters": 180770,
|
|
5
5
|
"documents_text_statistics": {
|
|
6
|
-
"total_text_length":
|
|
7
|
-
"min_text_length":
|
|
8
|
-
"average_text_length":
|
|
9
|
-
"max_text_length":
|
|
10
|
-
"unique_texts":
|
|
6
|
+
"total_text_length": 39142,
|
|
7
|
+
"min_text_length": 2,
|
|
8
|
+
"average_text_length": 7.68243375858685,
|
|
9
|
+
"max_text_length": 31,
|
|
10
|
+
"unique_texts": 5095
|
|
11
11
|
},
|
|
12
12
|
"documents_image_statistics": null,
|
|
13
13
|
"queries_text_statistics": {
|
|
14
|
-
"total_text_length":
|
|
15
|
-
"min_text_length":
|
|
16
|
-
"average_text_length":
|
|
17
|
-
"max_text_length":
|
|
18
|
-
"unique_texts":
|
|
14
|
+
"total_text_length": 141628,
|
|
15
|
+
"min_text_length": 79,
|
|
16
|
+
"average_text_length": 111.78216258879242,
|
|
17
|
+
"max_text_length": 185,
|
|
18
|
+
"unique_texts": 1267
|
|
19
19
|
},
|
|
20
20
|
"queries_image_statistics": null,
|
|
21
21
|
"relevant_docs_statistics": {
|
|
22
|
-
"num_relevant_docs":
|
|
22
|
+
"num_relevant_docs": 1267,
|
|
23
23
|
"min_relevant_docs_per_query": 1,
|
|
24
24
|
"average_relevant_docs_per_query": 1.0,
|
|
25
25
|
"max_relevant_docs_per_query": 1,
|
|
26
|
-
"unique_relevant_docs":
|
|
26
|
+
"unique_relevant_docs": 478
|
|
27
27
|
},
|
|
28
28
|
"top_ranked_statistics": null
|
|
29
29
|
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from mteb.models.model_implementations.google_models import gemma_embedding_loader
|
|
2
|
+
from mteb.models.model_meta import ModelMeta
|
|
3
|
+
|
|
4
|
+
Tarka_Embedding_150M_V1_CITATION = """@misc{tarka_ai_research_2025,
|
|
5
|
+
author = { Tarka AI Research },
|
|
6
|
+
title = { Tarka-Embedding-150M-V1 (Revision c5f4f43) },
|
|
7
|
+
year = 2025,
|
|
8
|
+
url = { https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1 },
|
|
9
|
+
doi = { 10.57967/hf/6875 },
|
|
10
|
+
publisher = { Hugging Face }
|
|
11
|
+
}"""
|
|
12
|
+
|
|
13
|
+
MULTILINGUAL_EVALUATED_LANGUAGES = [
|
|
14
|
+
"arb-Arab",
|
|
15
|
+
"eng-Latn",
|
|
16
|
+
"spa-Latn",
|
|
17
|
+
"deu-Latn",
|
|
18
|
+
"fra-Latn",
|
|
19
|
+
"jpn-Jpan",
|
|
20
|
+
"kor-Hang",
|
|
21
|
+
"zho-Hans",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
training_data = {
|
|
25
|
+
"T2Retrieval",
|
|
26
|
+
"DuRetrieval",
|
|
27
|
+
"MMarcoReranking",
|
|
28
|
+
"CMedQAv2-reranking",
|
|
29
|
+
"NQ",
|
|
30
|
+
"MSMARCO",
|
|
31
|
+
"HotpotQA",
|
|
32
|
+
"FEVER",
|
|
33
|
+
"MrTidyRetrieval",
|
|
34
|
+
"MIRACLRetrieval",
|
|
35
|
+
"CodeSearchNet",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
tarka_embedding_150m_v1 = ModelMeta(
|
|
39
|
+
loader=gemma_embedding_loader,
|
|
40
|
+
name="Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
41
|
+
languages=MULTILINGUAL_EVALUATED_LANGUAGES,
|
|
42
|
+
open_weights=True,
|
|
43
|
+
revision="c5f4f43",
|
|
44
|
+
release_date="2025-11-04",
|
|
45
|
+
n_parameters=155_714_304,
|
|
46
|
+
embed_dim=768,
|
|
47
|
+
max_tokens=2048,
|
|
48
|
+
license="gemma",
|
|
49
|
+
reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
50
|
+
framework=["Sentence Transformers", "PyTorch"],
|
|
51
|
+
use_instructions=True,
|
|
52
|
+
public_training_code=None,
|
|
53
|
+
public_training_data=None,
|
|
54
|
+
training_datasets=training_data,
|
|
55
|
+
similarity_fn_name="cosine",
|
|
56
|
+
memory_usage_mb=576,
|
|
57
|
+
citation=Tarka_Embedding_150M_V1_CITATION,
|
|
58
|
+
)
|
|
@@ -156,16 +156,15 @@ class VoyageModel(AbsEncoder):
|
|
|
156
156
|
and len(batch) < batch_size
|
|
157
157
|
and batch_tokens < self._max_tokens_per_batch
|
|
158
158
|
):
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
)
|
|
159
|
+
txt = sentences[index] if len(sentences[index]) > 0 else " "
|
|
160
|
+
n_tokens = len(self._client.tokenize([txt], model=self._model_name)[0])
|
|
162
161
|
if (
|
|
163
162
|
batch_tokens + n_tokens > self._max_tokens_per_batch
|
|
164
163
|
and len(batch) > 0
|
|
165
164
|
):
|
|
166
165
|
break
|
|
167
166
|
batch_tokens += n_tokens
|
|
168
|
-
batch.append(
|
|
167
|
+
batch.append(txt)
|
|
169
168
|
index += 1
|
|
170
169
|
|
|
171
170
|
embeddings.extend(
|
|
@@ -249,7 +248,7 @@ voyage_3_5 = ModelMeta(
|
|
|
249
248
|
n_parameters=None,
|
|
250
249
|
memory_usage_mb=None,
|
|
251
250
|
license=None,
|
|
252
|
-
reference="https://
|
|
251
|
+
reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
|
|
253
252
|
similarity_fn_name="cosine",
|
|
254
253
|
framework=["API"],
|
|
255
254
|
use_instructions=True,
|
|
@@ -274,7 +273,7 @@ voyage_3_5_int8 = ModelMeta(
|
|
|
274
273
|
n_parameters=None,
|
|
275
274
|
memory_usage_mb=None,
|
|
276
275
|
license=None,
|
|
277
|
-
reference="https://
|
|
276
|
+
reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
|
|
278
277
|
similarity_fn_name="cosine",
|
|
279
278
|
framework=["API"],
|
|
280
279
|
use_instructions=True,
|
|
@@ -300,7 +299,7 @@ voyage_3_5_binary = ModelMeta(
|
|
|
300
299
|
n_parameters=None,
|
|
301
300
|
memory_usage_mb=None,
|
|
302
301
|
license=None,
|
|
303
|
-
reference="https://
|
|
302
|
+
reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
|
|
304
303
|
similarity_fn_name="cosine",
|
|
305
304
|
framework=["API"],
|
|
306
305
|
use_instructions=True,
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -191,6 +191,7 @@ class SearchEncoderWrapper:
|
|
|
191
191
|
cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
|
|
192
192
|
cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
|
|
193
193
|
|
|
194
|
+
sub_corpus_ids = list(sub_corpus_ids)
|
|
194
195
|
for query_itr in range(len(query_embeddings)):
|
|
195
196
|
query_id = query_idx_to_id[query_itr]
|
|
196
197
|
for sub_corpus_id, score in zip(
|
|
@@ -9,7 +9,7 @@ class WinoGrande(AbsTaskRetrieval):
|
|
|
9
9
|
reference="https://winogrande.allenai.org/",
|
|
10
10
|
dataset={
|
|
11
11
|
"path": "mteb/WinoGrande",
|
|
12
|
-
"revision": "
|
|
12
|
+
"revision": "4dec9c5666e9f84702ac614363db6d96a68bc6de",
|
|
13
13
|
},
|
|
14
14
|
type="Retrieval",
|
|
15
15
|
category="t2t",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.1.
|
|
3
|
+
Version: 2.1.5
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -1340,7 +1340,7 @@ mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json,sha256=NHt0U-wJXBOPYOki5Y
|
|
|
1340
1340
|
mteb/descriptive_stats/Retrieval/WebFAQRetrieval.json,sha256=uNkLCfiuVbdNKZ54vYGo8dPaoBlTShexDjl_IwCcT_M,60318
|
|
1341
1341
|
mteb/descriptive_stats/Retrieval/WikiSQLRetrieval.json,sha256=JinCBRnmfMDeIwQtQfD6bD8MYNEpUgedw05A6G-W7u4,985
|
|
1342
1342
|
mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json,sha256=v9npJOOJrcBUdRQ8EzAbOrpynAoSAJBaJoSJhS-qkww,20357
|
|
1343
|
-
mteb/descriptive_stats/Retrieval/WinoGrande.json,sha256
|
|
1343
|
+
mteb/descriptive_stats/Retrieval/WinoGrande.json,sha256=czzrnqg24MPfCrlVPLamL4LvMdWWOZC4R4tJYjc1QAA,982
|
|
1344
1344
|
mteb/descriptive_stats/Retrieval/XMarket.json,sha256=s0odo5MWwzgQi0HwqK4APYhWIBR8WEtiC8KKt7wgzOc,4770
|
|
1345
1345
|
mteb/descriptive_stats/Retrieval/XPQARetrieval.json,sha256=qHxeCggIwW1iw5ujbHsCc_7rf_-JIhfYRFGEKvzSTO0,44958
|
|
1346
1346
|
mteb/descriptive_stats/Retrieval/XQuADRetrieval.json,sha256=NjNvrloDl561L_WTWBg6fQ31yBTZInYWL-SW0q35SfA,15462
|
|
@@ -1419,7 +1419,7 @@ mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4
|
|
|
1419
1419
|
mteb/models/instruct_wrapper.py,sha256=HxHmnlxkjtZhfgTZRYJBT3Nma7Dhx6a9e2Bg-cO_IYs,8844
|
|
1420
1420
|
mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
|
|
1421
1421
|
mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
|
|
1422
|
-
mteb/models/search_wrappers.py,sha256=
|
|
1422
|
+
mteb/models/search_wrappers.py,sha256=W99EeMDQ58N0auVp2-7T39orG7GZLekSsVCdxEZLxw0,15638
|
|
1423
1423
|
mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
|
|
1424
1424
|
mteb/models/cache_wrappers/__init__.py,sha256=j3JBHN73Tr7uMUO92FEvKXstnybxrPpGWmKXU2lAoIE,88
|
|
1425
1425
|
mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
|
|
@@ -1521,6 +1521,7 @@ mteb/models/model_implementations/shuu_model.py,sha256=KkcuVYjIzoha3Fvxh8ppqHQ9B
|
|
|
1521
1521
|
mteb/models/model_implementations/siglip_models.py,sha256=tvi8QB2ayBoeXsxwHrl5RFlkknvE6FM9N06zSBWGQD0,12602
|
|
1522
1522
|
mteb/models/model_implementations/sonar_models.py,sha256=Nc6kAJRWSrxA57DPRrgOPHqS1dNhz2vsE_1ZA2JtigQ,4784
|
|
1523
1523
|
mteb/models/model_implementations/stella_models.py,sha256=NL3tk-rnuBdznsQ-nmelqun4tFO2xKoNPPOOVKqnPGU,8062
|
|
1524
|
+
mteb/models/model_implementations/tarka_models.py,sha256=aj4PvEzZ6ZSKcvwYVuTxf1IFOvH4rmJHtbPUcRw1fMI,1568
|
|
1524
1525
|
mteb/models/model_implementations/text2vec_models.py,sha256=zaHWRc2W0RYZAOetinqRzug9UGW0HmY5U-jYsLXA8wo,4160
|
|
1525
1526
|
mteb/models/model_implementations/ua_sentence_models.py,sha256=fcvXR4-Rrt-UDTlDkh2ZAO1gO_ufCOHiT6EhoeKiHx8,1224
|
|
1526
1527
|
mteb/models/model_implementations/uae_models.py,sha256=KZxH5a3t-sfh33xUBkLizEuyFAyPlGfnRsn-S7mjq74,3112
|
|
@@ -1528,7 +1529,7 @@ mteb/models/model_implementations/vdr_models.py,sha256=lMm43BBPjZU5lxZcpmPZ8hn0P
|
|
|
1528
1529
|
mteb/models/model_implementations/vi_vn_models.py,sha256=quWmd3JT2J6SlAsFrV2gcnc67M9zr58mEF2zLUF8-uw,4795
|
|
1529
1530
|
mteb/models/model_implementations/vista_models.py,sha256=Q3I01kRtIPaoke0iMIcH4CLcCDTnMSIBFNCof7LPTX4,10832
|
|
1530
1531
|
mteb/models/model_implementations/vlm2vec_models.py,sha256=HGGy_-z9Wc99xOKum71rBNipCPqWcM1efmmXgy5Rvxc,11724
|
|
1531
|
-
mteb/models/model_implementations/voyage_models.py,sha256=
|
|
1532
|
+
mteb/models/model_implementations/voyage_models.py,sha256=dOCccOQlloGrg0q44PxMQzx8dHuQ8VgkDUD01EydpJ0,19824
|
|
1532
1533
|
mteb/models/model_implementations/voyage_v.py,sha256=6i-oFnaY2D2qR1Dgb0B98ougnD1ujW9aNG9QoWyvwwY,8041
|
|
1533
1534
|
mteb/models/model_implementations/xyz_models.py,sha256=TePlrH6EHwRPO87U_J3Yce9-XHCn_X7I2cJ_6BZ2fUY,1296
|
|
1534
1535
|
mteb/models/model_implementations/youtu_models.py,sha256=NB74E6z-_36HyXb8GXKn8CrmRLN68uX9eH4xcS57zl0,5938
|
|
@@ -2283,7 +2284,7 @@ mteb/tasks/retrieval/eng/viz_wiz_it2t_retrieval.py,sha256=jE70T5If62lkKnbF-CMAgR
|
|
|
2283
2284
|
mteb/tasks/retrieval/eng/vqa2_it2t_retrieval.py,sha256=M_g6Y6OrNRByD52-JxuO8iIO8aFUg8HHg5BxQ31-m1I,1403
|
|
2284
2285
|
mteb/tasks/retrieval/eng/web_qa_t2it_retrieval.py,sha256=c7pJja_ii4ku9pfd-Gd3FqO6cF-0IIEb_H0FRY2A69w,1477
|
|
2285
2286
|
mteb/tasks/retrieval/eng/web_qa_t2t_retrieval.py,sha256=rx6uoqc8yduGhuvdv2K5v2oFiQI8jP-BEt5nmaKrsac,1517
|
|
2286
|
-
mteb/tasks/retrieval/eng/wino_grande_retrieval.py,sha256=
|
|
2287
|
+
mteb/tasks/retrieval/eng/wino_grande_retrieval.py,sha256=ou8TlZ-JPS1nh7NS7OeerUsB2WRZWWwKTuygpJNLb2A,1714
|
|
2287
2288
|
mteb/tasks/retrieval/est/__init__.py,sha256=uNkOSKfZsO1F-xC4twL8ukxtfrI4A4eIU-oAs3Hi5Dg,46
|
|
2288
2289
|
mteb/tasks/retrieval/est/estqa.py,sha256=sORL3KI47yXOy8GXptBtCuryOdDShdRDFpCdnnIaaCI,1418
|
|
2289
2290
|
mteb/tasks/retrieval/fas/__init__.py,sha256=DUq1CTC_nj-201dbUNqlmqN-oR-YKjeW3O8DhtMX9rk,2213
|
|
@@ -2536,9 +2537,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2536
2537
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2537
2538
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2538
2539
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2539
|
-
mteb-2.1.
|
|
2540
|
-
mteb-2.1.
|
|
2541
|
-
mteb-2.1.
|
|
2542
|
-
mteb-2.1.
|
|
2543
|
-
mteb-2.1.
|
|
2544
|
-
mteb-2.1.
|
|
2540
|
+
mteb-2.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2541
|
+
mteb-2.1.5.dist-info/METADATA,sha256=VunL5iKfFZLfKB-4Yruzd5vq31mhfNOT0JVjs2GibSQ,13573
|
|
2542
|
+
mteb-2.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2543
|
+
mteb-2.1.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2544
|
+
mteb-2.1.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2545
|
+
mteb-2.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|