mteb 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/models/model_implementations/tarka_models.py +58 -0
- mteb/models/search_wrappers.py +1 -0
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/METADATA +1 -1
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/RECORD +8 -7
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.1.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from mteb.models.model_implementations.google_models import gemma_embedding_loader
|
|
2
|
+
from mteb.models.model_meta import ModelMeta
|
|
3
|
+
|
|
4
|
+
Tarka_Embedding_150M_V1_CITATION = """@misc{tarka_ai_research_2025,
|
|
5
|
+
author = { Tarka AI Research },
|
|
6
|
+
title = { Tarka-Embedding-150M-V1 (Revision c5f4f43) },
|
|
7
|
+
year = 2025,
|
|
8
|
+
url = { https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1 },
|
|
9
|
+
doi = { 10.57967/hf/6875 },
|
|
10
|
+
publisher = { Hugging Face }
|
|
11
|
+
}"""
|
|
12
|
+
|
|
13
|
+
MULTILINGUAL_EVALUATED_LANGUAGES = [
|
|
14
|
+
"arb-Arab",
|
|
15
|
+
"eng-Latn",
|
|
16
|
+
"spa-Latn",
|
|
17
|
+
"deu-Latn",
|
|
18
|
+
"fra-Latn",
|
|
19
|
+
"jpn-Jpan",
|
|
20
|
+
"kor-Hang",
|
|
21
|
+
"zho-Hans",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
training_data = {
|
|
25
|
+
"T2Retrieval",
|
|
26
|
+
"DuRetrieval",
|
|
27
|
+
"MMarcoReranking",
|
|
28
|
+
"CMedQAv2-reranking",
|
|
29
|
+
"NQ",
|
|
30
|
+
"MSMARCO",
|
|
31
|
+
"HotpotQA",
|
|
32
|
+
"FEVER",
|
|
33
|
+
"MrTidyRetrieval",
|
|
34
|
+
"MIRACLRetrieval",
|
|
35
|
+
"CodeSearchNet",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
tarka_embedding_150m_v1 = ModelMeta(
|
|
39
|
+
loader=gemma_embedding_loader,
|
|
40
|
+
name="Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
41
|
+
languages=MULTILINGUAL_EVALUATED_LANGUAGES,
|
|
42
|
+
open_weights=True,
|
|
43
|
+
revision="c5f4f43",
|
|
44
|
+
release_date="2025-11-04",
|
|
45
|
+
n_parameters=155_714_304,
|
|
46
|
+
embed_dim=768,
|
|
47
|
+
max_tokens=2048,
|
|
48
|
+
license="gemma",
|
|
49
|
+
reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1",
|
|
50
|
+
framework=["Sentence Transformers", "PyTorch"],
|
|
51
|
+
use_instructions=True,
|
|
52
|
+
public_training_code=None,
|
|
53
|
+
public_training_data=None,
|
|
54
|
+
training_datasets=training_data,
|
|
55
|
+
similarity_fn_name="cosine",
|
|
56
|
+
memory_usage_mb=576,
|
|
57
|
+
citation=Tarka_Embedding_150M_V1_CITATION,
|
|
58
|
+
)
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -191,6 +191,7 @@ class SearchEncoderWrapper:
|
|
|
191
191
|
cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
|
|
192
192
|
cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
|
|
193
193
|
|
|
194
|
+
sub_corpus_ids = list(sub_corpus_ids)
|
|
194
195
|
for query_itr in range(len(query_embeddings)):
|
|
195
196
|
query_id = query_idx_to_id[query_itr]
|
|
196
197
|
for sub_corpus_id, score in zip(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.1.4
|
|
3
|
+
Version: 2.1.5
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -1419,7 +1419,7 @@ mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4
|
|
|
1419
1419
|
mteb/models/instruct_wrapper.py,sha256=HxHmnlxkjtZhfgTZRYJBT3Nma7Dhx6a9e2Bg-cO_IYs,8844
|
|
1420
1420
|
mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
|
|
1421
1421
|
mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
|
|
1422
|
-
mteb/models/search_wrappers.py,sha256=
|
|
1422
|
+
mteb/models/search_wrappers.py,sha256=W99EeMDQ58N0auVp2-7T39orG7GZLekSsVCdxEZLxw0,15638
|
|
1423
1423
|
mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
|
|
1424
1424
|
mteb/models/cache_wrappers/__init__.py,sha256=j3JBHN73Tr7uMUO92FEvKXstnybxrPpGWmKXU2lAoIE,88
|
|
1425
1425
|
mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
|
|
@@ -1521,6 +1521,7 @@ mteb/models/model_implementations/shuu_model.py,sha256=KkcuVYjIzoha3Fvxh8ppqHQ9B
|
|
|
1521
1521
|
mteb/models/model_implementations/siglip_models.py,sha256=tvi8QB2ayBoeXsxwHrl5RFlkknvE6FM9N06zSBWGQD0,12602
|
|
1522
1522
|
mteb/models/model_implementations/sonar_models.py,sha256=Nc6kAJRWSrxA57DPRrgOPHqS1dNhz2vsE_1ZA2JtigQ,4784
|
|
1523
1523
|
mteb/models/model_implementations/stella_models.py,sha256=NL3tk-rnuBdznsQ-nmelqun4tFO2xKoNPPOOVKqnPGU,8062
|
|
1524
|
+
mteb/models/model_implementations/tarka_models.py,sha256=aj4PvEzZ6ZSKcvwYVuTxf1IFOvH4rmJHtbPUcRw1fMI,1568
|
|
1524
1525
|
mteb/models/model_implementations/text2vec_models.py,sha256=zaHWRc2W0RYZAOetinqRzug9UGW0HmY5U-jYsLXA8wo,4160
|
|
1525
1526
|
mteb/models/model_implementations/ua_sentence_models.py,sha256=fcvXR4-Rrt-UDTlDkh2ZAO1gO_ufCOHiT6EhoeKiHx8,1224
|
|
1526
1527
|
mteb/models/model_implementations/uae_models.py,sha256=KZxH5a3t-sfh33xUBkLizEuyFAyPlGfnRsn-S7mjq74,3112
|
|
@@ -2536,9 +2537,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2536
2537
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2537
2538
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2538
2539
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2539
|
-
mteb-2.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2540
|
-
mteb-2.1.4.dist-info/METADATA,sha256=
|
|
2541
|
-
mteb-2.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2542
|
-
mteb-2.1.4.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2543
|
-
mteb-2.1.4.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2544
|
-
mteb-2.1.4.dist-info/RECORD,,
|
|
2540
|
+
mteb-2.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2541
|
+
mteb-2.1.5.dist-info/METADATA,sha256=VunL5iKfFZLfKB-4Yruzd5vq31mhfNOT0JVjs2GibSQ,13573
|
|
2542
|
+
mteb-2.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2543
|
+
mteb-2.1.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2544
|
+
mteb-2.1.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2545
|
+
mteb-2.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|