mteb 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ from mteb.models.model_implementations.google_models import gemma_embedding_loader
2
+ from mteb.models.model_meta import ModelMeta
3
+
4
+ Tarka_Embedding_150M_V1_CITATION = """@misc{tarka_ai_research_2025,
5
+ author = { Tarka AI Research },
6
+ title = { Tarka-Embedding-150M-V1 (Revision c5f4f43) },
7
+ year = 2025,
8
+ url = { https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1 },
9
+ doi = { 10.57967/hf/6875 },
10
+ publisher = { Hugging Face }
11
+ }"""
12
+
13
+ MULTILINGUAL_EVALUATED_LANGUAGES = [
14
+ "arb-Arab",
15
+ "eng-Latn",
16
+ "spa-Latn",
17
+ "deu-Latn",
18
+ "fra-Latn",
19
+ "jpn-Jpan",
20
+ "kor-Hang",
21
+ "zho-Hans",
22
+ ]
23
+
24
+ training_data = {
25
+ "T2Retrieval",
26
+ "DuRetrieval",
27
+ "MMarcoReranking",
28
+ "CMedQAv2-reranking",
29
+ "NQ",
30
+ "MSMARCO",
31
+ "HotpotQA",
32
+ "FEVER",
33
+ "MrTidyRetrieval",
34
+ "MIRACLRetrieval",
35
+ "CodeSearchNet",
36
+ }
37
+
38
+ tarka_embedding_150m_v1 = ModelMeta(
39
+ loader=gemma_embedding_loader,
40
+ name="Tarka-AIR/Tarka-Embedding-150M-V1",
41
+ languages=MULTILINGUAL_EVALUATED_LANGUAGES,
42
+ open_weights=True,
43
+ revision="c5f4f43",
44
+ release_date="2025-11-04",
45
+ n_parameters=155_714_304,
46
+ embed_dim=768,
47
+ max_tokens=2048,
48
+ license="gemma",
49
+ reference="https://huggingface.co/Tarka-AIR/Tarka-Embedding-150M-V1",
50
+ framework=["Sentence Transformers", "PyTorch"],
51
+ use_instructions=True,
52
+ public_training_code=None,
53
+ public_training_data=None,
54
+ training_datasets=training_data,
55
+ similarity_fn_name="cosine",
56
+ memory_usage_mb=576,
57
+ citation=Tarka_Embedding_150M_V1_CITATION,
58
+ )
@@ -191,6 +191,7 @@ class SearchEncoderWrapper:
191
191
  cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
192
192
  cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
193
193
 
194
+ sub_corpus_ids = list(sub_corpus_ids)
194
195
  for query_itr in range(len(query_embeddings)):
195
196
  query_id = query_idx_to_id[query_itr]
196
197
  for sub_corpus_id, score in zip(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.1.4
3
+ Version: 2.1.5
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -1419,7 +1419,7 @@ mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4
1419
1419
  mteb/models/instruct_wrapper.py,sha256=HxHmnlxkjtZhfgTZRYJBT3Nma7Dhx6a9e2Bg-cO_IYs,8844
1420
1420
  mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
1421
1421
  mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
1422
- mteb/models/search_wrappers.py,sha256=0McxwGnqyiYKPHjHsxWZp1pP9qGuHemZjeX1z5ZgNAI,15588
1422
+ mteb/models/search_wrappers.py,sha256=W99EeMDQ58N0auVp2-7T39orG7GZLekSsVCdxEZLxw0,15638
1423
1423
  mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
1424
1424
  mteb/models/cache_wrappers/__init__.py,sha256=j3JBHN73Tr7uMUO92FEvKXstnybxrPpGWmKXU2lAoIE,88
1425
1425
  mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
@@ -1521,6 +1521,7 @@ mteb/models/model_implementations/shuu_model.py,sha256=KkcuVYjIzoha3Fvxh8ppqHQ9B
1521
1521
  mteb/models/model_implementations/siglip_models.py,sha256=tvi8QB2ayBoeXsxwHrl5RFlkknvE6FM9N06zSBWGQD0,12602
1522
1522
  mteb/models/model_implementations/sonar_models.py,sha256=Nc6kAJRWSrxA57DPRrgOPHqS1dNhz2vsE_1ZA2JtigQ,4784
1523
1523
  mteb/models/model_implementations/stella_models.py,sha256=NL3tk-rnuBdznsQ-nmelqun4tFO2xKoNPPOOVKqnPGU,8062
1524
+ mteb/models/model_implementations/tarka_models.py,sha256=aj4PvEzZ6ZSKcvwYVuTxf1IFOvH4rmJHtbPUcRw1fMI,1568
1524
1525
  mteb/models/model_implementations/text2vec_models.py,sha256=zaHWRc2W0RYZAOetinqRzug9UGW0HmY5U-jYsLXA8wo,4160
1525
1526
  mteb/models/model_implementations/ua_sentence_models.py,sha256=fcvXR4-Rrt-UDTlDkh2ZAO1gO_ufCOHiT6EhoeKiHx8,1224
1526
1527
  mteb/models/model_implementations/uae_models.py,sha256=KZxH5a3t-sfh33xUBkLizEuyFAyPlGfnRsn-S7mjq74,3112
@@ -2536,9 +2537,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2536
2537
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2537
2538
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2538
2539
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2539
- mteb-2.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2540
- mteb-2.1.4.dist-info/METADATA,sha256=3HJ0LwqN5B9zLnqA4j5PsmVknk75iAHG-qwtZqJdKW0,13573
2541
- mteb-2.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2542
- mteb-2.1.4.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2543
- mteb-2.1.4.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2544
- mteb-2.1.4.dist-info/RECORD,,
2540
+ mteb-2.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2541
+ mteb-2.1.5.dist-info/METADATA,sha256=VunL5iKfFZLfKB-4Yruzd5vq31mhfNOT0JVjs2GibSQ,13573
2542
+ mteb-2.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2543
+ mteb-2.1.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2544
+ mteb-2.1.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2545
+ mteb-2.1.5.dist-info/RECORD,,
File without changes