mteb 2.1.13__py3-none-any.whl → 2.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/leaderboard/app.py +4 -2
- mteb/models/instruct_wrapper.py +3 -0
- mteb/models/model_implementations/jasper_models.py +12 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/METADATA +1 -1
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/RECORD +10 -9
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/WHEEL +0 -0
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.13.dist-info → mteb-2.1.15.dist-info}/top_level.txt +0 -0
mteb/leaderboard/app.py
CHANGED
|
@@ -107,7 +107,9 @@ def _update_description(
|
|
|
107
107
|
description += f" - **Number of task types**: {n_task_types}\n"
|
|
108
108
|
description += f" - **Number of domains**: {n_domains}\n"
|
|
109
109
|
if benchmark.reference is not None:
|
|
110
|
-
description +=
|
|
110
|
+
description += (
|
|
111
|
+
f'\n<a href="{benchmark.reference}" target="_blank">Click for More Info</a>'
|
|
112
|
+
)
|
|
111
113
|
|
|
112
114
|
return description
|
|
113
115
|
|
|
@@ -137,7 +139,7 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
|
|
|
137
139
|
df["languages"] = df["languages"].map(_format_list)
|
|
138
140
|
df = df.sort_values("name")
|
|
139
141
|
df["domains"] = df["domains"].map(_format_list)
|
|
140
|
-
df["name"] = "
|
|
142
|
+
df["name"] = f'<a href="{df["reference"]}" target="_blank">{df["name"]}</a>'
|
|
141
143
|
df["modalities"] = df["modalities"].map(_format_list)
|
|
142
144
|
df = df.rename(
|
|
143
145
|
columns={
|
mteb/models/instruct_wrapper.py
CHANGED
|
@@ -153,6 +153,9 @@ class InstructSentenceTransformerModel(AbsEncoder):
|
|
|
153
153
|
|
|
154
154
|
self.model_name = model_name
|
|
155
155
|
self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
|
|
156
|
+
if max_seq_length:
|
|
157
|
+
# https://github.com/huggingface/sentence-transformers/issues/3575
|
|
158
|
+
self.model.max_seq_length = max_seq_length
|
|
156
159
|
self.apply_instruction_to_passages = apply_instruction_to_passages
|
|
157
160
|
self.prompts_dict = prompts_dict
|
|
158
161
|
|
|
@@ -156,7 +156,7 @@ Jasper_Token_Compression_600M = ModelMeta(
|
|
|
156
156
|
similarity_fn_name="cosine",
|
|
157
157
|
framework=["Sentence Transformers", "PyTorch"],
|
|
158
158
|
use_instructions=True,
|
|
159
|
-
public_training_code=
|
|
159
|
+
public_training_code="https://github.com/DunZhang/Jasper-Token-Compression-Training",
|
|
160
160
|
# public_training_data: unsupervised data for distillation
|
|
161
161
|
public_training_data="https://huggingface.co/datasets/infgrad/jasper_text_distill_dataset",
|
|
162
162
|
training_datasets=bge_m3_training_data
|
|
@@ -164,4 +164,15 @@ Jasper_Token_Compression_600M = ModelMeta(
|
|
|
164
164
|
| bge_full_data
|
|
165
165
|
| E5_MISTRAL_TRAINING_DATA
|
|
166
166
|
| qzhou_training_data,
|
|
167
|
+
citation="""
|
|
168
|
+
@misc{zhang2025jaspertokencompression600mtechnicalreport,
|
|
169
|
+
title={Jasper-Token-Compression-600M Technical Report},
|
|
170
|
+
author={Dun Zhang and Ziyang Zeng and Yudong Zhou and Shuyang Lu},
|
|
171
|
+
year={2025},
|
|
172
|
+
eprint={2511.14405},
|
|
173
|
+
archivePrefix={arXiv},
|
|
174
|
+
primaryClass={cs.IR},
|
|
175
|
+
url={https://arxiv.org/abs/2511.14405},
|
|
176
|
+
}
|
|
177
|
+
""",
|
|
167
178
|
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""ATLES Champion Embedding Model for MTEB."""
|
|
2
|
+
|
|
3
|
+
from mteb.models.model_meta import ModelMeta
|
|
4
|
+
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
|
|
5
|
+
|
|
6
|
+
spartan8806_atles_champion_embedding = ModelMeta(
|
|
7
|
+
loader=sentence_transformers_loader,
|
|
8
|
+
name="spartan8806/atles-champion-embedding",
|
|
9
|
+
languages=["eng-Latn"],
|
|
10
|
+
open_weights=True,
|
|
11
|
+
revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
|
|
12
|
+
release_date="2025-11-15",
|
|
13
|
+
n_parameters=110_000_000,
|
|
14
|
+
memory_usage_mb=420,
|
|
15
|
+
max_tokens=512,
|
|
16
|
+
embed_dim=768,
|
|
17
|
+
license="apache-2.0",
|
|
18
|
+
similarity_fn_name="cosine",
|
|
19
|
+
framework=["Sentence Transformers"],
|
|
20
|
+
reference="https://huggingface.co/spartan8806/atles-champion-embedding",
|
|
21
|
+
use_instructions=False,
|
|
22
|
+
training_datasets={"STSBenchmark"},
|
|
23
|
+
adapted_from="sentence-transformers/all-mpnet-base-v2",
|
|
24
|
+
public_training_code=None,
|
|
25
|
+
public_training_data=None,
|
|
26
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.1.13
|
|
3
|
+
Version: 2.1.15
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -1422,7 +1422,7 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
|
|
|
1422
1422
|
mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
|
|
1423
1423
|
mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
|
|
1424
1424
|
mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
|
|
1425
|
-
mteb/leaderboard/app.py,sha256=
|
|
1425
|
+
mteb/leaderboard/app.py,sha256=EsQ_qoJ26yJbg2qExKFFAx90R8VYOO6GbLtIzFuHGpE,32642
|
|
1426
1426
|
mteb/leaderboard/benchmark_selector.py,sha256=hnXdo_Kj4UUAruFl6nZkCxAQ88IEfbaH8EADFJMMdVo,7686
|
|
1427
1427
|
mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
|
|
1428
1428
|
mteb/leaderboard/table.py,sha256=qs0H_Gt9FzRvzb-AL0YlqEe0YAsdYsVX3QlncfCBEqg,7828
|
|
@@ -1430,7 +1430,7 @@ mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9Hyd
|
|
|
1430
1430
|
mteb/models/__init__.py,sha256=ycGU-x60LT0OFyP4CYa5pQhM7J5hCimubuT56va9wfM,741
|
|
1431
1431
|
mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
|
|
1432
1432
|
mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4,9108
|
|
1433
|
-
mteb/models/instruct_wrapper.py,sha256=
|
|
1433
|
+
mteb/models/instruct_wrapper.py,sha256=Ty4nfEvioycL_uATkhd0PGuyeB5Xc9xrRd6HOGgb-tc,9005
|
|
1434
1434
|
mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
|
|
1435
1435
|
mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
|
|
1436
1436
|
mteb/models/search_wrappers.py,sha256=qe2APunvRfPREdrq1moSi44mFXV6uaHvGHcLnaza-Sc,15483
|
|
@@ -1483,7 +1483,7 @@ mteb/models/model_implementations/hinvec_models.py,sha256=I_d_dSNVaGIwMIwyvTlaPA
|
|
|
1483
1483
|
mteb/models/model_implementations/human.py,sha256=klMpuMAtYH92EIEwNMEhne_Baf9fNiTg1DNWYD11P44,532
|
|
1484
1484
|
mteb/models/model_implementations/ibm_granite_models.py,sha256=YCT0jbgawy19ps5l8QlxpQoJLjq8Nh-3R-e6yxS0DRM,7902
|
|
1485
1485
|
mteb/models/model_implementations/inf_models.py,sha256=lvXUFhAYDltq2_Xa9MHcwfhh1V20rbJLSgON76tkj6w,2906
|
|
1486
|
-
mteb/models/model_implementations/jasper_models.py,sha256=
|
|
1486
|
+
mteb/models/model_implementations/jasper_models.py,sha256=e1ooLxf0PGZ77YWJshCha94_6YXDMY6l0afgiGKmzNc,6464
|
|
1487
1487
|
mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
|
|
1488
1488
|
mteb/models/model_implementations/jina_models.py,sha256=QWoesiTygdFTLcdGpdx26wOUI1AXRz3jLmxGHJ0WMNE,29919
|
|
1489
1489
|
mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
|
|
@@ -1536,6 +1536,7 @@ mteb/models/model_implementations/sentence_transformers_models.py,sha256=EtEaXg1
|
|
|
1536
1536
|
mteb/models/model_implementations/shuu_model.py,sha256=KkcuVYjIzoha3Fvxh8ppqHQ9BfNMWeqDqn9dGCRKUjg,1167
|
|
1537
1537
|
mteb/models/model_implementations/siglip_models.py,sha256=tvi8QB2ayBoeXsxwHrl5RFlkknvE6FM9N06zSBWGQD0,12602
|
|
1538
1538
|
mteb/models/model_implementations/sonar_models.py,sha256=Nc6kAJRWSrxA57DPRrgOPHqS1dNhz2vsE_1ZA2JtigQ,4784
|
|
1539
|
+
mteb/models/model_implementations/spartan8806_atles_champion.py,sha256=9sWQH7tOT0uxXA7sbQcnqGt2f5O9xcw9HqFpRCzoQAA,918
|
|
1539
1540
|
mteb/models/model_implementations/stella_models.py,sha256=NL3tk-rnuBdznsQ-nmelqun4tFO2xKoNPPOOVKqnPGU,8062
|
|
1540
1541
|
mteb/models/model_implementations/tarka_models.py,sha256=xC6olJs9PSe_lrYsScw5hDHTjYSjcxgbvfK_7IoBFnk,27397
|
|
1541
1542
|
mteb/models/model_implementations/text2vec_models.py,sha256=zaHWRc2W0RYZAOetinqRzug9UGW0HmY5U-jYsLXA8wo,4160
|
|
@@ -2554,9 +2555,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2554
2555
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2555
2556
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2556
2557
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2557
|
-
mteb-2.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2558
|
-
mteb-2.1.
|
|
2559
|
-
mteb-2.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2560
|
-
mteb-2.1.13.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2561
|
-
mteb-2.1.13.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2562
|
-
mteb-2.1.13.dist-info/RECORD,,
|
|
2558
|
+
mteb-2.1.15.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2559
|
+
mteb-2.1.15.dist-info/METADATA,sha256=sH6xAIJ1ECtja9vx3g6Bz59HFNQv9irFidIozawidfw,13574
|
|
2560
|
+
mteb-2.1.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2561
|
+
mteb-2.1.15.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2562
|
+
mteb-2.1.15.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2563
|
+
mteb-2.1.15.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|