mteb-2.1.10-py3-none-any.whl → mteb-2.1.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/mteb/benchmarks/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks/benchmarks.py
@@ -1647,7 +1647,7 @@ MTEB_NL = Benchmark(
     exclusive_language_filter=True,
     tasks=[
         # Classification
-        "DutchBookReviewSentimentClassification",
+        "DutchBookReviewSentimentClassification.v2",
         "MassiveIntentClassification",
         "MassiveScenarioClassification",
         "SIB200Classification",
@@ -1678,10 +1678,10 @@ MTEB_NL = Benchmark(
         # # Reranking
         "WikipediaRerankingMultilingual",
         # # Retrieval
-        "ArguAna-NL",
-        "SCIDOCS-NL",
-        "SciFact-NL",
-        "NFCorpus-NL",
+        "ArguAna-NL.v2",
+        "SCIDOCS-NL.v2",
+        "SciFact-NL.v2",
+        "NFCorpus-NL.v2",
         "BelebeleRetrieval",
         "WebFAQRetrieval",
         "DutchNewsArticlesRetrieval",
--- a/mteb/models/model_implementations/emillykkejensen_models.py
+++ b/mteb/models/model_implementations/emillykkejensen_models.py
@@ -19,7 +19,7 @@ embedding_gemma_300m_scandi = ModelMeta(
     public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
     training_datasets=set(),
     similarity_fn_name="cosine",  # type: ignore[arg-type]
-    adapted_from="emillykkejensen/EmbeddingGemma-Scandi-300m",
+    adapted_from="google/embeddinggemma-300m",
     memory_usage_mb=578,
 )
 
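This is a metadata-only correction: the Scandinavian EmbeddingGemma fine-tune apparently listed itself as its adapted_from source, and now points at the google/embeddinggemma-300m base model. A hedged sketch for reading the field back, assuming the entry is registered under its Hugging Face repo id:

    import mteb

    # Assumption: the ModelMeta above is registered under this repo id.
    meta = mteb.get_model_meta("emillykkejensen/EmbeddingGemma-Scandi-300m")
    print(meta.adapted_from)  # "google/embeddinggemma-300m" as of 2.1.12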
--- a/mteb/models/model_implementations/jasper_models.py
+++ b/mteb/models/model_implementations/jasper_models.py
@@ -7,14 +7,34 @@ from torch.utils.data import DataLoader
 
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_implementations.bge_models import (
+    bge_chinese_training_data,
+    bge_full_data,
+    bge_m3_training_data,
+)
+from mteb.models.model_implementations.e5_instruct import E5_MISTRAL_TRAINING_DATA
+from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
+from mteb.models.model_implementations.qzhou_models import qzhou_training_data
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
-from .nvidia_models import nvidia_training_datasets
-
 logger = logging.getLogger(__name__)
 
 
+def instruction_template(
+    instruction: str, prompt_type: PromptType | None = None
+) -> str:
+    if not instruction or prompt_type == PromptType.document:
+        return ""
+    if isinstance(instruction, dict):
+        if prompt_type is None:
+            instruction = "Given a web search query, retrieve relevant passages that answer the query"
+        else:
+            instruction = instruction[prompt_type]
+    return f"Instruct: {instruction}\nQuery:"
+
+
 class JasperModel(AbsEncoder):
     def __init__(
         self,
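The instruction_template helper added above builds query-side prompts only: document inputs always get an empty instruction, matching apply_instruction_to_passages=False in the ModelMeta added below. A small sketch of its behavior, importing it from the module this diff touches (PromptType.query is assumed to be the query-side enum member):

    from mteb.models.model_implementations.jasper_models import instruction_template
    from mteb.types import PromptType

    # Queries are wrapped in the Instruct/Query scaffold.
    print(instruction_template("Retrieve relevant passages", PromptType.query))
    # -> Instruct: Retrieve relevant passages
    #    Query:

    # Documents are embedded with no instruction at all.
    print(instruction_template("Retrieve relevant passages", PromptType.document))
    # -> "" (empty string)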
@@ -114,3 +134,34 @@ jasper_en_v1 = ModelMeta(
     }
     """,
 )
+
+Jasper_Token_Compression_600M = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=dict(
+        instruction_template=instruction_template,
+        apply_instruction_to_passages=False,
+        trust_remote_code=True,
+    ),
+    name="infgrad/Jasper-Token-Compression-600M",
+    languages=["eng-Latn", "zho-Hans"],
+    open_weights=True,
+    revision="06a100f753a5a96d9e583b3af79c6fcdfacc4719",
+    release_date="2025-11-14",
+    n_parameters=595776512,
+    memory_usage_mb=2272,
+    embed_dim=2048,
+    license="mit",
+    max_tokens=32768,
+    reference="https://huggingface.co/infgrad/Jasper-Token-Compression-600M",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    # public_training_data: unsupervised data for distillation
+    public_training_data="https://huggingface.co/datasets/infgrad/jasper_text_distill_dataset",
+    training_datasets=bge_m3_training_data
+    | bge_chinese_training_data
+    | bge_full_data
+    | E5_MISTRAL_TRAINING_DATA
+    | qzhou_training_data,
+)
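The new registration can be exercised end to end. A minimal sketch, assuming network access to Hugging Face and mteb's v2 evaluate flow; "NFCorpus" is just an arbitrary example task:

    import mteb

    # Loading by name routes through InstructSentenceTransformerModel with the
    # loader_kwargs above (trust_remote_code=True runs the repo's custom code).
    model = mteb.get_model("infgrad/Jasper-Token-Compression-600M")
    tasks = mteb.get_tasks(tasks=["NFCorpus"])
    results = mteb.evaluate(model, tasks=tasks)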
--- mteb-2.1.10.dist-info/METADATA
+++ mteb-2.1.12.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.1.10
+Version: 2.1.12
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
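A quick way to confirm which of the two wheels is actually installed, using only the standard library:

    from importlib.metadata import version

    # Reads the installed distribution's METADATA shown above.
    print(version("mteb"))  # "2.1.12" after upgrading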
--- mteb-2.1.10.dist-info/RECORD
+++ mteb-2.1.12.dist-info/RECORD
@@ -56,7 +56,7 @@ mteb/benchmarks/_create_table.py,sha256=z3iqa5dajLk0DYxEE9EeO1qpR3VJXokg8ZQ2rdUk
 mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
 mteb/benchmarks/benchmarks/__init__.py,sha256=UD6YjWPDVPSQdUhmD-4rho08Gs5LU9pS_C2jX5eUns0,2102
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=v7n2fPGOC66zzBhS1nfSthY55DQnGNg-hGa4XfT21Vg,89941
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=KDJanVYs3BkFn74VHwarZ8HJ2DX6EIgcVYBrlyjbv9I,89956
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1469,7 +1469,7 @@ mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2
 mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
 mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
 mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
-mteb/models/model_implementations/emillykkejensen_models.py,sha256=1DEAGdSZZXDFbbw0YH-vkLm9Y-wthgbOJCRTIpV3Jeo,2795
+mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
 mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQBYZU1uevDRopeyR5ujIhwqBbd8,1043
 mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
@@ -1483,7 +1483,7 @@ mteb/models/model_implementations/hinvec_models.py,sha256=I_d_dSNVaGIwMIwyvTlaPA
 mteb/models/model_implementations/human.py,sha256=klMpuMAtYH92EIEwNMEhne_Baf9fNiTg1DNWYD11P44,532
 mteb/models/model_implementations/ibm_granite_models.py,sha256=YCT0jbgawy19ps5l8QlxpQoJLjq8Nh-3R-e6yxS0DRM,7902
 mteb/models/model_implementations/inf_models.py,sha256=lvXUFhAYDltq2_Xa9MHcwfhh1V20rbJLSgON76tkj6w,2906
-mteb/models/model_implementations/jasper_models.py,sha256=KzjVnQ1HwaVO9Z7kk1ZkjFrhvlKupeWCmkSljnZv-IM,4071
+mteb/models/model_implementations/jasper_models.py,sha256=yf6gNPTWl05rAJrao8lIpw0wld6xdmPx9PhDwbGHSlc,6037
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=QWoesiTygdFTLcdGpdx26wOUI1AXRz3jLmxGHJ0WMNE,29919
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
@@ -2554,9 +2554,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.1.10.dist-info/METADATA,sha256=LClBepxtjXoGssnPn6QgdAukEqJerTX67OC7zoKhdiE,13574
-mteb-2.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.1.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.1.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.1.10.dist-info/RECORD,,
+mteb-2.1.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.1.12.dist-info/METADATA,sha256=mXrsN01rI1osGl_9epUwEI7BjLmwXSxJECQjR7BmoJM,13574
+mteb-2.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.1.12.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.1.12.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.1.12.dist-info/RECORD,,