PyPI - mteb - Versions diffs - 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl - Mend

mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (458) hide show

mteb/tasks/sts/vie/sickr_stsvn.py CHANGED Viewed

@@ -9,11 +9,7 @@ class SickrSTSVN(AbsTaskSTS):
             "path": "GreenNode/sickr-sts-vn",
             "revision": "bc89f0401983c456b609f7fb324278f346b2cccf",
         },
-        description="""A translated dataset from Semantic Textual Similarity SICK-R dataset as described here:
-            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-            - Applies advanced embedding models to filter the translations.
-            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Semantic Textual Similarity SICK-R dataset as described here: The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://aclanthology.org/2020.lrec-1.207",
         type="STS",
         category="t2c",

mteb/tasks/sts/vie/sts_benchmark_stsvn.py CHANGED Viewed

@@ -9,11 +9,7 @@ class STSBenchmarkSTSVN(AbsTaskSTS):
             "path": "GreenNode/stsbenchmark-sts-vn",
             "revision": "f24d66738cda4a02138ada5af7689a92ce1fcad6",
         },
-        description="""A translated dataset from Semantic Textual Similarity Benchmark (STSbenchmark) dataset.
-            The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-            - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-            - Applies advanced embedding models to filter the translations.
-            - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Semantic Textual Similarity Benchmark (STSbenchmark) dataset. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://github.com/PhilipMay/stsb-multi-mt/",
         type="STS",
         category="t2c",

mteb/tasks/zeroshot_classification/eng/gtsrb.py CHANGED Viewed

@@ -9,7 +9,7 @@ from mteb.abstasks.zeroshot_classification import (
 class GTSRBZeroShotClassification(AbsTaskZeroShotClassification):
     metadata = TaskMetadata(
         name="GTSRBZeroShot",
-        description="""The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.""",
+        description="The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.",
         reference="https://benchmark.ini.rub.de/",
         dataset={
             "path": "clip-benchmark/wds_gtsrb",

mteb/tasks/zeroshot_classification/eng/patch_camelyon.py CHANGED Viewed

@@ -9,7 +9,7 @@ from mteb.abstasks.zeroshot_classification import (
 class PatchCamelyonZeroShotClassification(AbsTaskZeroShotClassification):
     metadata = TaskMetadata(
         name="PatchCamelyonZeroShot",
-        description="""Histopathology diagnosis classification dataset.""",
+        description="Histopathology diagnosis classification dataset.",
         reference="https://link.springer.com/chapter/10.1007/978-3-030-00934-2_24",
         dataset={
             "path": "clip-benchmark/wds_vtab-pcam",

mteb/tasks/zeroshot_classification/eng/ucf101.py CHANGED Viewed

@@ -7,11 +7,7 @@ from mteb.abstasks.zeroshot_classification import (
 class UCF101ZeroShotClassification(AbsTaskZeroShotClassification):
     metadata = TaskMetadata(
         name="UCF101ZeroShot",
-        description="""UCF101 is an action recognition data set of realistic
-action videos collected from YouTube, having 101 action categories. This
-version of the dataset does not contain images but images saved frame by
-frame. Train and test splits are generated based on the authors' first
-version train/test list.""",
+        description="UCF101 is an action recognition data set of realistic action videos collected from YouTube, having 101 action categories. This version of the dataset does not contain images but images saved frame by frame. Train and test splits are generated based on the authors' first version train/test list.",
         reference="https://huggingface.co/datasets/flwrlabs/ucf101",
         dataset={
             "path": "flwrlabs/ucf101",

mteb/types/_encoder_io.py CHANGED Viewed

@@ -1,13 +1,18 @@
+from __future__ import annotations
 from collections.abc import Mapping
 from enum import Enum
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
 import numpy as np
 import torch
 from datasets import Dataset
-from PIL import Image
 from typing_extensions import NotRequired
+if TYPE_CHECKING:
+    from PIL import Image
 # --- Output types ---
 Array = np.ndarray | torch.Tensor
 """General array type, can be a numpy array or a torch tensor."""

{mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.1.4
+Version: 2.5.2
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -16,7 +16,7 @@ Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
-Requires-Python: <3.14,>=3.10
+Requires-Python: <3.15,>=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: datasets>=2.19.0
@@ -34,10 +34,11 @@ Requires-Dist: pydantic>=2.0.0
 Requires-Dist: polars>=0.20.22
 Provides-Extra: image
 Requires-Dist: torchvision>0.2.1; extra == "image"
+Requires-Dist: transformers[torch-vision,vision]; extra == "image"
 Provides-Extra: codecarbon
 Requires-Dist: codecarbon<3.0.0,>=2.0.0; extra == "codecarbon"
 Provides-Extra: leaderboard
-Requires-Dist: gradio==5.35.0; extra == "leaderboard"
+Requires-Dist: gradio==6.0.1; extra == "leaderboard"
 Requires-Dist: plotly<6.0.0,>=5.24.0; extra == "leaderboard"
 Requires-Dist: cachetools>=5.2.0; extra == "leaderboard"
 Requires-Dist: matplotlib>=3.9.4; extra == "leaderboard"
@@ -91,6 +92,9 @@ Requires-Dist: volcengine-python-sdk[ark]==3.0.2; extra == "ark"
 Requires-Dist: tiktoken>=0.8.0; extra == "ark"
 Provides-Extra: colpali-engine
 Requires-Dist: colpali_engine>=0.3.12; extra == "colpali-engine"
+Provides-Extra: colqwen3
+Requires-Dist: transformers>=4.57; extra == "colqwen3"
+Requires-Dist: torchvision>=0.22.1; extra == "colqwen3"
 Provides-Extra: xet
 Requires-Dist: huggingface_hub>=0.32.0; extra == "xet"
 Provides-Extra: youtu
@@ -100,10 +104,12 @@ Provides-Extra: llama-embed-nemotron
 Requires-Dist: transformers==4.51.0; extra == "llama-embed-nemotron"
 Provides-Extra: faiss-cpu
 Requires-Dist: faiss-cpu>=1.12.0; extra == "faiss-cpu"
+Provides-Extra: eager-embed
+Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
 Dynamic: license-file
 <h1 align="center">
-  <img src="docs/images/logos/mteb_logo/dots-icon.png" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
+  <img src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/mteb_logo/dots-icon.png?raw=true" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
 </h1>
 <h3 align="center" style="border-bottom: none;">Multimodal toolbox for evaluating embeddings and retrieval systems</h3>
@@ -132,7 +138,7 @@ Dynamic: license-file
 <h3 align="center">
-    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="./docs/images/logos/hf_logo.png" /></a>
+    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/hf_logo.png?raw=true" /></a>
 </h3>

mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl