mteb 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -2
- mteb/abstasks/classification.py +0 -2
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +57 -0
- mteb/deprecated_evaluator.py +8 -13
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/evaluate.py +2 -33
- mteb/leaderboard/figures.py +1 -1
- mteb/leaderboard/table.py +1 -11
- mteb/models/abs_encoder.py +21 -17
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
- mteb/models/get_model_meta.py +3 -123
- mteb/models/instruct_wrapper.py +2 -1
- mteb/models/model_implementations/bica_model.py +34 -0
- mteb/models/model_implementations/colpali_models.py +7 -2
- mteb/models/model_implementations/colqwen_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +9 -5
- mteb/models/model_implementations/google_models.py +10 -0
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_models.py +1 -1
- mteb/models/model_implementations/mod_models.py +204 -0
- mteb/models/model_implementations/nomic_models.py +142 -4
- mteb/models/model_implementations/nomic_models_vision.py +6 -2
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +6 -2
- mteb/models/model_implementations/pylate_models.py +1 -4
- mteb/models/model_implementations/random_baseline.py +6 -2
- mteb/models/model_implementations/seed_1_6_embedding_models.py +7 -2
- mteb/models/model_implementations/voyage_v.py +6 -2
- mteb/models/model_meta.py +396 -19
- mteb/models/sentence_transformer_wrapper.py +2 -7
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/types/_encoder_io.py +7 -2
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/METADATA +2 -1
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/RECORD +53 -39
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/WHEEL +0 -0
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/top_level.txt +0 -0
mteb/_evaluators/image/imagetext_pairclassification_evaluator.py
CHANGED
@@ -1,10 +1,11 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
 import torch.nn.functional as F
 from datasets import Dataset
-from PIL.Image import Image
 from torch.utils.data import DataLoader
 
 from mteb._create_dataloaders import (
@@ -15,6 +16,10 @@ from mteb._requires_package import requires_image_dependencies
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.models_protocols import EncoderProtocol
 
+if TYPE_CHECKING:
+    from PIL.Image import Image
+
+
 logger = logging.getLogger(__name__)
 
 
mteb/abstasks/_statistics_calculation.py
CHANGED
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import hashlib
 from collections import Counter
-
-from PIL import Image
+from typing import TYPE_CHECKING
 
 from mteb.types import TopRankedDocumentsType
 from mteb.types.statistics import (
@@ -13,6 +14,9 @@ from mteb.types.statistics import (
     TopRankedStatistics,
 )
 
+if TYPE_CHECKING:
+    from PIL import Image
+
 
 def calculate_text_statistics(texts: list[str]) -> TextStatistics:
     """Calculate descriptive statistics for a list of texts.
mteb/abstasks/classification.py
CHANGED
@@ -5,7 +5,6 @@ from typing import Any, TypedDict
 
 import numpy as np
 from datasets import Dataset, DatasetDict
-from PIL import ImageFile
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import (
     accuracy_score,
@@ -32,7 +31,6 @@ from ._statistics_calculation import (
 )
 from .abstask import AbsTask
 
-ImageFile.LOAD_TRUNCATED_IMAGES = True
 logger = logging.getLogger(__name__)
 
 
mteb/benchmarks/benchmarks/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     FA_MTEB_2,
     HUME,
     JINA_VDR,
+    JMTEB_LITE_V1,
     JMTEB_V2,
     LONG_EMBED,
     MIEB_ENG,
@@ -76,6 +77,7 @@ __all__ = [
     "HUME",
     "HUME",
     "JINA_VDR",
+    "JMTEB_LITE_V1",
     "JMTEB_V2",
     "LONG_EMBED",
     "MIEB_ENG",
mteb/benchmarks/benchmarks/benchmarks.py
CHANGED
@@ -2650,3 +2650,60 @@ JMTEB_V2 = Benchmark(
     """,
     contacts=["lsz05"],
 )
+
+JMTEB_LITE_V1 = Benchmark(
+    name="JMTEB-lite(v1)",
+    display_name="Japanese",
+    icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
+    tasks=get_tasks(
+        languages=["jpn"],
+        tasks=[
+            # Clustering (3)
+            "LivedoorNewsClustering.v2",
+            "MewsC16JaClustering",
+            "SIB200ClusteringS2S",
+            # Classification (7)
+            "AmazonReviewsClassification",
+            "AmazonCounterfactualClassification",
+            "MassiveIntentClassification",
+            "MassiveScenarioClassification",
+            "JapaneseSentimentClassification",
+            "SIB200Classification",
+            "WRIMEClassification",
+            # STS (2)
+            "JSTS",
+            "JSICK",
+            # Retrieval (11)
+            "JaqketRetrievalLite",
+            "MrTyDiJaRetrievalLite",
+            "JaGovFaqsRetrieval",
+            "NLPJournalTitleAbsRetrieval.V2",
+            "NLPJournalTitleIntroRetrieval.V2",
+            "NLPJournalAbsIntroRetrieval.V2",
+            "NLPJournalAbsArticleRetrieval.V2",
+            "JaCWIRRetrievalLite",
+            "MIRACLJaRetrievalLite",
+            "MintakaRetrieval",
+            "MultiLongDocRetrieval",
+            # Reranking (5)
+            "ESCIReranking",
+            "JQaRARerankingLite",
+            "JaCWIRRerankingLite",
+            "MIRACLReranking",
+            "MultiLongDocReranking",
+        ],
+    ),
+    description="JMTEB-lite is a lightweight version of JMTEB. It makes agile evaluation possible by reaching an average of 5x faster evaluation comparing with JMTEB, as 6 heavy datasets in JMTEB are optimized with hard negative pooling strategy, making them much smaller. The result of JMTEB-lite is proved to be highly relevant with that of JMTEB, making it a faithful preview of JMTEB.",
+    reference="https://huggingface.co/datasets/sbintuitions/JMTEB-lite",
+    citation=r"""
+@article{li2025jmteb,
+  author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
+  issue = {3},
+  journal = {Vol.2025-NL-265,No.3,1-15},
+  month = {sep},
+  title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
+  year = {2025},
+}
+""",
+    contacts=["lsz05"],
+)
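The new benchmark is registered as "JMTEB-lite(v1)". A minimal sketch of loading it through mteb's benchmark registry, assuming `mteb.get_benchmark` resolves the name the same way it does for the existing benchmarks; nothing below is taken from this diff except the benchmark name and the task count:

    import mteb

    # Look up the benchmark added above and list its 28 tasks
    # (3 clustering + 7 classification + 2 STS + 11 retrieval + 5 reranking).
    benchmark = mteb.get_benchmark("JMTEB-lite(v1)")
    for task in benchmark.tasks:
        print(task.metadata.name)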
mteb/deprecated_evaluator.py
CHANGED
@@ -13,21 +13,11 @@ from pathlib import Path
 from time import time
 from typing import TYPE_CHECKING, Any
 
-from mteb.abstasks.task_metadata import TaskCategory, TaskType
-from mteb.models.get_model_meta import (
-    _model_meta_from_cross_encoder,
-    _model_meta_from_sentence_transformers,
-)
-
-if sys.version_info >= (3, 13):
-    from warnings import deprecated
-else:
-    from typing_extensions import deprecated
-
 import datasets
 
 import mteb
 from mteb.abstasks import AbsTask
+from mteb.abstasks.task_metadata import TaskCategory, TaskType
 from mteb.benchmarks import Benchmark
 from mteb.models import (
     CrossEncoderWrapper,
@@ -39,6 +29,11 @@ from mteb.models import (
 from mteb.results import TaskResult
 from mteb.types import ScoresDict
 
+if sys.version_info >= (3, 13):
+    from warnings import deprecated
+else:
+    from typing_extensions import deprecated
+
 if TYPE_CHECKING:
     from sentence_transformers import CrossEncoder, SentenceTransformer
 
@@ -669,9 +664,9 @@ class MTEB:
         from sentence_transformers import CrossEncoder, SentenceTransformer
 
         if isinstance(model, CrossEncoder):
-            meta = _model_meta_from_cross_encoder(model)
+            meta = ModelMeta.from_cross_encoder(model)
         elif isinstance(model, SentenceTransformer):
-            meta = _model_meta_from_sentence_transformers(model)
+            meta = ModelMeta.from_sentence_transformer_model(model)
         else:
             meta = ModelMeta(
                 loader=None,
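The private helpers `_model_meta_from_cross_encoder` and `_model_meta_from_sentence_transformers` are replaced by classmethods on `ModelMeta`. A hedged sketch of deriving metadata for an ad-hoc SentenceTransformer this way; the model id is only an example, and the import path is assumed from the file layout (`mteb/models/model_meta.py`):

    from sentence_transformers import SentenceTransformer

    from mteb.models.model_meta import ModelMeta

    # Wrap a plain SentenceTransformer and derive its metadata via the new classmethod.
    st_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    meta = ModelMeta.from_sentence_transformer_model(st_model)
    print(meta.name, meta.revision)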
mteb/descriptive_stats/Reranking/JQaRARerankingLite.json
ADDED
@@ -0,0 +1,35 @@
+{
+  "test": {
+    "num_samples": 91353,
+    "number_of_characters": 21318247,
+    "documents_text_statistics": {
+      "total_text_length": 21231812,
+      "min_text_length": 14,
+      "average_text_length": 236.73496420846064,
+      "max_text_length": 438,
+      "unique_texts": 89683
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 86435,
+      "min_text_length": 16,
+      "average_text_length": 51.850629874025195,
+      "max_text_length": 118,
+      "unique_texts": 1667
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 16204,
+      "min_relevant_docs_per_query": 51,
+      "average_relevant_docs_per_query": 9.720455908818236,
+      "max_relevant_docs_per_query": 78,
+      "unique_relevant_docs": 89686
+    },
+    "top_ranked_statistics": {
+      "num_top_ranked": 98941,
+      "min_top_ranked_per_query": 51,
+      "average_top_ranked_per_query": 59.35272945410918,
+      "max_top_ranked_per_query": 78
+    }
+  }
+}
mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json
ADDED
@@ -0,0 +1,35 @@
+{
+  "test": {
+    "num_samples": 161744,
+    "number_of_characters": 29754484,
+    "documents_text_statistics": {
+      "total_text_length": 29612965,
+      "min_text_length": 142,
+      "average_text_length": 187.72799599350847,
+      "max_text_length": 252,
+      "unique_texts": 156741
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 141519,
+      "min_text_length": 9,
+      "average_text_length": 35.37975,
+      "max_text_length": 176,
+      "unique_texts": 3993
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 3998,
+      "min_relevant_docs_per_query": 50,
+      "average_relevant_docs_per_query": 0.9995,
+      "max_relevant_docs_per_query": 51,
+      "unique_relevant_docs": 157744
+    },
+    "top_ranked_statistics": {
+      "num_top_ranked": 204000,
+      "min_top_ranked_per_query": 51,
+      "average_top_ranked_per_query": 51.0,
+      "max_top_ranked_per_query": 51
+    }
+  }
+}
mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 306638,
+    "number_of_characters": 56607519,
+    "documents_text_statistics": {
+      "total_text_length": 56466000,
+      "min_text_length": 142,
+      "average_text_length": 186.57934562084074,
+      "max_text_length": 252,
+      "unique_texts": 299096
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 141519,
+      "min_text_length": 9,
+      "average_text_length": 35.37975,
+      "max_text_length": 176,
+      "unique_texts": 3993
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 4000,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 4000
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 66799,
+    "number_of_characters": 280024895,
+    "documents_text_statistics": {
+      "total_text_length": 279974341,
+      "min_text_length": 8,
+      "average_text_length": 4254.799869304884,
+      "max_text_length": 188424,
+      "unique_texts": 65802
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 50554,
+      "min_text_length": 16,
+      "average_text_length": 50.70611835506519,
+      "max_text_length": 98,
+      "unique_texts": 997
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 997,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 989
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 105924,
+    "number_of_characters": 20818958,
+    "documents_text_statistics": {
+      "total_text_length": 20803724,
+      "min_text_length": 4,
+      "average_text_length": 198.01001294449097,
+      "max_text_length": 13231,
+      "unique_texts": 104988
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 15234,
+      "min_text_length": 7,
+      "average_text_length": 17.71395348837209,
+      "max_text_length": 48,
+      "unique_texts": 860
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 1790,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 2.0813953488372094,
+      "max_relevant_docs_per_query": 11,
+      "unique_relevant_docs": 1728
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 94102,
+    "number_of_characters": 17949014,
+    "documents_text_statistics": {
+      "total_text_length": 17935995,
+      "min_text_length": 4,
+      "average_text_length": 192.07122357627807,
+      "max_text_length": 10778,
+      "unique_texts": 93122
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 13019,
+      "min_text_length": 6,
+      "average_text_length": 18.081944444444446,
+      "max_text_length": 44,
+      "unique_texts": 720
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 923,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.2819444444444446,
+      "max_relevant_docs_per_query": 3,
+      "unique_relevant_docs": 880
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/evaluate.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import logging
 from collections.abc import Iterable
-from copy import deepcopy
 from pathlib import Path
 from time import time
 from typing import TYPE_CHECKING, Any, cast
@@ -53,36 +52,6 @@ class OverwriteStrategy(HelpfulStrEnum):
     ONLY_CACHE = "only-cache"
 
 
-_empty_model_meta = ModelMeta(
-    loader=None,
-    name=None,
-    revision=None,
-    release_date=None,
-    languages=None,
-    framework=[],
-    similarity_fn_name=None,
-    n_parameters=None,
-    memory_usage_mb=None,
-    max_tokens=None,
-    embed_dim=None,
-    license=None,
-    open_weights=None,
-    public_training_code=None,
-    public_training_data=None,
-    use_instructions=None,
-    training_datasets=None,
-    modalities=[],
-)
-
-
-def _create_empty_model_meta() -> ModelMeta:
-    logger.warning("Model metadata is missing. Using empty metadata.")
-    meta = deepcopy(_empty_model_meta)
-    meta.revision = "no_revision_available"
-    meta.name = "no_model_name_available"
-    return meta
-
-
 def _sanitize_model(
     model: ModelMeta | MTEBModels | SentenceTransformer | CrossEncoder,
 ) -> tuple[MTEBModels | ModelMeta, ModelMeta, ModelName, Revision]:
@@ -101,9 +70,9 @@ def _sanitize_model(
     elif hasattr(model, "mteb_model_meta"):
         meta = model.mteb_model_meta  # type: ignore[attr-defined]
         if not isinstance(meta, ModelMeta):
-            meta = _create_empty_model_meta()
+            meta = ModelMeta.from_hub(None)
     else:
-        meta = _create_empty_model_meta() if not isinstance(model, ModelMeta) else model
+        meta = ModelMeta.from_hub(None) if not isinstance(model, ModelMeta) else model
 
     model_name = cast(str, meta.name)
     model_revision = cast(str, meta.revision)
mteb/leaderboard/figures.py
CHANGED
@@ -117,7 +117,7 @@ def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
     df["Max Tokens"] = df["Max Tokens"].map(_parse_float)
     df["Log(Tokens)"] = np.log10(df["Max Tokens"])
     df["Mean (Task)"] = df["Mean (Task)"].map(_parse_float)
-    df = df
+    df = df.dropna(
         subset=["Mean (Task)", "Number of Parameters", "Embedding Dimensions"]
     )
     if not len(df.index):
mteb/leaderboard/table.py
CHANGED
@@ -26,16 +26,6 @@ def _format_scores(score: float) -> float:
     return round(score * 100, 2)
 
 
-def _get_column_types(df: pd.DataFrame) -> list[str]:
-    types = []
-    for column_name in df.columns:
-        if is_numeric_dtype(df[column_name]):
-            types.append("number")
-        else:
-            types.append("str")
-    return types
-
-
 def _get_column_widths(df: pd.DataFrame) -> list[str]:
     # Please do not remove this function when refactoring.
     # Column width calculation seeminlgy changes regularly with Gradio releases,
@@ -226,7 +216,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
         gmap=gmap_values.loc[mask],
     )
 
-    column_types =
+    column_types = ["auto" for _ in joint_table_style.data.columns]
     # setting model name column to markdown
     if len(column_types) > 1:
         column_types[1] = "markdown"
mteb/models/abs_encoder.py
CHANGED
@@ -54,11 +54,11 @@ class AbsEncoder(ABC):
         """A wrapper function around the model.encode method that handles the prompt_name argument and standardizes the output to a numpy array.
 
         The order of priorities for prompt selection are:
-            1. Composed prompt of task name + prompt type
+            1. Composed prompt of task name + prompt type
             2. Specific task prompt
-            3. Composed prompt of task type + prompt type
+            3. Composed prompt of task type + prompt type
             4. Specific task type prompt
-            5. Specific prompt type
+            5. Specific prompt type
 
         Args:
             task_metadata: The task name to use for building the encoding prompt
@@ -105,7 +105,7 @@ class AbsEncoder(ABC):
 
         Args:
             task_metadata: The metadata of the task.
-            prompt_type: The name type of prompt.
+            prompt_type: The name type of prompt.
         """
         if not self.model_prompts:
             return None
@@ -210,13 +210,11 @@
             task_metadata: The metadata of the task. Sentence-transformers uses this to
                 determine which prompt to use from a specified dictionary.
                 The order of priorities for prompt selection are:
-                1. Composed prompt of task name + prompt type
-                2. Specific task prompt
-                3. Composed prompt of task type + prompt type
-                4. Specific task type prompt
-                5. Specific prompt type
-                6. Default prompt from the task definition
-            prompt_type: The name type of prompt. (query or passage)
+                1. Specific task prompt
+                2. Specific task type prompt
+                3. Specific prompt type
+                4. Default prompt from the task definition
+            prompt_type: The name type of prompt.
 
         Returns:
             The instruction/prompt to be used for encoding sentences.
@@ -224,6 +222,12 @@
         prompt = task_metadata.prompt
         if self.prompts_dict and task_metadata.name in self.prompts_dict:
             prompt = self.prompts_dict[task_metadata.name]
+        elif self.prompts_dict and task_metadata.type in self.prompts_dict:
+            prompt = self.prompts_dict[task_metadata.type]
+        elif (
+            self.prompts_dict and prompt_type and prompt_type.value in self.prompts_dict
+        ):
+            prompt = self.prompts_dict[prompt_type.value]
 
         if isinstance(prompt, dict) and prompt_type:
             if prompt.get(prompt_type.value):
@@ -246,7 +250,7 @@
 
         Args:
             instruction: The instruction to be formatted.
-            prompt_type: The name type of prompt.
+            prompt_type: The name type of prompt.
         """
         if self.instruction_template is None:
            raise ValueError(
@@ -269,7 +273,7 @@
 
         Args:
             task_metadata: The metadata of the task
-            prompt_type: The name type of prompt.
+            prompt_type: The name type of prompt.
 
         Returns:
             The instruction to be used for encoding sentences.
@@ -373,14 +377,14 @@
             task_metadata: The metadata of the task. Sentence-transformers uses this to
                 determine which prompt to use from a specified dictionary.
                 The order of priorities for prompt selection are:
-                1. Composed prompt of task name + prompt type
+                1. Composed prompt of task name + prompt type
                 2. Specific task prompt
-                3. Composed prompt of task type + prompt type
+                3. Composed prompt of task type + prompt type
                 4. Specific task type prompt
-                5. Specific prompt type
+                5. Specific prompt type
             hf_split: Split of current task
             hf_subset: Subset of current task
-            prompt_type: The name type of prompt.
+            prompt_type: The name type of prompt.
             **kwargs: Additional arguments to pass to the encoder.
 
         Returns:
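The added `elif` branches extend the `prompts_dict` fallback: an exact task-name key wins, then the task type, then the bare prompt type ("query" or "passage"). A standalone illustration of that lookup order with plain strings — not the library's code, and the example keys and values are made up:

    def resolve_prompt(
        prompts: dict[str, str],
        task_name: str,
        task_type: str,
        prompt_type: str | None,
    ) -> str | None:
        # Mirror of the priority added above: task name > task type > prompt type.
        if task_name in prompts:
            return prompts[task_name]
        if task_type in prompts:
            return prompts[task_type]
        if prompt_type and prompt_type in prompts:
            return prompts[prompt_type]
        return None

    prompts = {"Retrieval": "Represent the document for retrieval:", "query": "Represent the query:"}
    print(resolve_prompt(prompts, "JaqketRetrievalLite", "Retrieval", "query"))  # matches the task-type key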
mteb/models/cache_wrappers/cache_backends/_hash_utils.py
CHANGED
@@ -1,7 +1,5 @@
 import hashlib
 
-from PIL import Image
-
 from mteb.types import BatchedInput
 
 
@@ -11,6 +9,8 @@ def _hash_item(item: BatchedInput) -> str:
         item_hash = hashlib.sha256(item["text"].encode()).hexdigest()
 
     if "image" in item:
+        from PIL import Image
+
         image: Image.Image = item["image"]
         item_hash += hashlib.sha256(image.tobytes()).hexdigest()
 
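Moving the PIL import inside the image branch defers the Pillow dependency until an image is actually hashed, so text-only caching works without Pillow installed. A small sketch of the same scheme on a plain dict; the item shape is only assumed from the code above:

    import hashlib

    def hash_item(item: dict) -> str:
        # sha256 of the text, with the image hash appended only when an image is present.
        item_hash = ""
        if "text" in item:
            item_hash = hashlib.sha256(item["text"].encode()).hexdigest()
        if "image" in item:
            from PIL import Image  # deferred: only needed when images are hashed

            image: Image.Image = item["image"]
            item_hash += hashlib.sha256(image.tobytes()).hexdigest()
        return item_hash

    print(hash_item({"text": "ドキュメントの例"}))  # runs without Pillow installed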
|