mteb 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. mteb/_create_dataloaders.py +2 -0
  2. mteb/_evaluators/retrieval_metrics.py +0 -9
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/benchmarks.py +9 -13
  10. mteb/benchmarks/get_benchmark.py +1 -1
  11. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  12. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  13. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  14. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  15. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  16. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  17. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  18. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  19. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  20. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  21. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  22. mteb/languages/check_language_code.py +11 -3
  23. mteb/languages/language_scripts.py +4 -0
  24. mteb/leaderboard/app.py +1 -1
  25. mteb/leaderboard/benchmark_selector.py +1 -0
  26. mteb/leaderboard/text_segments.py +1 -1
  27. mteb/models/model_implementations/b1ade_models.py +1 -1
  28. mteb/models/model_implementations/bge_models.py +1 -3
  29. mteb/models/model_implementations/bmretriever_models.py +1 -1
  30. mteb/models/model_implementations/gme_v_models.py +2 -2
  31. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  32. mteb/models/model_implementations/inf_models.py +3 -3
  33. mteb/models/model_implementations/jina_models.py +12 -2
  34. mteb/models/model_implementations/llm2vec_models.py +1 -1
  35. mteb/models/model_implementations/misc_models.py +2 -2
  36. mteb/models/model_implementations/mxbai_models.py +1 -1
  37. mteb/models/model_implementations/reasonir_model.py +1 -1
  38. mteb/models/model_implementations/salesforce_models.py +1 -1
  39. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  40. mteb/models/model_implementations/voyage_v.py +9 -9
  41. mteb/results/task_result.py +6 -8
  42. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  43. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  44. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  45. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  46. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  47. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  48. mteb/tasks/retrieval/code/code_rag.py +8 -8
  49. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  50. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  51. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  52. mteb/tasks/retrieval/eng/__init__.py +18 -4
  53. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  54. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  55. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  56. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  57. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  58. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  59. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  60. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  61. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  62. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  63. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  64. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  65. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  66. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  67. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  68. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  69. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  70. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  71. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  72. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  73. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  74. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  75. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  76. mteb/tasks/retrieval/nob/norquad.py +2 -2
  77. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  78. mteb/tasks/retrieval/rus/__init__.py +11 -2
  79. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  80. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  81. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/METADATA +5 -5
  82. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/RECORD +86 -91
  83. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  84. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  85. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  86. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  87. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  88. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  89. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  90. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  91. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  92. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  93. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  94. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  95. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  96. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/WHEEL +0 -0
  97. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/entry_points.txt +0 -0
  98. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/licenses/LICENSE +0 -0
  99. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/top_level.txt +0 -0
@@ -51,7 +51,13 @@ def _downsample_image(
51
51
  def voyage_v_loader(model_name, **kwargs):
52
52
  requires_package(
53
53
  voyage_v_loader,
54
- "voyageai and tenacity",
54
+ "voyageai",
55
+ model_name,
56
+ "pip install 'mteb[voyage_v]'",
57
+ )
58
+ requires_package(
59
+ voyage_v_loader,
60
+ "tenacity",
55
61
  model_name,
56
62
  "pip install 'mteb[voyage_v]'",
57
63
  )
@@ -65,11 +71,9 @@ def voyage_v_loader(model_name, **kwargs):
65
71
  **kwargs: Any,
66
72
  ):
67
73
  requires_image_dependencies()
68
- from torchvision import transforms
69
74
 
70
75
  self.model_name = model_name.split("/")[-1]
71
76
  self.vo = voyageai.Client()
72
- self.tensor_to_image = transforms.Compose([transforms.PILToTensor()])
73
77
 
74
78
  @retry(
75
79
  stop=stop_after_attempt(6), # Stop after 6 attempts
@@ -126,10 +130,7 @@ def voyage_v_loader(model_name, **kwargs):
126
130
  for batch in tqdm(
127
131
  images, disable=not show_progress_bar, desc="Image Encoding"
128
132
  ):
129
- batch_images = [
130
- [_downsample_image(self.tensor_to_image(image))]
131
- for image in batch["image"]
132
- ]
133
+ batch_images = [[_downsample_image(image)] for image in batch["image"]]
133
134
  embeddings = self._multimodal_embed(
134
135
  batch_images, model=self.model_name, input_type=input_type
135
136
  ).embeddings
@@ -163,8 +164,7 @@ def voyage_v_loader(model_name, **kwargs):
163
164
  inputs, disable=not show_progress_bar, desc="Interleaved Encoding"
164
165
  ):
165
166
  batch_images = [
166
- _downsample_image(self.tensor_to_image(image))
167
- for image in batch["image"]
167
+ _downsample_image(image) for image in batch["image"]
168
168
  ]
169
169
  batch_texts = batch["text"]
170
170
  interleaved_inputs = [
@@ -32,7 +32,7 @@ from mteb.types import (
32
32
  logger = logging.getLogger(__name__)
33
33
 
34
34
 
35
- class Criterias(HelpfulStrEnum):
35
+ class Criteria(HelpfulStrEnum):
36
36
  """Enum for criteria to check when merging TaskResult objects."""
37
37
 
38
38
  MTEB_VERSION = "mteb_version"
@@ -671,7 +671,7 @@ class TaskResult(BaseModel):
671
671
  def is_mergeable(
672
672
  self,
673
673
  result: TaskResult | AbsTask,
674
- criteria: list[str] | list[Criterias] = [
674
+ criteria: list[str] | list[Criteria] = [
675
675
  "mteb_version",
676
676
  "dataset_revision",
677
677
  ],
@@ -688,9 +688,7 @@ class TaskResult(BaseModel):
688
688
  Returns:
689
689
  True if the TaskResult object can be merged with the other object, False otherwise.
690
690
  """
691
- criteria = [
692
- Criterias.from_str(c) if isinstance(c, str) else c for c in criteria
693
- ]
691
+ criteria = [Criteria.from_str(c) if isinstance(c, str) else c for c in criteria]
694
692
  if isinstance(result, TaskResult):
695
693
  name = result.task_name
696
694
  revision = result.dataset_revision
@@ -709,14 +707,14 @@ class TaskResult(BaseModel):
709
707
  )
710
708
  return False
711
709
 
712
- if Criterias.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
710
+ if Criteria.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
713
711
  if raise_error:
714
712
  raise ValueError(
715
713
  f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} and {mteb_version})"
716
714
  )
717
715
  return False
718
716
 
719
- if Criterias.DATASET_REVISION in criteria and self.dataset_revision != revision:
717
+ if Criteria.DATASET_REVISION in criteria and self.dataset_revision != revision:
720
718
  if raise_error:
721
719
  raise ValueError(
722
720
  f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
@@ -728,7 +726,7 @@ class TaskResult(BaseModel):
728
726
  def merge(
729
727
  self,
730
728
  new_results: TaskResult,
731
- criteria: list[str] | list[Criterias] = [
729
+ criteria: list[str] | list[Criteria] = [
732
730
  "mteb_version",
733
731
  "dataset_revision",
734
732
  ],
@@ -9,7 +9,7 @@ class AngryTweetsClassification(AbsTaskClassification):
9
9
  "path": "DDSC/angry-tweets",
10
10
  "revision": "20b0e6081892e78179356fada741b7afa381443d",
11
11
  },
12
- description="A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets",
12
+ description="A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets",
13
13
  reference="https://aclanthology.org/2021.nodalida-main.53/",
14
14
  type="Classification",
15
15
  category="t2c",
@@ -47,7 +47,7 @@ class AngryTweetsClassificationV2(AbsTaskClassification):
47
47
  "path": "mteb/angry_tweets",
48
48
  "revision": "b9475fb66a13befda4fa9871cd92343bb2c0eb77",
49
49
  },
50
- description="""A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets
50
+ description="""A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets
51
51
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
52
52
  reference="https://aclanthology.org/2021.nodalida-main.53/",
53
53
  type="Classification",
@@ -2641,7 +2641,7 @@ class InternationalCitizenshipQuestionsLegalBenchClassification(AbsTaskClassific
2641
2641
  class JCrewBlockerLegalBenchClassification(AbsTaskClassification):
2642
2642
  metadata = TaskMetadata(
2643
2643
  name="JCrewBlockerLegalBenchClassification",
2644
- description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of detemining whether the J.Crew Blocker is present in the document.",
2644
+ description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of determining whether the J.Crew Blocker is present in the document.",
2645
2645
  reference="https://huggingface.co/datasets/nguha/legalbench",
2646
2646
  dataset={
2647
2647
  "path": "mteb/JCrewBlockerLegalBenchClassification",
@@ -2677,7 +2677,7 @@ class JCrewBlockerLegalBenchClassification(AbsTaskClassification):
2677
2677
  class JCrewBlockerLegalBenchClassificationV2(AbsTaskClassification):
2678
2678
  metadata = TaskMetadata(
2679
2679
  name="JCrewBlockerLegalBenchClassification.v2",
2680
- description="""The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of detemining whether the J.Crew Blocker is present in the document.
2680
+ description="""The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of determining whether the J.Crew Blocker is present in the document.
2681
2681
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
2682
2682
  reference="https://huggingface.co/datasets/nguha/legalbench",
2683
2683
  dataset={
@@ -4500,7 +4500,7 @@ class OverrulingLegalBenchClassificationV2(AbsTaskClassification):
4500
4500
  class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification):
4501
4501
  metadata = TaskMetadata(
4502
4502
  name="PersonalJurisdictionLegalBenchClassification",
4503
- description="""Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could excercise personal jurisdiction over the defendant.""",
4503
+ description="""Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could exercise personal jurisdiction over the defendant.""",
4504
4504
  reference="https://huggingface.co/datasets/nguha/legalbench",
4505
4505
  dataset={
4506
4506
  "path": "mteb/PersonalJurisdictionLegalBenchClassification",
@@ -9,7 +9,7 @@ class MyanmarNews(AbsTaskClassification):
9
9
  "path": "mteb/MyanmarNews",
10
10
  "revision": "644419f24bc820bbf8af24e0b4714a069812e0a3",
11
11
  },
12
- description="The Myanmar News dataset on Hugging Face contains news articles in Burmese. It is designed for tasks such as text classification, sentiment analysis, and language modeling. The dataset includes a variety of news topics in 4 categorie, providing a rich resource for natural language processing applications involving Burmese which is a low resource language.",
12
+ description="The Myanmar News dataset on Hugging Face contains news articles in Burmese. It is designed for tasks such as text classification, sentiment analysis, and language modeling. The dataset includes a variety of news topics in 4 categories, providing a rich resource for natural language processing applications involving Burmese which is a low resource language.",
13
13
  reference="https://huggingface.co/datasets/myanmar_news",
14
14
  type="Classification",
15
15
  category="t2c",
@@ -45,7 +45,7 @@ class MyanmarNewsV2(AbsTaskClassification):
45
45
  "path": "mteb/myanmar_news",
46
46
  "revision": "475b43ffbdb5138ad67a01a2c860bc7db502f3c5",
47
47
  },
48
- description="""The Myanmar News dataset on Hugging Face contains news articles in Burmese. It is designed for tasks such as text classification, sentiment analysis, and language modeling. The dataset includes a variety of news topics in 4 categorie, providing a rich resource for natural language processing applications involving Burmese which is a low resource language.
48
+ description="""The Myanmar News dataset on Hugging Face contains news articles in Burmese. It is designed for tasks such as text classification, sentiment analysis, and language modeling. The dataset includes a variety of news topics in 4 categories, providing a rich resource for natural language processing applications involving Burmese which is a low resource language.
49
49
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
50
50
  reference="https://huggingface.co/datasets/myanmar_news",
51
51
  type="Classification",
@@ -5,7 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class WongnaiReviewsClassification(AbsTaskClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="WongnaiReviewsClassification",
8
- description="Wongnai features over 200,000 restaurants, beauty salons, and spas across Thailand on its platform, with detailed information about each merchant and user reviews. In this dataset there are 5 classes corressponding each star rating",
8
+ description="Wongnai features over 200,000 restaurants, beauty salons, and spas across Thailand on its platform, with detailed information about each merchant and user reviews. In this dataset there are 5 classes corresponding each star rating",
9
9
  reference="https://github.com/wongnai/wongnai-corpus",
10
10
  dataset={
11
11
  "path": "Wongnai/wongnai_reviews",
@@ -10,7 +10,7 @@ class UkrFormalityClassification(AbsTaskClassification):
10
10
  trainslating English GYAFC data.
11
11
  English data source: https://aclanthology.org/N18-1012/
12
12
  Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M
13
- Additionally, the dataset was balanced, witha labels: 0 - informal, 1 - formal.
13
+ Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal.
14
14
  """,
15
15
  dataset={
16
16
  "path": "ukr-detect/ukr-formality-dataset-translated-gyafc",
@@ -61,7 +61,7 @@ class UkrFormalityClassificationV2(AbsTaskClassification):
61
61
  trainslating English GYAFC data.
62
62
  English data source: https://aclanthology.org/N18-1012/
63
63
  Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M
64
- Additionally, the dataset was balanced, witha labels: 0 - informal, 1 - formal.
64
+ Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal.
65
65
 
66
66
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
67
67
  dataset={
@@ -23,14 +23,15 @@ class IndicXnliPairClassification(AbsTaskPairClassification):
23
23
  "path": "mteb/IndicXnliPairClassification",
24
24
  "revision": "027e97b9afe84ea3447b57b7705b8864bb2b3a83",
25
25
  },
26
- description="""INDICXNLI is similar to existing XNLI dataset in shape/form, but
27
- focusses on Indic language family.
28
- The train (392,702), validation (2,490), and evaluation sets (5,010) of English
29
- XNLI were translated from English into each of the eleven Indic languages. IndicTrans
30
- is a large Transformer-based sequence to sequence model. It is trained on Samanantar
31
- dataset (Ramesh et al., 2021), which is the largest parallel multi- lingual corpus
32
- over eleven Indic languages.
33
- """,
26
+ description=(
27
+ "INDICXNLI is similar to existing XNLI dataset in shape/form, but "
28
+ "focuses on Indic language family. "
29
+ "The train (392,702), validation (2,490), and evaluation sets (5,010) of English "
30
+ "XNLI were translated from English into each of the eleven Indic languages. IndicTrans "
31
+ "is a large Transformer-based sequence to sequence model. It is trained on Samanantar "
32
+ "dataset (Ramesh et al., 2021), which is the largest parallel multi- lingual corpus "
33
+ "over eleven Indic languages."
34
+ ),
34
35
  reference="https://gem-benchmark.com/data_cards/opusparcus",
35
36
  category="t2t",
36
37
  modalities=["text"],
@@ -60,9 +60,9 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
60
60
  self.data_loaded = True
61
61
 
62
62
  def dataset_transform(self) -> None:
63
- """And transform to a retrieval datset, which have the following attributes
63
+ """And transform to a retrieval dataset, which have the following attributes
64
64
 
65
- self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document datas like title and text
65
+ self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
66
66
  self.queries = Dict[query_id, str] #id => query
67
67
  self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
68
68
  """
@@ -117,9 +117,9 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
117
117
  self.data_loaded = True
118
118
 
119
119
  def dataset_transform(self) -> None:
120
- """And transform to a retrieval datset, which have the following attributes
120
+ """And transform to a retrieval dataset, which have the following attributes
121
121
 
122
- self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document datas like title and text
122
+ self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
123
123
  self.queries = Dict[query_id, str] #id => query
124
124
  self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
125
125
  """
@@ -177,9 +177,9 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
177
177
  self.data_loaded = True
178
178
 
179
179
  def dataset_transform(self) -> None:
180
- """And transform to a retrieval datset, which have the following attributes
180
+ """And transform to a retrieval dataset, which have the following attributes
181
181
 
182
- self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document datas like title and text
182
+ self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
183
183
  self.queries = Dict[query_id, str] #id => query
184
184
  self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
185
185
  """
@@ -234,9 +234,9 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
234
234
  self.data_loaded = True
235
235
 
236
236
  def dataset_transform(self) -> None:
237
- """And transform to a retrieval datset, which have the following attributes
237
+ """And transform to a retrieval dataset, which have the following attributes
238
238
 
239
- self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document datas like title and text
239
+ self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
240
240
  self.queries = Dict[query_id, str] #id => query
241
241
  self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
242
242
  """
@@ -56,7 +56,7 @@ Derczynski, Leon},
56
56
  self.data_loaded = True
57
57
 
58
58
  def dataset_transform(self) -> None:
59
- """And transform to a retrieval datset, which have the following attributes
59
+ """And transform to a retrieval dataset, which have the following attributes
60
60
 
61
61
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
62
62
  self.queries = dict[query_id, str] #id => query
@@ -69,9 +69,9 @@ Piperidis, Stelios},
69
69
  self.data_loaded = True
70
70
 
71
71
  def dataset_transform(self) -> None:
72
- """And transform to a retrieval datset, which have the following attributes
72
+ """And transform to a retrieval dataset, which have the following attributes
73
73
 
74
- self.corpus = dict[doc_id, dict[str, str]] #id => dict with document datas like title and text
74
+ self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
75
75
  self.queries = dict[query_id, str] #id => query
76
76
  self.relevant_docs = dict[query_id, dict[[doc_id, score]]
77
77
  """
@@ -45,9 +45,9 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
45
45
  self.data_loaded = True
46
46
 
47
47
  def dataset_transform(self) -> None:
48
- """And transform to a retrieval datset, which have the following attributes
48
+ """And transform to a retrieval dataset, which have the following attributes
49
49
 
50
- self.corpus = dict[doc_id, dict[str, str]] #id => dict with document datas like title and text
50
+ self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
51
51
  self.queries = dict[query_id, str] #id => query
52
52
  self.relevant_docs = dict[query_id, dict[[doc_id, score]]
53
53
  """
@@ -22,6 +22,7 @@ from .cirr_it2i_retrieval import CIRRIT2IRetrieval
22
22
  from .climate_fever_retrieval import (
23
23
  ClimateFEVER,
24
24
  ClimateFEVERHardNegatives,
25
+ ClimateFEVERHardNegativesV2,
25
26
  ClimateFEVERRetrievalv2,
26
27
  )
27
28
  from .cqa_dupstack_android_retrieval import CQADupstackAndroidRetrieval
@@ -57,7 +58,7 @@ from .dapfam_patent_retrieval import (
57
58
  DAPFAMOutTitlAbsToTitlAbsClmRetrieval,
58
59
  DAPFAMOutTitlAbsToTitlAbsRetrieval,
59
60
  )
60
- from .dbpedia_retrieval import DBPedia, DBPediaHardNegatives
61
+ from .dbpedia_retrieval import DBPedia, DBPediaHardNegatives, DBPediaHardNegativesV2
61
62
  from .edis_t2it_retrieval import EDIST2ITRetrieval
62
63
  from .encyclopedia_vqa_it2it_retrieval import EncyclopediaVQAIT2ITRetrieval
63
64
  from .english_finance1_retrieval import EnglishFinance1Retrieval
@@ -70,7 +71,7 @@ from .fashion200k_i2t_retrieval import Fashion200kI2TRetrieval
70
71
  from .fashion200k_t2i_retrieval import Fashion200kT2IRetrieval
71
72
  from .fashion_iq_it2i_retrieval import FashionIQIT2IRetrieval
72
73
  from .feedback_qa_retrieval import FeedbackQARetrieval
73
- from .fever_retrieval import FEVER, FEVERHardNegatives
74
+ from .fever_retrieval import FEVER, FEVERHardNegatives, FEVERHardNegativesV2
74
75
  from .fi_qa2018_retrieval import FiQA2018
75
76
  from .fin_qa_retrieval import FinQARetrieval
76
77
  from .finance_bench_retrieval import FinanceBenchRetrieval
@@ -85,7 +86,11 @@ from .hateful_memes_i2t_retrieval import HatefulMemesI2TRetrieval
85
86
  from .hateful_memes_t2i_retrieval import HatefulMemesT2IRetrieval
86
87
  from .hc3_finance_retrieval import HC3FinanceRetrieval
87
88
  from .hella_swag_retrieval import HellaSwag
88
- from .hotpot_qa_retrieval import HotpotQA, HotpotQAHardNegatives
89
+ from .hotpot_qa_retrieval import (
90
+ HotpotQA,
91
+ HotpotQAHardNegatives,
92
+ HotpotQAHardNegativesV2,
93
+ )
89
94
  from .image_co_de_t2i_retrieval import ImageCoDeT2IRetrieval
90
95
  from .info_seek_it2it_retrieval import InfoSeekIT2ITRetrieval
91
96
  from .info_seek_it2t_retrieval import InfoSeekIT2TRetrieval
@@ -133,7 +138,11 @@ from .oven_it2it_retrieval import OVENIT2ITRetrieval
133
138
  from .oven_it2t_retrieval import OVENIT2TRetrieval
134
139
  from .piqa_retrieval import PIQA
135
140
  from .quail_retrieval import Quail
136
- from .quora_retrieval import QuoraRetrieval, QuoraRetrievalHardNegatives
141
+ from .quora_retrieval import (
142
+ QuoraRetrieval,
143
+ QuoraRetrievalHardNegatives,
144
+ QuoraRetrievalHardNegativesV2,
145
+ )
137
146
  from .r2_med_retrieval import (
138
147
  R2MEDBioinformaticsRetrieval,
139
148
  R2MEDBiologyRetrieval,
@@ -247,6 +256,7 @@ __all__ = [
247
256
  "ChemNQRetrieval",
248
257
  "ClimateFEVER",
249
258
  "ClimateFEVERHardNegatives",
259
+ "ClimateFEVERHardNegativesV2",
250
260
  "ClimateFEVERRetrievalv2",
251
261
  "DAPFAMAllTitlAbsClmToFullTextRetrieval",
252
262
  "DAPFAMAllTitlAbsClmToTitlAbsClmRetrieval",
@@ -268,6 +278,7 @@ __all__ = [
268
278
  "DAPFAMOutTitlAbsToTitlAbsRetrieval",
269
279
  "DBPedia",
270
280
  "DBPediaHardNegatives",
281
+ "DBPediaHardNegativesV2",
271
282
  "EDIST2ITRetrieval",
272
283
  "EncyclopediaVQAIT2ITRetrieval",
273
284
  "EnglishFinance1Retrieval",
@@ -276,6 +287,7 @@ __all__ = [
276
287
  "EnglishFinance4Retrieval",
277
288
  "EnglishHealthcare1Retrieval",
278
289
  "FEVERHardNegatives",
290
+ "FEVERHardNegativesV2",
279
291
  "FaithDialRetrieval",
280
292
  "Fashion200kI2TRetrieval",
281
293
  "Fashion200kT2IRetrieval",
@@ -296,6 +308,7 @@ __all__ = [
296
308
  "HellaSwag",
297
309
  "HotpotQA",
298
310
  "HotpotQAHardNegatives",
311
+ "HotpotQAHardNegativesV2",
299
312
  "ImageCoDeT2IRetrieval",
300
313
  "InfoSeekIT2ITRetrieval",
301
314
  "InfoSeekIT2TRetrieval",
@@ -345,6 +358,7 @@ __all__ = [
345
358
  "Quail",
346
359
  "QuoraRetrieval",
347
360
  "QuoraRetrievalHardNegatives",
361
+ "QuoraRetrievalHardNegativesV2",
348
362
  "R2MEDBioinformaticsRetrieval",
349
363
  "R2MEDBiologyRetrieval",
350
364
  "R2MEDIIYiClinicalRetrieval",
@@ -1,30 +1,21 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class ClimateFEVER(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="ClimateFEVER",
8
- description="CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims (queries) regarding climate-change. The underlying corpus is the same as FVER.",
9
- reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
10
- dataset={
11
- "path": "mteb/climate-fever",
12
- "revision": "47f2ac6acb640fc46020b02a5b59fdda04d39380",
13
- },
14
- type="Retrieval",
15
- category="t2t",
16
- modalities=["text"],
17
- eval_splits=["test"],
18
- eval_langs=["eng-Latn"],
19
- main_score="ndcg_at_10",
20
- date=("2001-01-01", "2020-12-31"), # launch of wiki -> paper publication
21
- domains=["Encyclopaedic", "Written"],
22
- task_subtypes=["Claim verification"],
23
- license="cc-by-sa-4.0",
24
- annotations_creators="human-annotated",
25
- dialect=[],
26
- sample_creation="found",
27
- bibtex_citation=r"""
4
+ _climate_fever_metadata = dict(
5
+ type="Retrieval",
6
+ category="t2t",
7
+ modalities=["text"],
8
+ eval_splits=["test"],
9
+ eval_langs=["eng-Latn"],
10
+ main_score="ndcg_at_10",
11
+ date=("2001-01-01", "2020-12-31"), # launch of wiki -> paper publication
12
+ domains=["Encyclopaedic", "Written"],
13
+ task_subtypes=["Claim verification"],
14
+ license="cc-by-sa-4.0",
15
+ annotations_creators="human-annotated",
16
+ dialect=[],
17
+ sample_creation="found",
18
+ bibtex_citation=r"""
28
19
  @misc{diggelmann2021climatefever,
29
20
  archiveprefix = {arXiv},
30
21
  author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
@@ -34,82 +25,82 @@ class ClimateFEVER(AbsTaskRetrieval):
34
25
  year = {2021},
35
26
  }
36
27
  """,
28
+ )
29
+
30
+
31
+ class ClimateFEVER(AbsTaskRetrieval):
32
+ metadata = TaskMetadata(
33
+ name="ClimateFEVER",
34
+ description=(
35
+ "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims "
36
+ "(queries) regarding climate-change. The underlying corpus is the same as FEVER."
37
+ ),
38
+ reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
39
+ dataset={
40
+ "path": "mteb/climate-fever",
41
+ "revision": "47f2ac6acb640fc46020b02a5b59fdda04d39380",
42
+ },
43
+ prompt={
44
+ "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
45
+ },
46
+ **_climate_fever_metadata,
47
+ )
48
+
49
+
50
+ class ClimateFEVERRetrievalv2(AbsTaskRetrieval):
51
+ metadata = TaskMetadata(
52
+ name="ClimateFEVER.v2",
53
+ description=(
54
+ "CLIMATE-FEVER is a dataset following the FEVER methodology, containing 1,535 real-world climate change claims. "
55
+ "This updated version addresses corpus mismatches and qrel inconsistencies in MTEB, restoring labels while refining corpus-query alignment for better accuracy."
56
+ ),
57
+ reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
58
+ dataset={
59
+ "path": "mteb/climate-fever-v2",
60
+ "revision": "e438c9586767800aeb10dbe8a245c41dbea4e5f4",
61
+ },
37
62
  prompt={
38
63
  "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
39
64
  },
65
+ adapted_from=["ClimateFEVER"],
66
+ **_climate_fever_metadata,
40
67
  )
41
68
 
42
69
 
43
70
  class ClimateFEVERHardNegatives(AbsTaskRetrieval):
44
71
  metadata = TaskMetadata(
45
72
  name="ClimateFEVERHardNegatives",
46
- description="CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
73
+ description=(
74
+ "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. "
75
+ "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
76
+ ),
47
77
  reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
48
78
  dataset={
49
79
  "path": "mteb/ClimateFEVER_test_top_250_only_w_correct-v2",
50
80
  "revision": "3a309e201f3c2c4b13bd4a367a8f37eee2ec1d21",
51
81
  },
52
- type="Retrieval",
53
- category="t2t",
54
- modalities=["text"],
55
- eval_splits=["test"],
56
- eval_langs=["eng-Latn"],
57
- main_score="ndcg_at_10",
58
- date=("2001-01-01", "2020-12-31"), # launch of wiki -> paper publication
59
- domains=["Encyclopaedic", "Written"],
60
- task_subtypes=["Claim verification"],
61
- license="cc-by-sa-4.0",
62
- annotations_creators="human-annotated",
63
- dialect=[],
64
- sample_creation="found",
65
- bibtex_citation=r"""
66
- @misc{diggelmann2021climatefever,
67
- archiveprefix = {arXiv},
68
- author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
69
- eprint = {2012.00614},
70
- primaryclass = {cs.CL},
71
- title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims},
72
- year = {2021},
73
- }
74
- """,
75
82
  adapted_from=["ClimateFEVER"],
83
+ superseded_by="ClimateFEVERHardNegatives.v2",
84
+ **_climate_fever_metadata,
76
85
  )
77
86
 
78
87
 
79
- class ClimateFEVERRetrievalv2(AbsTaskRetrieval):
88
+ class ClimateFEVERHardNegativesV2(AbsTaskRetrieval):
80
89
  metadata = TaskMetadata(
81
- name="ClimateFEVER.v2",
82
- description="CLIMATE-FEVER is a dataset following the FEVER methodology, containing 1,535 real-world climate change claims. This updated version addresses corpus mismatches and qrel inconsistencies in MTEB, restoring labels while refining corpus-query alignment for better accuracy. ",
90
+ name="ClimateFEVERHardNegatives.v2",
91
+ description=(
92
+ "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. "
93
+ "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
94
+ "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
95
+ ),
83
96
  reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
84
97
  dataset={
85
- "path": "mteb/climate-fever-v2",
86
- "revision": "e438c9586767800aeb10dbe8a245c41dbea4e5f4",
98
+ "path": "mteb/ClimateFEVER_test_top_250_only_w_correct-v2",
99
+ "revision": "3a309e201f3c2c4b13bd4a367a8f37eee2ec1d21",
87
100
  },
88
- type="Retrieval",
89
- category="t2t",
90
- modalities=["text"],
91
- eval_splits=["test"],
92
- eval_langs=["eng-Latn"],
93
- main_score="ndcg_at_10",
94
- date=("2001-01-01", "2020-12-31"), # launch of wiki -> paper publication
95
- domains=["Academic", "Written"],
96
- task_subtypes=["Claim verification"],
97
- license="cc-by-sa-4.0",
98
- annotations_creators="human-annotated",
99
- dialect=[],
100
- sample_creation="found",
101
- bibtex_citation=r"""
102
- @misc{diggelmann2021climatefever,
103
- archiveprefix = {arXiv},
104
- author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
105
- eprint = {2012.00614},
106
- primaryclass = {cs.CL},
107
- title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims},
108
- year = {2021},
109
- }
110
- """,
101
+ adapted_from=["ClimateFEVER"],
111
102
  prompt={
112
103
  "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
113
104
  },
114
- adapted_from=["ClimateFEVER"],
105
+ **_climate_fever_metadata,
115
106
  )