PyPI - mteb - Versions diffs - 2.7.20__py3-none-any.whl → 2.7.22__py3-none-any.whl - Mend

mteb 2.7.20__py3-none-any.whl → 2.7.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

mteb/abstasks/regression.py CHANGED Viewed

@@ -93,7 +93,6 @@ class AbsTaskRegression(AbsTaskClassification):
         n_samples: Number of samples to use for training the regression model. If the dataset has fewer samples than n_samples, all samples are used.
         abstask_prompt: Prompt to use for the task for instruction model if not prompt is provided in TaskMetadata.prompt.
         evaluator_model: The model to use for evaluation. Can be any sklearn compatible model. Default is `LinearRegression`.
     """
     evaluator: type[SklearnEvaluator] = SklearnEvaluator

mteb/models/model_implementations/{nvidia_llama_nemoretriever_colemb.py → nvidia_nemotron_colembed_vl.py} RENAMED Viewed

@@ -7,6 +7,7 @@ from packaging.specifiers import SpecifierSet
 from torch.utils.data import DataLoader
 from transformers import __version__ as transformers_version
+from mteb._requires_package import requires_package
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
@@ -24,27 +25,51 @@ LLAMA_NEMORETRIEVER_CITATION = """@misc{xu2025llamanemoretrievercolembedtopperfo
       url={https://arxiv.org/abs/2507.05513}
 }"""
+# Transformers version constraints per extra.
+# Keep in sync with pyproject.toml [project.optional-dependencies]
+#
+# Note: The extra name reflects the transformers version requirement, not the model version.
+# For example, llama-nemotron-colembed-vl-3b-v2 uses "llama-nemotron-colembed-vl" because it
+# requires transformers==4.49.0, even though it's a "v2" model by name.
+_TRANSFORMERS_CONSTRAINTS: dict[str, str] = {
+    "llama-nemotron-colembed-vl": "==4.49.0",  # llama-nemoretriever-colembed-*
+    "nemotron-colembed-vl-v2": "==5.0.0rc0",  # nemotron-colembed-vl-4b-v2, nemotron-colembed-vl-8b-v2
+}
+class NemotronColEmbedVL(AbsEncoder):
+    """Encoder for the NemotronColEmbedVL family of models."""
-class LlamaNemoretrieverColembed(AbsEncoder):
     def __init__(
         self,
         model_name_or_path: str,
         revision: str,
         trust_remote_code: bool,
-        transformers_version_constraint: str | None = None,
+        extra_name: str = "llama-nemotron-colembed-vl",
         device_map="cuda",
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2",
         **kwargs,
     ):
-        if transformers_version_constraint is not None:
-            spec = SpecifierSet(transformers_version_constraint)
-            if transformers_version not in spec:
-                raise RuntimeError(
-                    f"Model `{model_name_or_path}` requires transformers{transformers_version_constraint}, "
-                    f"but {transformers_version} is installed. "
-                    f"Run: pip install 'transformers{transformers_version_constraint}'"
-                )
+        install_hint = f"pip install 'mteb[{extra_name}]'"
+        # Check transformers version
+        constraint = _TRANSFORMERS_CONSTRAINTS.get(extra_name)
+        if constraint is None:
+            raise ValueError(
+                f"Unknown extra_name '{extra_name}'. "
+                f"Must be one of: {list(_TRANSFORMERS_CONSTRAINTS.keys())}"
+            )
+        if transformers_version not in SpecifierSet(constraint):
+            raise RuntimeError(
+                f"Model `{model_name_or_path}` requires transformers{constraint}, "
+                f"but {transformers_version} is installed. "
+                f"Run: {install_hint}"
+            )
+        # Check required packages
+        for package in ("torchvision", "accelerate", "flash_attn"):
+            requires_package(self, package, model_name_or_path, install_hint)
         from transformers import AutoModel
@@ -166,10 +191,10 @@ TRAINING_DATA_v2 = {
 }
 llama_nemoretriever_colembed_1b_v1 = ModelMeta(
-    loader=LlamaNemoretrieverColembed,
+    loader=NemotronColEmbedVL,
     loader_kwargs=dict(
+        extra_name="llama-nemotron-colembed-vl",
         trust_remote_code=True,
-        transformers_version_constraint="==4.49.0",
     ),
     name="nvidia/llama-nemoretriever-colembed-1b-v1",
     model_type=["late-interaction"],
@@ -195,10 +220,10 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
 )
 llama_nemoretriever_colembed_3b_v1 = ModelMeta(
-    loader=LlamaNemoretrieverColembed,
+    loader=NemotronColEmbedVL,
     loader_kwargs=dict(
+        extra_name="llama-nemotron-colembed-vl",
         trust_remote_code=True,
-        transformers_version_constraint="==4.49.0",
     ),
     name="nvidia/llama-nemoretriever-colembed-3b-v1",
     model_type=["late-interaction"],
@@ -224,10 +249,10 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
 )
 llama_nemotron_colembed_vl_3b_v2 = ModelMeta(
-    loader=LlamaNemoretrieverColembed,
+    loader=NemotronColEmbedVL,
     loader_kwargs=dict(
+        extra_name="llama-nemotron-colembed-vl",
         trust_remote_code=True,
-        transformers_version_constraint="==4.49.0",
     ),
     name="nvidia/llama-nemotron-colembed-vl-3b-v2",
     model_type=["late-interaction"],
@@ -251,11 +276,12 @@ llama_nemotron_colembed_vl_3b_v2 = ModelMeta(
     citation=LLAMA_NEMORETRIEVER_CITATION,
 )
 nemotron_colembed_vl_4b_v2 = ModelMeta(
-    loader=LlamaNemoretrieverColembed,
+    loader=NemotronColEmbedVL,
     loader_kwargs=dict(
+        extra_name="nemotron-colembed-vl-v2",
         trust_remote_code=True,
-        transformers_version_constraint="==5.0.0rc0",
     ),
     name="nvidia/nemotron-colembed-vl-4b-v2",
     revision="823b1625c15fe3da73fa094205e538a7a2301a2a",
@@ -280,10 +306,10 @@ nemotron_colembed_vl_4b_v2 = ModelMeta(
 nemotron_colembed_vl_8b_v2 = ModelMeta(
-    loader=LlamaNemoretrieverColembed,
+    loader=NemotronColEmbedVL,
     loader_kwargs=dict(
+        extra_name="nemotron-colembed-vl-v2",
         trust_remote_code=True,
-        transformers_version_constraint="==5.0.0rc0",
     ),
     name="nvidia/nemotron-colembed-vl-8b-v2",
     revision="6cbe43579dda6237768fc373768ad372cc5cdfec",

mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py CHANGED Viewed

@@ -18,14 +18,17 @@ class TenKGnadClusteringP2P(AbsTaskClusteringLegacy):
         eval_splits=["test"],
         eval_langs=["deu-Latn"],
         main_score="v_measure",
-        date=None,
+        date=(
+            "2000-01-01",
+            "2020-12-31",
+        ),  # since it is news it is guessed that it is from 2000 to 2020
         domains=["Web", "Written"],
         task_subtypes=[],
         license="cc-by-nc-sa-4.0",
-        annotations_creators=None,
+        annotations_creators="derived",
         dialect=[],
         sample_creation="found",
-        bibtex_citation=None,
+        bibtex_citation="",  # none found
         superseded_by="TenKGnadClusteringP2P.v2",
     )
@@ -36,7 +39,7 @@ class TenKGnadClusteringP2PFast(AbsTaskClustering):
     metadata = TaskMetadata(
         name="TenKGnadClusteringP2P.v2",
-        description="Clustering of news article titles+subheadings+texts. Clustering of 10 splits on the news article category.",
+        description="Clustering of news article titles+subheadings+texts. Clustering of 10 splits on the news article category. v2 uses a faster evaluation method used in the MMTEB paper, which allow for notably faster evaluation.",
         reference="https://tblock.github.io/10kGNAD/",
         dataset={
             "path": "slvnwhrl/tenkgnad-clustering-p2p",
@@ -53,13 +56,12 @@ class TenKGnadClusteringP2PFast(AbsTaskClustering):
             "2020-12-31",
         ),  # since it is news it is guessed that it is from 2000 to 2020
         domains=["News", "Non-fiction", "Written"],
-        task_subtypes=None,
+        task_subtypes=["Thematic clustering"],
         license="cc-by-sa-4.0",
         annotations_creators="derived",
         dialect=[],
         sample_creation="found",
-        bibtex_citation=None,  # none found
-        # due to duplicates
+        bibtex_citation="",  # none found
         adapted_from=["TenKGnadClusteringP2P"],
     )

mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py CHANGED Viewed

@@ -18,14 +18,17 @@ class TenKGnadClusteringS2S(AbsTaskClusteringLegacy):
         eval_splits=["test"],
         eval_langs=["deu-Latn"],
         main_score="v_measure",
-        date=None,
+        date=(
+            "2000-01-01",
+            "2020-12-31",
+        ),  # since it is news it is guessed that it is from 2000 to 2020
         domains=["News", "Non-fiction", "Written"],
-        task_subtypes=["Topic classification"],
-        license=None,
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
-        bibtex_citation=None,
+        task_subtypes=["Thematic clustering"],
+        license="cc-by-nc-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="",  # none found
         superseded_by="TenKGnadClusteringS2S.v2",
     )
@@ -36,7 +39,7 @@ class TenKGnadClusteringS2SFast(AbsTaskClustering):
     metadata = TaskMetadata(
         name="TenKGnadClusteringS2S.v2",
-        description="Clustering of news article titles. Clustering of 10 splits on the news article category.",
+        description="Clustering of news article titles. Clustering of 10 splits on the news article category. v2 uses a faster evaluation method used in the MMTEB paper, which allow for notably faster evaluation.",
         reference="https://tblock.github.io/10kGNAD/",
         dataset={
             "path": "slvnwhrl/tenkgnad-clustering-s2s",
@@ -53,13 +56,12 @@ class TenKGnadClusteringS2SFast(AbsTaskClustering):
             "2020-12-31",
         ),  # since it is news it is guessed that it is from 2000 to 2020
         domains=["News", "Non-fiction", "Written"],
-        task_subtypes=["Topic classification"],
+        task_subtypes=["Thematic clustering"],
         license="cc-by-sa-4.0",
         annotations_creators="derived",
         dialect=[],
         sample_creation="found",
-        bibtex_citation=None,  # none found
-        # due to duplicates
+        bibtex_citation="",  # none found
         adapted_from=["TenKGnadClusteringS2S"],
     )

mteb/tasks/clustering/fra/hal_clustering_s2s.py CHANGED Viewed

@@ -33,7 +33,7 @@ class HALClusteringS2S(AbsTaskClusteringLegacy):
         task_subtypes=["Thematic clustering"],
         license="apache-2.0",
         annotations_creators="human-annotated",
-        dialect=None,
+        dialect=[],
         sample_creation="found",
         bibtex_citation=r"""
 @misc{ciancone2024extending,

mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py CHANGED Viewed

@@ -47,7 +47,7 @@ class WikiClusteringP2P(AbsTaskClusteringLegacy):
         annotations_creators="derived",
         dialect=[],
         sample_creation="created",
-        bibtex_citation=None,  # None exists
+        bibtex_citation="",  # None exists
         superseded_by="WikiClusteringP2P.v2",
     )

mteb/tasks/clustering/nob/vg_clustering.py CHANGED Viewed

@@ -42,7 +42,7 @@ class VGClustering(AbsTaskClusteringLegacy):
         main_score="v_measure",
         date=("2020-01-01", "2024-12-31"),  # best guess
         domains=["News", "Non-fiction", "Written"],
-        license=None,
+        license="not specified",
         annotations_creators="derived",
         dialect=[],
         task_subtypes=["Thematic clustering"],

mteb/tasks/clustering/rom/romani_bible_clustering.py CHANGED Viewed

@@ -24,5 +24,5 @@ class RomaniBibleClustering(AbsTaskClusteringLegacy):
         annotations_creators="derived",
         dialect=["Kalderash"],
         sample_creation="human-translated and localized",
-        bibtex_citation=None,
+        bibtex_citation="",
     )

mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py CHANGED Viewed

@@ -17,7 +17,7 @@ class TwitterSemEval2015PC(AbsTaskPairClassification):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="max_ap",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Social", "Written"],
         task_subtypes=[],
         license="not specified",

mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py CHANGED Viewed

@@ -17,7 +17,7 @@ class TwitterURLCorpus(AbsTaskPairClassification):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="max_ap",
-        date=None,
+        date=("2017-01-01", "2017-12-31"),  # publication year
         domains=["Social", "Written"],
         task_subtypes=[],
         license="not specified",

mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py CHANGED Viewed

@@ -41,7 +41,7 @@ class IndicXnliPairClassification(AbsTaskPairClassification):
         main_score="max_ap",
         date=("2022-04-22", "2022-10-06"),
         domains=["Non-fiction", "Fiction", "Government", "Written"],
-        task_subtypes=None,
+        task_subtypes=[],
         license="cc-by-4.0",
         annotations_creators="derived",
         dialect=[],

mteb/tasks/pair_classification/pol/polish_pc.py CHANGED Viewed

@@ -77,7 +77,7 @@ class PpcPC(AbsTaskPairClassification):
         eval_splits=["test"],
         eval_langs=["pol-Latn"],
         main_score="max_ap",
-        date=None,
+        date=("2022-01-01", "2022-12-31"),  # publication year
         domains=[
             "Fiction",
             "Non-fiction",
@@ -125,7 +125,7 @@ class CdscePC(AbsTaskPairClassification):
         eval_splits=["test"],
         eval_langs=["pol-Latn"],
         main_score="max_ap",
-        date=None,
+        date=("2017-01-01", "2017-12-31"),  # publication year
         domains=["Written"],
         task_subtypes=[],
         license="cc-by-nc-sa-4.0",

mteb/tasks/retrieval/eng/cqa_dupstack_android_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Programming", "Web", "Written", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_english_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_gaming_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Web", "Written"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_gis_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_mathematica_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Academic", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_physics_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Academic", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_programmers_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Programming", "Written", "Non-fiction"],
         task_subtypes=[],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_stats_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackStatsRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Academic", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_tex_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackTexRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Non-fiction"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_unix_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackUnixRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Web", "Programming"],
         task_subtypes=["Question answering", "Duplicate Detection"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_webmasters_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackWebmastersRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Web"],
         task_subtypes=["Question answering"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/cqa_dupstack_wordpress_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class CQADupstackWordpressRetrieval(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2015-01-01", "2015-12-31"),  # publication year
         domains=["Written", "Web", "Programming"],
         task_subtypes=["Question answering"],
         license="apache-2.0",

mteb/tasks/retrieval/eng/fever_retrieval.py CHANGED Viewed

@@ -9,7 +9,7 @@ _fever_metadata = dict(
     eval_splits=["test"],
     eval_langs=["eng-Latn"],
     main_score="ndcg_at_10",
-    date=None,
+    date=("2018-01-01", "2018-12-31"),  # publication year
     domains=["Encyclopaedic", "Written"],
     task_subtypes=["Claim verification"],
     license="cc-by-nc-sa-3.0",

mteb/tasks/retrieval/eng/fi_qa2018_retrieval.py CHANGED Viewed

@@ -19,7 +19,7 @@ class FiQA2018(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2018-01-01", "2018-12-31"),  # publication year
         domains=["Written", "Financial"],
         task_subtypes=["Question answering"],
         license="not specified",

mteb/tasks/retrieval/eng/msmarc_ov2_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class MSMARCOv2(AbsTaskRetrieval):
         eval_splits=["train", "dev", "dev2"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2016-01-01", "2016-12-31"),  # publication year
         domains=[
             "Encyclopaedic",
             "Academic",

mteb/tasks/retrieval/eng/msmarco_retrieval.py CHANGED Viewed

@@ -19,7 +19,7 @@ class MSMARCO(AbsTaskRetrieval):
         eval_splits=["dev"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2016-01-01", "2016-12-31"),  # publication year
         domains=[
             "Encyclopaedic",
             "Academic",
@@ -81,7 +81,7 @@ class MSMARCOHardNegatives(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2016-01-01", "2016-12-31"),  # publication year
         domains=[
             "Encyclopaedic",
             "Academic",

mteb/tasks/retrieval/eng/nf_corpus_retrieval.py CHANGED Viewed

@@ -17,13 +17,13 @@ class NFCorpus(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2016-01-01", "2016-12-31"),  # publication year
         domains=["Medical", "Academic", "Written"],
-        task_subtypes=None,
-        license=None,
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
         bibtex_citation=r"""
 @inproceedings{boteva2016,
   author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan},

mteb/tasks/retrieval/eng/nq_retrieval.py CHANGED Viewed

@@ -17,7 +17,7 @@ class NQ(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
+        date=("2019-01-01", "2019-12-31"),  # publication year
         domains=["Written", "Encyclopaedic"],
         task_subtypes=["Question answering"],
         license="cc-by-nc-sa-3.0",
@@ -57,13 +57,13 @@ class NQHardNegatives(AbsTaskRetrieval):
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
-        date=None,
-        domains=None,
-        task_subtypes=None,
-        license=None,
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
+        date=("2019-01-01", "2019-12-31"),  # publication year
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=["Question answering"],
+        license="cc-by-nc-sa-3.0",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="found",
         bibtex_citation=r"""
 @article{47761,
   author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh

mteb/tasks/retrieval/eng/quora_retrieval.py CHANGED Viewed

@@ -9,7 +9,7 @@ _quora_metadata = dict(
     eval_splits=["test"],
     eval_langs=["eng-Latn"],
     main_score="ndcg_at_10",
-    date=None,
+    date=("2017-01-01", "2017-12-31"),  # original publication year
     domains=["Written", "Web", "Blog"],
     task_subtypes=["Question answering"],
     license="not specified",

mteb 2.7.20__py3-none-any.whl → 2.7.22__py3-none-any.whl

mteb 2.7.20__py3-none-any.whl → 2.7.22__py3-none-any.whl