mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
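Most of the file list is mechanical, but a few entries affect how things are looked up at runtime: the mxbai model implementations move into `mixedbread_ai_models.py` (with `mxbai_models.py` removed), and the benchmark and model registries gain entries. Below is a minimal sketch, not taken from the diff, of how a user might confirm that names still resolve after upgrading; it assumes the public `get_model_meta` and `get_benchmark` helpers keep their 2.x signatures, and the specific model and benchmark names are illustrative.

```python
# Minimal sketch (not from the diff): confirm that implementations relocated in this
# release, e.g. mxbai_models.py -> mixedbread_ai_models.py, still resolve through the
# public registry helpers. The model and benchmark names below are illustrative and
# assumed to be registered under these names.
import mteb

meta = mteb.get_model_meta("mixedbread-ai/mxbai-embed-large-v1")
print(meta.name, meta.revision)

benchmark = mteb.get_benchmark("MTEB(eng, v2)")
print(len(benchmark.tasks), "tasks")
```

Selected hunks from the new and updated task files follow.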
```diff
@@ -25,7 +25,7 @@ class CUB200I2I(AbsTaskRetrieval):
         modalities=["image"],
         sample_creation="created",
         bibtex_citation=r"""
-@article{
+@article{welinder2010caltech,
   author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro},
   month = {09},
   pages = {},
```
```diff
@@ -1,5 +1,19 @@
 from .auto_rag_retrieval import AutoRAGRetrieval
 from .ko_strategy_qa import KoStrategyQA
+from .kovidore2_bench_retrieval import (
+    KoVidore2CybersecurityRetrieval,
+    KoVidore2EconomicRetrieval,
+    KoVidore2EnergyRetrieval,
+    KoVidore2HrRetrieval,
+)
 from .squad_kor_v1_retrieval import SQuADKorV1Retrieval
 
-__all__ = [
+__all__ = [
+    "AutoRAGRetrieval",
+    "KoStrategyQA",
+    "KoVidore2CybersecurityRetrieval",
+    "KoVidore2EconomicRetrieval",
+    "KoVidore2EnergyRetrieval",
+    "KoVidore2HrRetrieval",
+    "SQuADKorV1Retrieval",
+]
```
```diff
@@ -0,0 +1,142 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2CybersecurityRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
+            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EconomicRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-economic-mteb",
+            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EnergyRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-energy-mteb",
+            "revision": "8c09a3d22b1fa3a7f5e815e9521da9b048754211",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2HrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2HrRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-hr-mteb",
+            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
```
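The four KoVidore2 classes above follow the standard `AbsTaskRetrieval` + `TaskMetadata` pattern, and the `kor/__init__.py` hunk exports them, so they become selectable by name through the task registry. A minimal usage sketch (not part of the diff), assuming mteb's existing `get_tasks` API:

```python
# Illustrative sketch (not part of the diff): load the newly added KoVidore2 tasks by
# name through the task registry. Assumes mteb's existing get_tasks API.
import mteb

tasks = mteb.get_tasks(
    tasks=[
        "KoVidore2CybersecurityRetrieval",
        "KoVidore2EconomicRetrieval",
        "KoVidore2EnergyRetrieval",
        "KoVidore2HrRetrieval",
    ]
)
for task in tasks:
    # Each task exposes the TaskMetadata defined above.
    print(task.metadata.name, task.metadata.type, task.metadata.main_score)
```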
```diff
@@ -6,6 +6,7 @@ from .cross_lingual_semantic_discrimination_wmt21 import (
     CrossLingualSemanticDiscriminationWMT21,
 )
 from .cur_ev1_retrieval import CUREv1Retrieval
+from .euro_pirq_retrieval import EuroPIRQRetrieval
 from .indic_qa_retrieval import IndicQARetrieval
 from .jina_vdr_bench_retrieval import (
     JinaVDRAirbnbSyntheticRetrieval,
@@ -107,6 +108,7 @@ __all__ = [
     "CUREv1Retrieval",
     "CrossLingualSemanticDiscriminationWMT19",
     "CrossLingualSemanticDiscriminationWMT21",
+    "EuroPIRQRetrieval",
     "IndicQARetrieval",
     "JinaVDRAirbnbSyntheticRetrieval",
     "JinaVDRArabicChartQARetrieval",
```
```diff
@@ -0,0 +1,43 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+_LANGUAGES = {
+    "en": ["eng-Latn"],
+    "fi": ["fin-Latn"],
+    "pt": ["por-Latn"],
+}
+
+
+class EuroPIRQRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="EuroPIRQRetrieval",
+        description="The EuroPIRQ retrieval dataset is a multilingual collection designed for evaluating retrieval and cross-lingual retrieval tasks. Dataset contains 10,000 parallel passages & 100 parallel queries (synthetic) in three languages: English, Portuguese, and Finnish, constructed from the European Union's DGT-Acquis corpus.",
+        reference="https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval",
+        dataset={
+            "path": "eherra/EuroPIRQ-retrieval",
+            "revision": "59225ed25fbcea2185e1acbc8c3c80f1a8cd8341",
+        },
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=_LANGUAGES,
+        main_score="ndcg_at_10",
+        date=("2025-12-01", "2025-12-31"),
+        domains=["Legal"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="LM-generated and reviewed",
+        dialect=[],
+        sample_creation="found",
+        is_public=True,
+        bibtex_citation=r"""
+@misc{eherra_2025_europirq,
+  author = { {Elias Herranen} },
+  publisher = { Hugging Face },
+  title = { EuroPIRQ: European Parallel Information Retrieval Queries },
+  url = { https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval },
+  year = {2025},
+}
+""",
+    )
```
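Because `EuroPIRQRetrieval` declares `eval_langs` as a mapping of per-language subsets ("en", "fi", "pt") onto ISO 639-3 codes, the usual language filter on `get_tasks` can restrict evaluation to individual subsets. A minimal sketch (not part of the diff), assuming the current 2.x `get_tasks` signature:

```python
# Illustrative sketch (not part of the diff): EuroPIRQRetrieval maps the "en", "fi"
# and "pt" subsets to ISO 639-3 language codes, so the standard language filter on
# get_tasks can restrict it to a subset. Assumes the current 2.x get_tasks signature.
import mteb

tasks = mteb.get_tasks(tasks=["EuroPIRQRetrieval"], languages=["fin", "por"])
print([task.metadata.name for task in tasks])
```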
```diff
@@ -15,7 +15,7 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceEnRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_en_mteb_format",
             "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
@@ -34,15 +34,14 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -53,7 +52,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceFrRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_fr_mteb_format",
             "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
@@ -71,15 +70,14 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
         dialect=[],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -91,7 +89,7 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3IndustrialRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_industrial_mteb_format",
             "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
@@ -110,15 +108,14 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -130,7 +127,7 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PharmaceuticalsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
             "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
@@ -149,15 +146,14 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -169,7 +165,7 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3ComputerScienceRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_computer_science_mteb_format",
             "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
@@ -188,15 +184,14 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -208,7 +203,7 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3HrRetrieval",
         description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_hr_mteb_format",
             "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
@@ -227,15 +222,14 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -247,7 +241,7 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3EnergyRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_energy_mteb_format",
             "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
@@ -266,15 +260,14 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -286,7 +279,7 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PhysicsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_physics_mteb_format",
             "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
@@ -305,15 +298,14 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -325,7 +317,7 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3NuclearRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3NuclearRetrieval",
             "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
@@ -344,15 +336,14 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -364,7 +355,7 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3TelecomRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3TelecomRetrieval",
             "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
@@ -383,15 +374,14 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
```
```diff
@@ -54,7 +54,7 @@ Fishel, Mark},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ Fishel, Mark},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
 
             self.queries[split] = {}
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.metadata.eval_splits:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
```
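The hunks above are whitespace-only, but the touched lines sit inside the per-split downsampling that several retrieval tasks apply before building their queries and corpus. For reference, a self-contained sketch of that pattern with Hugging Face `datasets` (the dataset path is a placeholder, not one of the datasets in this diff):

```python
# Self-contained sketch of the per-split downsampling pattern touched by the
# whitespace-only hunks above. The dataset path is a placeholder.
import datasets

dataset = datasets.load_dataset("org/some-retrieval-dataset")  # hypothetical path
for split in dataset:
    ds: datasets.Dataset = dataset[split]
    ds = ds.shuffle(seed=42)          # fixed seed keeps the subsample reproducible
    max_samples = min(1024, len(ds))  # never select more rows than the split contains
    ds = ds.select(range(max_samples))
    print(split, len(ds))
```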