mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
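Most of the file list is mechanical, but a few entries affect how things are looked up at runtime: the mxbai model implementations move into `mixedbread_ai_models.py` (with `mxbai_models.py` removed), and the benchmark and model registries gain entries. Below is a minimal sketch, not taken from the diff, of how a user might confirm that names still resolve after upgrading; it assumes the public `get_model_meta` and `get_benchmark` helpers keep their 2.x signatures, and the specific model and benchmark names are illustrative.

```python
# Minimal sketch (not from the diff): confirm that implementations relocated in this
# release, e.g. mxbai_models.py -> mixedbread_ai_models.py, still resolve through the
# public registry helpers. The model and benchmark names below are illustrative and
# assumed to be registered under these names.
import mteb

meta = mteb.get_model_meta("mixedbread-ai/mxbai-embed-large-v1")
print(meta.name, meta.revision)

benchmark = mteb.get_benchmark("MTEB(eng, v2)")
print(len(benchmark.tasks), "tasks")
```

Selected hunks from the new and updated task files follow.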
```diff
@@ -25,7 +25,7 @@ class CUB200I2I(AbsTaskRetrieval):
         modalities=["image"],
         sample_creation="created",
         bibtex_citation=r"""
-@article{
+@article{welinder2010caltech,
   author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro},
   month = {09},
   pages = {},
```
```diff
@@ -1,5 +1,19 @@
 from .auto_rag_retrieval import AutoRAGRetrieval
 from .ko_strategy_qa import KoStrategyQA
+from .kovidore2_bench_retrieval import (
+    KoVidore2CybersecurityRetrieval,
+    KoVidore2EconomicRetrieval,
+    KoVidore2EnergyRetrieval,
+    KoVidore2HrRetrieval,
+)
 from .squad_kor_v1_retrieval import SQuADKorV1Retrieval
 
-__all__ = [
+__all__ = [
+    "AutoRAGRetrieval",
+    "KoStrategyQA",
+    "KoVidore2CybersecurityRetrieval",
+    "KoVidore2EconomicRetrieval",
+    "KoVidore2EnergyRetrieval",
+    "KoVidore2HrRetrieval",
+    "SQuADKorV1Retrieval",
+]
```
```diff
@@ -0,0 +1,142 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2CybersecurityRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
+            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EconomicRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-economic-mteb",
+            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EnergyRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-energy-mteb",
+            "revision": "8c09a3d22b1fa3a7f5e815e9521da9b048754211",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2HrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2HrRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-hr-mteb",
+            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
```
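The four KoVidore2 classes above follow the standard `AbsTaskRetrieval` + `TaskMetadata` pattern, and the `kor/__init__.py` hunk exports them, so they become selectable by name through the task registry. A minimal usage sketch (not part of the diff), assuming mteb's existing `get_tasks` API:

```python
# Illustrative sketch (not part of the diff): load the newly added KoVidore2 tasks by
# name through the task registry. Assumes mteb's existing get_tasks API.
import mteb

tasks = mteb.get_tasks(
    tasks=[
        "KoVidore2CybersecurityRetrieval",
        "KoVidore2EconomicRetrieval",
        "KoVidore2EnergyRetrieval",
        "KoVidore2HrRetrieval",
    ]
)
for task in tasks:
    # Each task exposes the TaskMetadata defined above.
    print(task.metadata.name, task.metadata.type, task.metadata.main_score)
```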
```diff
@@ -6,6 +6,7 @@ from .cross_lingual_semantic_discrimination_wmt21 import (
     CrossLingualSemanticDiscriminationWMT21,
 )
 from .cur_ev1_retrieval import CUREv1Retrieval
+from .euro_pirq_retrieval import EuroPIRQRetrieval
 from .indic_qa_retrieval import IndicQARetrieval
 from .jina_vdr_bench_retrieval import (
     JinaVDRAirbnbSyntheticRetrieval,
@@ -107,6 +108,7 @@ __all__ = [
     "CUREv1Retrieval",
     "CrossLingualSemanticDiscriminationWMT19",
     "CrossLingualSemanticDiscriminationWMT21",
+    "EuroPIRQRetrieval",
     "IndicQARetrieval",
     "JinaVDRAirbnbSyntheticRetrieval",
     "JinaVDRArabicChartQARetrieval",
```
```diff
@@ -0,0 +1,43 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+_LANGUAGES = {
+    "en": ["eng-Latn"],
+    "fi": ["fin-Latn"],
+    "pt": ["por-Latn"],
+}
+
+
+class EuroPIRQRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="EuroPIRQRetrieval",
+        description="The EuroPIRQ retrieval dataset is a multilingual collection designed for evaluating retrieval and cross-lingual retrieval tasks. Dataset contains 10,000 parallel passages & 100 parallel queries (synthetic) in three languages: English, Portuguese, and Finnish, constructed from the European Union's DGT-Acquis corpus.",
+        reference="https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval",
+        dataset={
+            "path": "eherra/EuroPIRQ-retrieval",
+            "revision": "59225ed25fbcea2185e1acbc8c3c80f1a8cd8341",
+        },
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=_LANGUAGES,
+        main_score="ndcg_at_10",
+        date=("2025-12-01", "2025-12-31"),
+        domains=["Legal"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="LM-generated and reviewed",
+        dialect=[],
+        sample_creation="found",
+        is_public=True,
+        bibtex_citation=r"""
+@misc{eherra_2025_europirq,
+  author = { {Elias Herranen} },
+  publisher = { Hugging Face },
+  title = { EuroPIRQ: European Parallel Information Retrieval Queries },
+  url = { https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval },
+  year = {2025},
+}
+""",
+    )
```
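Because `EuroPIRQRetrieval` declares `eval_langs` as a mapping of per-language subsets ("en", "fi", "pt") onto ISO 639-3 codes, the usual language filter on `get_tasks` can restrict evaluation to individual subsets. A minimal sketch (not part of the diff), assuming the current 2.x `get_tasks` signature:

```python
# Illustrative sketch (not part of the diff): EuroPIRQRetrieval maps the "en", "fi"
# and "pt" subsets to ISO 639-3 language codes, so the standard language filter on
# get_tasks can restrict it to a subset. Assumes the current 2.x get_tasks signature.
import mteb

tasks = mteb.get_tasks(tasks=["EuroPIRQRetrieval"], languages=["fin", "por"])
print([task.metadata.name for task in tasks])
```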
```diff
@@ -15,7 +15,7 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceEnRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_en_mteb_format",
             "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
@@ -34,15 +34,14 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -53,7 +52,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceFrRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_fr_mteb_format",
             "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
@@ -71,15 +70,14 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
         dialect=[],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -91,7 +89,7 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3IndustrialRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_industrial_mteb_format",
             "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
@@ -110,15 +108,14 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -130,7 +127,7 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PharmaceuticalsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
             "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
@@ -149,15 +146,14 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -169,7 +165,7 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3ComputerScienceRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_computer_science_mteb_format",
             "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
@@ -188,15 +184,14 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -208,7 +203,7 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3HrRetrieval",
         description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_hr_mteb_format",
             "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
@@ -227,15 +222,14 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -247,7 +241,7 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3EnergyRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_energy_mteb_format",
             "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
@@ -266,15 +260,14 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -286,7 +279,7 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PhysicsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_physics_mteb_format",
             "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
@@ -305,15 +298,14 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -325,7 +317,7 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3NuclearRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3NuclearRetrieval",
             "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
@@ -344,15 +336,14 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -364,7 +355,7 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3TelecomRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3TelecomRetrieval",
             "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
@@ -383,15 +374,14 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@
-
-
-
-
-
-
-
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
```
```diff
@@ -54,7 +54,7 @@ Fishel, Mark},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ Fishel, Mark},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
 
             self.queries[split] = {}
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.metadata.eval_splits:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
```
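The hunks above are whitespace-only, but the touched lines sit inside the per-split downsampling that several retrieval tasks apply before building their queries and corpus. For reference, a self-contained sketch of that pattern with Hugging Face `datasets` (the dataset path is a placeholder, not one of the datasets in this diff):

```python
# Self-contained sketch of the per-split downsampling pattern touched by the
# whitespace-only hunks above. The dataset path is a placeholder.
import datasets

dataset = datasets.load_dataset("org/some-retrieval-dataset")  # hypothetical path
for split in dataset:
    ds: datasets.Dataset = dataset[split]
    ds = ds.shuffle(seed=42)          # fixed seed keeps the subsample reproducible
    max_samples = min(1024, len(ds))  # never select more rows than the split contains
    ds = ds.select(range(max_samples))
    print(split, len(ds))
```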