mteb-2.5.2-py3-none-any.whl → mteb-2.7.2-py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
Files changed (241)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +17 -18
  3. mteb/_evaluators/any_sts_evaluator.py +3 -3
  4. mteb/_evaluators/clustering_evaluator.py +2 -2
  5. mteb/_evaluators/evaluator.py +4 -2
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
  7. mteb/_evaluators/pair_classification_evaluator.py +5 -3
  8. mteb/_evaluators/retrieval_evaluator.py +2 -2
  9. mteb/_evaluators/retrieval_metrics.py +18 -17
  10. mteb/_evaluators/sklearn_evaluator.py +11 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
  12. mteb/_evaluators/text/summarization_evaluator.py +23 -18
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
  14. mteb/abstasks/_data_filter/filters.py +1 -1
  15. mteb/abstasks/_data_filter/task_pipelines.py +3 -0
  16. mteb/abstasks/_statistics_calculation.py +18 -10
  17. mteb/abstasks/_stratification.py +18 -18
  18. mteb/abstasks/abstask.py +35 -28
  19. mteb/abstasks/aggregate_task_metadata.py +1 -9
  20. mteb/abstasks/aggregated_task.py +10 -29
  21. mteb/abstasks/classification.py +15 -10
  22. mteb/abstasks/clustering.py +19 -15
  23. mteb/abstasks/clustering_legacy.py +10 -10
  24. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  25. mteb/abstasks/multilabel_classification.py +23 -19
  26. mteb/abstasks/pair_classification.py +20 -11
  27. mteb/abstasks/regression.py +4 -4
  28. mteb/abstasks/retrieval.py +28 -24
  29. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  30. mteb/abstasks/sts.py +8 -5
  31. mteb/abstasks/task_metadata.py +31 -33
  32. mteb/abstasks/text/bitext_mining.py +39 -28
  33. mteb/abstasks/text/reranking.py +8 -6
  34. mteb/abstasks/text/summarization.py +10 -5
  35. mteb/abstasks/zeroshot_classification.py +8 -4
  36. mteb/benchmarks/benchmark.py +4 -2
  37. mteb/benchmarks/benchmarks/__init__.py +4 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +112 -11
  39. mteb/benchmarks/get_benchmark.py +14 -55
  40. mteb/cache.py +182 -29
  41. mteb/cli/_display_tasks.py +2 -2
  42. mteb/cli/build_cli.py +110 -14
  43. mteb/cli/generate_model_card.py +43 -23
  44. mteb/deprecated_evaluator.py +63 -49
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  49. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  50. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  51. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  53. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  54. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  55. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  56. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  57. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  58. mteb/evaluate.py +44 -33
  59. mteb/filter_tasks.py +25 -26
  60. mteb/get_tasks.py +29 -30
  61. mteb/languages/language_scripts.py +5 -3
  62. mteb/leaderboard/app.py +162 -34
  63. mteb/load_results.py +12 -12
  64. mteb/models/abs_encoder.py +10 -6
  65. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  66. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
  67. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  68. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  69. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  70. mteb/models/get_model_meta.py +21 -3
  71. mteb/models/instruct_wrapper.py +28 -8
  72. mteb/models/model_implementations/align_models.py +1 -1
  73. mteb/models/model_implementations/andersborges.py +4 -4
  74. mteb/models/model_implementations/ara_models.py +1 -1
  75. mteb/models/model_implementations/arctic_models.py +8 -8
  76. mteb/models/model_implementations/b1ade_models.py +1 -1
  77. mteb/models/model_implementations/bge_models.py +45 -21
  78. mteb/models/model_implementations/bica_model.py +3 -3
  79. mteb/models/model_implementations/blip2_models.py +2 -2
  80. mteb/models/model_implementations/blip_models.py +16 -16
  81. mteb/models/model_implementations/bm25.py +4 -4
  82. mteb/models/model_implementations/bmretriever_models.py +6 -4
  83. mteb/models/model_implementations/cadet_models.py +1 -1
  84. mteb/models/model_implementations/cde_models.py +11 -4
  85. mteb/models/model_implementations/clip_models.py +6 -6
  86. mteb/models/model_implementations/clips_models.py +3 -3
  87. mteb/models/model_implementations/codefuse_models.py +5 -5
  88. mteb/models/model_implementations/codesage_models.py +3 -3
  89. mteb/models/model_implementations/cohere_models.py +5 -5
  90. mteb/models/model_implementations/cohere_v.py +2 -2
  91. mteb/models/model_implementations/colpali_models.py +3 -3
  92. mteb/models/model_implementations/colqwen_models.py +8 -8
  93. mteb/models/model_implementations/colsmol_models.py +2 -2
  94. mteb/models/model_implementations/conan_models.py +1 -1
  95. mteb/models/model_implementations/dino_models.py +42 -42
  96. mteb/models/model_implementations/e5_instruct.py +23 -4
  97. mteb/models/model_implementations/e5_models.py +9 -9
  98. mteb/models/model_implementations/e5_v.py +6 -6
  99. mteb/models/model_implementations/eagerworks_models.py +1 -1
  100. mteb/models/model_implementations/emillykkejensen_models.py +6 -6
  101. mteb/models/model_implementations/en_code_retriever.py +1 -1
  102. mteb/models/model_implementations/euler_models.py +2 -2
  103. mteb/models/model_implementations/fa_models.py +9 -9
  104. mteb/models/model_implementations/facebookai.py +14 -2
  105. mteb/models/model_implementations/geogpt_models.py +1 -1
  106. mteb/models/model_implementations/gme_v_models.py +6 -5
  107. mteb/models/model_implementations/google_models.py +1 -1
  108. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
  109. mteb/models/model_implementations/gritlm_models.py +2 -2
  110. mteb/models/model_implementations/gte_models.py +25 -13
  111. mteb/models/model_implementations/hinvec_models.py +1 -1
  112. mteb/models/model_implementations/ibm_granite_models.py +30 -6
  113. mteb/models/model_implementations/inf_models.py +2 -2
  114. mteb/models/model_implementations/jasper_models.py +2 -2
  115. mteb/models/model_implementations/jina_clip.py +48 -10
  116. mteb/models/model_implementations/jina_models.py +18 -11
  117. mteb/models/model_implementations/kblab.py +12 -6
  118. mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
  119. mteb/models/model_implementations/kfst.py +1 -1
  120. mteb/models/model_implementations/kowshik24_models.py +1 -1
  121. mteb/models/model_implementations/lgai_embedding_models.py +1 -1
  122. mteb/models/model_implementations/linq_models.py +1 -1
  123. mteb/models/model_implementations/listconranker.py +1 -1
  124. mteb/models/model_implementations/llm2clip_models.py +6 -6
  125. mteb/models/model_implementations/llm2vec_models.py +8 -8
  126. mteb/models/model_implementations/mcinext_models.py +4 -1
  127. mteb/models/model_implementations/mdbr_models.py +17 -3
  128. mteb/models/model_implementations/misc_models.py +68 -68
  129. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  130. mteb/models/model_implementations/mme5_models.py +1 -1
  131. mteb/models/model_implementations/moco_models.py +4 -4
  132. mteb/models/model_implementations/mod_models.py +1 -1
  133. mteb/models/model_implementations/model2vec_models.py +14 -14
  134. mteb/models/model_implementations/moka_models.py +1 -1
  135. mteb/models/model_implementations/nbailab.py +3 -3
  136. mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
  137. mteb/models/model_implementations/nomic_models.py +30 -15
  138. mteb/models/model_implementations/nomic_models_vision.py +1 -1
  139. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
  140. mteb/models/model_implementations/nvidia_models.py +151 -19
  141. mteb/models/model_implementations/octen_models.py +61 -2
  142. mteb/models/model_implementations/openclip_models.py +13 -13
  143. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
  144. mteb/models/model_implementations/ops_moa_models.py +1 -1
  145. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  146. mteb/models/model_implementations/pawan_models.py +1 -1
  147. mteb/models/model_implementations/piccolo_models.py +1 -1
  148. mteb/models/model_implementations/pixie_models.py +56 -0
  149. mteb/models/model_implementations/promptriever_models.py +4 -4
  150. mteb/models/model_implementations/pylate_models.py +10 -9
  151. mteb/models/model_implementations/qodo_models.py +2 -2
  152. mteb/models/model_implementations/qtack_models.py +1 -1
  153. mteb/models/model_implementations/qwen3_models.py +3 -3
  154. mteb/models/model_implementations/qzhou_models.py +2 -2
  155. mteb/models/model_implementations/random_baseline.py +3 -3
  156. mteb/models/model_implementations/rasgaard_models.py +2 -2
  157. mteb/models/model_implementations/reasonir_model.py +1 -1
  158. mteb/models/model_implementations/repllama_models.py +3 -3
  159. mteb/models/model_implementations/rerankers_custom.py +12 -6
  160. mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
  161. mteb/models/model_implementations/richinfoai_models.py +1 -1
  162. mteb/models/model_implementations/ru_sentence_models.py +20 -20
  163. mteb/models/model_implementations/ruri_models.py +10 -10
  164. mteb/models/model_implementations/salesforce_models.py +3 -3
  165. mteb/models/model_implementations/samilpwc_models.py +1 -1
  166. mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
  167. mteb/models/model_implementations/searchmap_models.py +1 -1
  168. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
  169. mteb/models/model_implementations/sentence_transformers_models.py +124 -22
  170. mteb/models/model_implementations/shuu_model.py +1 -1
  171. mteb/models/model_implementations/siglip_models.py +20 -20
  172. mteb/models/model_implementations/slm_models.py +416 -0
  173. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
  174. mteb/models/model_implementations/stella_models.py +17 -4
  175. mteb/models/model_implementations/tarka_models.py +2 -2
  176. mteb/models/model_implementations/text2vec_models.py +9 -3
  177. mteb/models/model_implementations/ua_sentence_models.py +1 -1
  178. mteb/models/model_implementations/uae_models.py +7 -1
  179. mteb/models/model_implementations/vdr_models.py +1 -1
  180. mteb/models/model_implementations/vi_vn_models.py +6 -6
  181. mteb/models/model_implementations/vlm2vec_models.py +3 -3
  182. mteb/models/model_implementations/voyage_models.py +84 -0
  183. mteb/models/model_implementations/voyage_v.py +9 -7
  184. mteb/models/model_implementations/youtu_models.py +1 -1
  185. mteb/models/model_implementations/yuan_models.py +1 -1
  186. mteb/models/model_implementations/yuan_models_en.py +1 -1
  187. mteb/models/model_meta.py +80 -31
  188. mteb/models/models_protocols.py +22 -6
  189. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
  190. mteb/models/search_wrappers.py +33 -18
  191. mteb/models/sentence_transformer_wrapper.py +50 -25
  192. mteb/models/vllm_wrapper.py +327 -0
  193. mteb/py.typed +0 -0
  194. mteb/results/benchmark_results.py +29 -21
  195. mteb/results/model_result.py +52 -22
  196. mteb/results/task_result.py +80 -58
  197. mteb/similarity_functions.py +11 -7
  198. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  199. mteb/tasks/classification/est/estonian_valence.py +1 -1
  200. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  201. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  202. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  203. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  204. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  205. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  206. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  207. mteb/tasks/retrieval/code/code_rag.py +12 -12
  208. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  209. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  210. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  211. mteb/tasks/retrieval/eng/__init__.py +2 -0
  212. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  213. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  214. mteb/tasks/retrieval/kor/__init__.py +15 -1
  215. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  216. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  217. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  218. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  219. mteb/tasks/retrieval/nob/norquad.py +2 -2
  220. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  221. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  222. mteb/tasks/retrieval/vie/__init__.py +14 -6
  223. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  224. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  225. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  226. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  227. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  228. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  229. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  230. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  231. mteb/types/__init__.py +2 -0
  232. mteb/types/_encoder_io.py +12 -0
  233. mteb/types/_result.py +2 -1
  234. mteb/types/statistics.py +9 -3
  235. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
  236. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
  237. mteb/models/model_implementations/mxbai_models.py +0 -111
  238. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  239. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  240. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  241. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py
@@ -25,7 +25,7 @@ class CUB200I2I(AbsTaskRetrieval):
         modalities=["image"],
         sample_creation="created",
         bibtex_citation=r"""
-@article{article,
+@article{welinder2010caltech,
   author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro},
   month = {09},
   pages = {},
mteb/tasks/retrieval/kor/__init__.py
@@ -1,5 +1,19 @@
 from .auto_rag_retrieval import AutoRAGRetrieval
 from .ko_strategy_qa import KoStrategyQA
+from .kovidore2_bench_retrieval import (
+    KoVidore2CybersecurityRetrieval,
+    KoVidore2EconomicRetrieval,
+    KoVidore2EnergyRetrieval,
+    KoVidore2HrRetrieval,
+)
 from .squad_kor_v1_retrieval import SQuADKorV1Retrieval
 
-__all__ = ["AutoRAGRetrieval", "KoStrategyQA", "SQuADKorV1Retrieval"]
+__all__ = [
+    "AutoRAGRetrieval",
+    "KoStrategyQA",
+    "KoVidore2CybersecurityRetrieval",
+    "KoVidore2EconomicRetrieval",
+    "KoVidore2EnergyRetrieval",
+    "KoVidore2HrRetrieval",
+    "SQuADKorV1Retrieval",
+]
mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py
@@ -0,0 +1,142 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2CybersecurityRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
+            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EconomicRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-economic-mteb",
+            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EnergyRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-energy-mteb",
+            "revision": "8c09a3d22b1fa3a7f5e815e9521da9b048754211",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2HrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2HrRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-hr-mteb",
+            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
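
The four KoVidore2 tasks added above are ordinary AbsTaskRetrieval subclasses, so once this release is installed they can be selected and run by name like any other task. The snippet below is a minimal sketch only, assuming the top-level mteb.get_tasks / mteb.get_model / mteb.evaluate entry points of the 2.x API; the model id is just a placeholder for any registered encoder that handles both text and images (the tasks are category="t2i").

import mteb

# Select the newly added Korean visual-document retrieval tasks by name.
tasks = mteb.get_tasks(
    tasks=[
        "KoVidore2CybersecurityRetrieval",
        "KoVidore2EconomicRetrieval",
        "KoVidore2EnergyRetrieval",
        "KoVidore2HrRetrieval",
    ]
)

# Placeholder model: any encoder registered in mteb that supports the
# text and image modalities required by these tasks should work here.
model = mteb.get_model("jinaai/jina-clip-v2")

# Run the evaluation and collect the results.
results = mteb.evaluate(model, tasks)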
mteb/tasks/retrieval/multilingual/__init__.py
@@ -6,6 +6,7 @@ from .cross_lingual_semantic_discrimination_wmt21 import (
     CrossLingualSemanticDiscriminationWMT21,
 )
 from .cur_ev1_retrieval import CUREv1Retrieval
+from .euro_pirq_retrieval import EuroPIRQRetrieval
 from .indic_qa_retrieval import IndicQARetrieval
 from .jina_vdr_bench_retrieval import (
     JinaVDRAirbnbSyntheticRetrieval,
@@ -107,6 +108,7 @@ __all__ = [
     "CUREv1Retrieval",
     "CrossLingualSemanticDiscriminationWMT19",
     "CrossLingualSemanticDiscriminationWMT21",
+    "EuroPIRQRetrieval",
     "IndicQARetrieval",
     "JinaVDRAirbnbSyntheticRetrieval",
     "JinaVDRArabicChartQARetrieval",
mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py
@@ -0,0 +1,43 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+_LANGUAGES = {
+    "en": ["eng-Latn"],
+    "fi": ["fin-Latn"],
+    "pt": ["por-Latn"],
+}
+
+
+class EuroPIRQRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="EuroPIRQRetrieval",
+        description="The EuroPIRQ retrieval dataset is a multilingual collection designed for evaluating retrieval and cross-lingual retrieval tasks. Dataset contains 10,000 parallel passages & 100 parallel queries (synthetic) in three languages: English, Portuguese, and Finnish, constructed from the European Union's DGT-Acquis corpus.",
+        reference="https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval",
+        dataset={
+            "path": "eherra/EuroPIRQ-retrieval",
+            "revision": "59225ed25fbcea2185e1acbc8c3c80f1a8cd8341",
+        },
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=_LANGUAGES,
+        main_score="ndcg_at_10",
+        date=("2025-12-01", "2025-12-31"),
+        domains=["Legal"],
+        task_subtypes=[],
+        license="not specified",
+        annotations_creators="LM-generated and reviewed",
+        dialect=[],
+        sample_creation="found",
+        is_public=True,
+        bibtex_citation=r"""
+@misc{eherra_2025_europirq,
+  author = { {Elias Herranen} },
+  publisher = { Hugging Face },
+  title = { EuroPIRQ: European Parallel Information Retrieval Queries },
+  url = { https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval },
+  year = {2025},
+}
+""",
+    )
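
Since EuroPIRQRetrieval declares its English, Finnish and Portuguese subsets through the _LANGUAGES mapping above, a single subset can be evaluated on its own. A minimal sketch under the same assumptions about the 2.x API as the earlier example, using the languages filter of get_tasks (the model id is again only a placeholder):

import mteb

# Keep only the Finnish subset ("fi" -> ["fin-Latn"]) of the new task.
tasks = mteb.get_tasks(tasks=["EuroPIRQRetrieval"], languages=["fin"])

# Placeholder multilingual text encoder registered in mteb.
model = mteb.get_model("intfloat/multilingual-e5-small")
results = mteb.evaluate(model, tasks)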
mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py
@@ -15,7 +15,7 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceEnRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_en_mteb_format",
             "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
@@ -34,15 +34,14 @@ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -53,7 +52,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3FinanceFrRetrieval",
         description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_finance_fr_mteb_format",
             "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
@@ -71,15 +70,14 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
         dialect=[],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -91,7 +89,7 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3IndustrialRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_industrial_mteb_format",
             "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
@@ -110,15 +108,14 @@ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -130,7 +127,7 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PharmaceuticalsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
             "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
@@ -149,15 +146,14 @@ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -169,7 +165,7 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3ComputerScienceRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_computer_science_mteb_format",
             "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
@@ -188,15 +184,14 @@ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -208,7 +203,7 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3HrRetrieval",
         description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_hr_mteb_format",
             "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
@@ -227,15 +222,14 @@ class Vidore3HrRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -247,7 +241,7 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3EnergyRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_energy_mteb_format",
             "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
@@ -266,15 +260,14 @@ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -286,7 +279,7 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3PhysicsRetrieval",
         description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "vidore/vidore_v3_physics_mteb_format",
             "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
@@ -305,15 +298,14 @@ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -325,7 +317,7 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3NuclearRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3NuclearRetrieval",
             "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
@@ -344,15 +336,14 @@ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
@@ -364,7 +355,7 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="Vidore3TelecomRetrieval",
         description="Retrieve associated pages according to questions.",
-        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        reference="https://arxiv.org/abs/2601.08620",
         dataset={
             "path": "mteb-private/Vidore3TelecomRetrieval",
             "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
@@ -383,15 +374,14 @@ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
         modalities=["text", "image"],
         sample_creation="created and machine-translated",
         bibtex_citation=r"""
-@misc{mace2025vidorev3,
-  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
-  day = {5},
-  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
-  journal = {Hugging Face Blog},
-  month = {November},
-  publisher = {Hugging Face},
-  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
-  year = {2025},
+@article{loison2026vidorev3comprehensiveevaluation,
+  archiveprefix = {arXiv},
+  author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
+  eprint = {2601.08620},
+  primaryclass = {cs.AI},
+  title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
+  url = {https://arxiv.org/abs/2601.08620},
+  year = {2026},
 }
 """,
         prompt={"query": "Find a screenshot that is relevant to the user's question."},
mteb/tasks/retrieval/nob/norquad.py
@@ -54,7 +54,7 @@ Fishel, Mark},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ Fishel, Mark},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
mteb/tasks/retrieval/nob/snl_retrieval.py
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)  # type: ignore
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
 
             self.queries[split] = {}
mteb/tasks/retrieval/tur/tur_hist_quad.py
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.metadata.eval_splits:
-            ds: datasets.Dataset = self.dataset[split]  # type: ignore
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(