mteb 2.1.7__py3-none-any.whl → 2.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. mteb/_create_dataloaders.py +6 -3
  2. mteb/_evaluators/any_sts_evaluator.py +14 -12
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  6. mteb/_evaluators/sklearn_evaluator.py +15 -28
  7. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  8. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  9. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  10. mteb/abstasks/clustering.py +1 -1
  11. mteb/abstasks/multilabel_classification.py +2 -2
  12. mteb/abstasks/task_metadata.py +1 -0
  13. mteb/benchmarks/benchmark.py +9 -0
  14. mteb/benchmarks/benchmarks/__init__.py +2 -0
  15. mteb/benchmarks/benchmarks/benchmarks.py +40 -1
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  22. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  23. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  24. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  25. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  26. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  27. mteb/models/model_implementations/align_models.py +6 -0
  28. mteb/models/model_implementations/ara_models.py +7 -0
  29. mteb/models/model_implementations/blip2_models.py +9 -0
  30. mteb/models/model_implementations/blip_models.py +19 -0
  31. mteb/models/model_implementations/cadet_models.py +8 -0
  32. mteb/models/model_implementations/cde_models.py +12 -0
  33. mteb/models/model_implementations/codefuse_models.py +15 -0
  34. mteb/models/model_implementations/codesage_models.py +12 -0
  35. mteb/models/model_implementations/misc_models.py +6 -0
  36. mteb/models/model_implementations/moco_models.py +9 -0
  37. mteb/models/model_implementations/openclip_models.py +16 -0
  38. mteb/models/model_implementations/piccolo_models.py +6 -0
  39. mteb/models/model_implementations/rasgaard_models.py +7 -1
  40. mteb/models/model_implementations/tarka_models.py +317 -0
  41. mteb/models/search_wrappers.py +5 -5
  42. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  43. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  44. mteb/tasks/classification/ara/ajgt.py +1 -2
  45. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  46. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  47. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  48. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  49. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  50. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  51. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  52. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  53. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  54. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  55. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  56. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  57. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  58. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  59. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  60. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  61. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  62. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  63. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  64. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  65. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  66. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  67. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  68. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  69. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  70. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  71. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  72. mteb/tasks/classification/eng/news_classification.py +1 -2
  73. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  74. mteb/tasks/classification/eng/patent_classification.py +1 -2
  75. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  76. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  77. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  78. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  79. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  80. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  81. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  82. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  83. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  84. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  85. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  86. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  87. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  88. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  89. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  90. mteb/tasks/classification/est/estonian_valence.py +1 -2
  91. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  92. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  93. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  94. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  95. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  96. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  97. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  98. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  99. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  100. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  101. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  102. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  103. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  104. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  105. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  106. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  107. mteb/tasks/classification/kor/klue_tc.py +1 -2
  108. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  109. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  110. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  111. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  112. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  113. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  114. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  115. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  116. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  117. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  118. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  119. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  120. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  121. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +1 -3
  122. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  123. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  124. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  125. mteb/tasks/classification/pol/polish_classification.py +3 -6
  126. mteb/tasks/classification/ron/moroco.py +1 -2
  127. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  128. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  130. mteb/tasks/classification/rus/headline_classification.py +1 -2
  131. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  132. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  133. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  134. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  135. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  136. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  137. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  139. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  140. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  141. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  142. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  144. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  145. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  146. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  147. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  148. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  149. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  150. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  151. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  152. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  153. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  154. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  155. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  156. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  157. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  158. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  159. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  160. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  161. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  162. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  163. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  164. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  165. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  166. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  167. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  168. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  169. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  170. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  171. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  172. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  173. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  174. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  175. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  176. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  177. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  178. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  179. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  180. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  181. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  182. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  183. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  184. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  185. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  186. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  187. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  188. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  189. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  190. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  191. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  192. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  193. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  194. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  195. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  196. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  197. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  198. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  199. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  200. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  201. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  202. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  203. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  204. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  205. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  206. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  207. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  208. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  209. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  210. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  211. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  212. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  213. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  214. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  215. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  216. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  217. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  218. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  219. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  220. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  221. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  222. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  223. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  224. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  225. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  226. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  227. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  228. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  229. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  230. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  231. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  232. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  233. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  234. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/METADATA +1 -1
  235. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/RECORD +239 -228
  236. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/WHEEL +0 -0
  237. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/entry_points.txt +0 -0
  238. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/licenses/LICENSE +0 -0
  239. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class SciDocsRerankingVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="SciDocsRR-VN",
- description="""A translated dataset from Ranking of related scientific papers based on their title.
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from Ranking of related scientific papers based on their title. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="https://allenai.org/data/scidocs",
  dataset={
  "path": "mteb/SciDocsRR-VN",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class StackOverflowDupQuestionsVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="StackOverflowDupQuestions-VN",
- description="""A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf",
  dataset={
  "path": "mteb/StackOverflowDupQuestions-VN",
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class LitSearchRetrieval(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="LitSearchRetrieval",
- description="""
- The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
- Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
- search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
- generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
- recently published papers, manually written by their authors. All LitSearch questions were manually examined or
- edited by experts to ensure high quality.
- """,
+ description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
  reference="https://github.com/princeton-nlp/LitSearch",
  dataset={
  "path": "princeton-nlp/LitSearch",
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):

  metadata = TaskMetadata(
  name="JaCWIRRetrieval",
- description="""JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of
- 5000 question texts and approximately 500k web page titles and web page introductions or summaries
- (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
- and that data is used as a positive example for the question text.""",
+ description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
  reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
  dataset={
  "path": "mteb/JaCWIRRetrieval",
@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
  Vidore2ESGReportsHLRetrieval,
  Vidore2ESGReportsRetrieval,
  )
+ from .vidore3_bench_retrieval import (
+ Vidore3ComputerScienceRetrieval,
+ Vidore3EnergyRetrieval,
+ Vidore3FinanceEnRetrieval,
+ Vidore3FinanceFrRetrieval,
+ Vidore3HrRetrieval,
+ Vidore3IndustrialRetrieval,
+ Vidore3NuclearRetrieval,
+ Vidore3PharmaceuticalsRetrieval,
+ Vidore3PhysicsRetrieval,
+ Vidore3TelecomRetrieval,
+ )
  from .web_faq_retrieval import WebFAQRetrieval
  from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
  from .wit_t2i_retrieval import WITT2IRetrieval
@@ -161,6 +173,16 @@ __all__ = [
  "Vidore2ESGReportsHLRetrieval",
  "Vidore2ESGReportsRetrieval",
  "Vidore2EconomicsReportsRetrieval",
+ "Vidore3ComputerScienceRetrieval",
+ "Vidore3EnergyRetrieval",
+ "Vidore3FinanceEnRetrieval",
+ "Vidore3FinanceFrRetrieval",
+ "Vidore3HrRetrieval",
+ "Vidore3IndustrialRetrieval",
+ "Vidore3NuclearRetrieval",
+ "Vidore3PharmaceuticalsRetrieval",
+ "Vidore3PhysicsRetrieval",
+ "Vidore3TelecomRetrieval",
  "WITT2IRetrieval",
  "WebFAQRetrieval",
  "WikipediaRetrievalMultilingual",
@@ -34,8 +34,7 @@ _EVAL_LANGS = {
  class MKQARetrieval(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="MKQARetrieval",
- description="""Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset.
- For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.""",
+ description="Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset. For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.",
  reference="https://github.com/apple/ml-mkqa",
  dataset={
  "path": "mteb/MKQARetrieval",
@@ -75,10 +75,7 @@ _EVAL_LANGS = extend_lang_pairs()
  class MLQARetrieval(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="MLQARetrieval",
- description="""MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance.
- MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic,
- German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between
- 4 different languages on average.""",
+ description="MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between 4 different languages on average.",
  reference="https://huggingface.co/datasets/mlqa",
  dataset={
  "path": "mteb/MLQARetrieval",
@@ -21,8 +21,7 @@ _LANGUAGES = {
  class MultiLongDocRetrieval(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="MultiLongDocRetrieval",
- description="""Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024).
- It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.""",
+ description="Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024). It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.",
  reference="https://arxiv.org/abs/2402.03216", # also: https://huggingface.co/datasets/Shitao/MLDR
  dataset={
  "path": "mteb/MultiLongDocRetrieval",
@@ -68,11 +68,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
  "path": "mlsa-iai-msu-lab/ru_sci_bench_cite_retrieval",
  "revision": "6cb447d02f41b8b775d5d9df7faf472f44d2f1db",
  },
- description="""This task is focused on Direct Citation Prediction for scientific papers from eLibrary,
- Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
- the goal is to retrieve papers that are directly cited by it from a larger corpus of papers.
- The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers,
- and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.""",
+ description="This task is focused on Direct Citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve papers that are directly cited by it from a larger corpus of papers. The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers, and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.",
  reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
  type="Retrieval",
  category="t2t",
@@ -130,13 +126,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
  "path": "mlsa-iai-msu-lab/ru_sci_bench_cocite_retrieval",
  "revision": "a5da47a245275669d2b6ddf8f96c5338dd2428b4",
  },
- description="""This task focuses on Co-citation Prediction for scientific papers from eLibrary,
- Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
- the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited
- if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task,
- this task employs a retrieval setup: for a given query paper, all other papers in the corpus that
- are not co-cited with it are considered negative examples. The task is available for both Russian
- and English scientific texts.""",
+ description="This task focuses on Co-citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task, this task employs a retrieval setup: for a given query paper, all other papers in the corpus that are not co-cited with it are considered negative examples. The task is available for both Russian and English scientific texts.",
  reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
  type="Retrieval",
  category="t2t",
@@ -0,0 +1,399 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+ _LANGS = {
+ "french": ["fra-Latn"],
+ "spanish": ["spa-Latn"],
+ "english": ["eng-Latn"],
+ "german": ["deu-Latn"],
+ "italian": ["ita-Latn"],
+ "portuguese": ["por-Latn"],
+ }
+
+
+ class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3FinanceEnRetrieval",
+ description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_finance_en_mteb_format",
+ "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Financial"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ )
+
+
+ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3FinanceFrRetrieval",
+ description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_finance_fr_mteb_format",
+ "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Financial"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3IndustrialRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_industrial_mteb_format",
+ "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3PharmaceuticalsRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
+ "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Medical"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3ComputerScienceRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_computer_science_mteb_format",
+ "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering", "Programming"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3HrRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3HrRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_hr_mteb_format",
+ "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Social"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3EnergyRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3EnergyRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_energy_mteb_format",
+ "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering", "Chemistry", "Academic"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3PhysicsRetrieval",
+ description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "vidore/vidore_v3_physics_mteb_format",
+ "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering", "Academic"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=True,
+ )
+
+
+ class Vidore3NuclearRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3NuclearRetrieval",
+ description="Retrieve associated pages according to questions.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "mteb-private/Vidore3NuclearRetrieval",
+ "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering", "Chemistry"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=False,
+ )
+
+
+ class Vidore3TelecomRetrieval(AbsTaskRetrieval):
+ metadata = TaskMetadata(
+ name="Vidore3TelecomRetrieval",
+ description="Retrieve associated pages according to questions.",
+ reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+ dataset={
+ "path": "mteb-private/Vidore3TelecomRetrieval",
+ "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
+ },
+ type="DocumentUnderstanding",
+ category="t2i",
+ eval_splits=["test"],
+ eval_langs=_LANGS,
+ main_score="ndcg_at_10",
+ date=("2025-10-01", "2025-11-01"),
+ domains=["Engineering", "Programming"],
+ task_subtypes=["Image Text Retrieval"],
+ license="cc-by-4.0",
+ annotations_creators="derived",
+ dialect=[],
+ modalities=["text", "image"],
+ sample_creation="created and machine-translated",
+ bibtex_citation=r"""
+ @misc{mace2025vidorev3,
+ author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+ day = {5},
+ howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+ journal = {Hugging Face Blog},
+ month = {November},
+ publisher = {Hugging Face},
+ title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+ year = {2025},
+ }
+ """,
+ prompt={"query": "Find a screenshot that is relevant to the user's question."},
+ is_public=False,
+ )
@@ -7,13 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class SlovakSumRetrieval(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="SlovakSumRetrieval",
- description="""
- SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand
- news articles with titles and short abstracts obtained from multiple Slovak newspapers.
-
- Originally intended as a summarization task, but since no human annotations were provided
- here reformulated to a retrieval task.
- """,
+ description="SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. Originally intended as a summarization task, but since no human annotations were provided here reformulated to a retrieval task.",
  reference="https://huggingface.co/datasets/NaiveNeuron/slovaksum",
  dataset={
  "path": "NaiveNeuron/slovaksum",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class ArguAnaVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="ArguAna-VN",
- description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="http://argumentation.bplaced.net/arguana/data",
  dataset={
  "path": "GreenNode/arguana-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class ClimateFEVERVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="ClimateFEVER-VN",
- description="""A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change.
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
  dataset={
  "path": "GreenNode/climate-fever-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class CQADupstackAndroidVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="CQADupstackAndroid-VN",
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
  dataset={
  "path": "GreenNode/cqadupstack-android-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class CQADupstackGisVN(AbsTaskRetrieval):
  metadata = TaskMetadata(
  name="CQADupstackGis-VN",
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
  dataset={
  "path": "GreenNode/cqadupstack-gis-vn",