mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (412)
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +8 -3
  3. mteb/_evaluators/any_sts_evaluator.py +14 -12
  4. mteb/_evaluators/clustering_evaluator.py +1 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  7. mteb/_evaluators/retrieval_metrics.py +0 -9
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_stratification.py +1 -1
  13. mteb/abstasks/abstask.py +6 -1
  14. mteb/abstasks/clustering.py +1 -1
  15. mteb/abstasks/dataset_card_template.md +1 -1
  16. mteb/abstasks/multilabel_classification.py +2 -2
  17. mteb/abstasks/retrieval.py +2 -1
  18. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  19. mteb/abstasks/task_metadata.py +2 -1
  20. mteb/benchmarks/_create_table.py +1 -3
  21. mteb/benchmarks/benchmark.py +18 -1
  22. mteb/benchmarks/benchmarks/__init__.py +4 -0
  23. mteb/benchmarks/benchmarks/benchmarks.py +125 -16
  24. mteb/benchmarks/get_benchmark.py +3 -1
  25. mteb/cache.py +7 -3
  26. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  27. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  28. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  29. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  30. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  31. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  32. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  33. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  34. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  35. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  36. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  37. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  38. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  39. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  40. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  41. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  42. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  43. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  44. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  54. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  55. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  56. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  57. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  58. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  59. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  60. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  61. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  63. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  66. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  67. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  68. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  69. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  71. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  72. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  73. mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
  74. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  75. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  76. mteb/evaluate.py +26 -6
  77. mteb/languages/check_language_code.py +11 -3
  78. mteb/languages/language_scripts.py +4 -0
  79. mteb/leaderboard/app.py +5 -3
  80. mteb/leaderboard/benchmark_selector.py +4 -2
  81. mteb/leaderboard/text_segments.py +1 -1
  82. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  83. mteb/models/instruct_wrapper.py +3 -0
  84. mteb/models/model_implementations/align_models.py +6 -0
  85. mteb/models/model_implementations/andersborges.py +51 -0
  86. mteb/models/model_implementations/ara_models.py +7 -0
  87. mteb/models/model_implementations/b1ade_models.py +1 -1
  88. mteb/models/model_implementations/bge_models.py +1 -3
  89. mteb/models/model_implementations/blip2_models.py +9 -0
  90. mteb/models/model_implementations/blip_models.py +19 -0
  91. mteb/models/model_implementations/bmretriever_models.py +1 -1
  92. mteb/models/model_implementations/cadet_models.py +8 -0
  93. mteb/models/model_implementations/cde_models.py +12 -0
  94. mteb/models/model_implementations/codefuse_models.py +15 -0
  95. mteb/models/model_implementations/codesage_models.py +12 -0
  96. mteb/models/model_implementations/cohere_models.py +1 -1
  97. mteb/models/model_implementations/colqwen_models.py +57 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +70 -0
  99. mteb/models/model_implementations/gme_v_models.py +2 -2
  100. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  101. mteb/models/model_implementations/inf_models.py +3 -3
  102. mteb/models/model_implementations/jasper_models.py +253 -2
  103. mteb/models/model_implementations/jina_models.py +12 -2
  104. mteb/models/model_implementations/kalm_models.py +159 -25
  105. mteb/models/model_implementations/llm2vec_models.py +1 -1
  106. mteb/models/model_implementations/misc_models.py +8 -2
  107. mteb/models/model_implementations/moco_models.py +9 -0
  108. mteb/models/model_implementations/mxbai_models.py +1 -1
  109. mteb/models/model_implementations/openclip_models.py +16 -0
  110. mteb/models/model_implementations/piccolo_models.py +6 -0
  111. mteb/models/model_implementations/rasgaard_models.py +33 -0
  112. mteb/models/model_implementations/reasonir_model.py +1 -1
  113. mteb/models/model_implementations/salesforce_models.py +1 -1
  114. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  115. mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
  116. mteb/models/model_implementations/tarka_models.py +374 -0
  117. mteb/models/model_implementations/voyage_models.py +6 -7
  118. mteb/models/model_implementations/voyage_v.py +10 -9
  119. mteb/models/model_implementations/yuan_models.py +33 -0
  120. mteb/models/search_wrappers.py +6 -5
  121. mteb/results/task_result.py +19 -17
  122. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  123. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  124. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  125. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  126. mteb/tasks/classification/ara/ajgt.py +1 -2
  127. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  128. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  130. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  131. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  132. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  133. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  134. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  135. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  136. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  137. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
  139. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  140. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  141. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  142. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  144. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  145. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  146. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  147. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  148. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  149. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  150. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  151. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  152. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  153. mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
  154. mteb/tasks/classification/eng/news_classification.py +1 -2
  155. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  156. mteb/tasks/classification/eng/patent_classification.py +1 -2
  157. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  158. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  159. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  160. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  161. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  162. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  163. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  164. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  165. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  166. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  167. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  168. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  169. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  170. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  171. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  172. mteb/tasks/classification/est/estonian_valence.py +1 -2
  173. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  174. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  175. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  176. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  177. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  178. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  179. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  180. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  181. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  182. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  183. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  184. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  185. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  186. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  187. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  188. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  189. mteb/tasks/classification/kor/klue_tc.py +1 -2
  190. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  191. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  192. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  193. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  194. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  195. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  196. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  197. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  198. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  199. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  200. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  201. mteb/tasks/classification/mya/myanmar_news.py +2 -3
  202. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  203. mteb/tasks/classification/nld/__init__.py +16 -0
  204. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  205. mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
  206. mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
  207. mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
  208. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
  209. mteb/tasks/classification/nld/iconclass_classification.py +44 -0
  210. mteb/tasks/classification/nld/open_tender_classification.py +41 -0
  211. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
  212. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  213. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  214. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  215. mteb/tasks/classification/pol/polish_classification.py +3 -6
  216. mteb/tasks/classification/ron/moroco.py +1 -2
  217. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  218. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  220. mteb/tasks/classification/rus/headline_classification.py +1 -2
  221. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  222. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  223. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  224. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  225. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  226. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  227. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  228. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  229. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  230. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  231. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  232. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  233. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  234. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  235. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  236. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  237. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  238. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  239. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  240. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  241. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  242. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  243. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  244. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  245. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  246. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  247. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  248. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  249. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  250. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  251. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  252. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  253. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  254. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  255. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  256. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  257. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  258. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  259. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  260. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  261. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  262. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  263. mteb/tasks/clustering/__init__.py +1 -0
  264. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  265. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  266. mteb/tasks/clustering/nld/__init__.py +17 -0
  267. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
  268. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
  269. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
  270. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
  271. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
  272. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
  273. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
  274. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  275. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  276. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  277. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  278. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  279. mteb/tasks/multilabel_classification/__init__.py +1 -0
  280. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  281. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  282. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  283. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  284. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
  285. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
  286. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  287. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  288. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  289. mteb/tasks/pair_classification/__init__.py +1 -0
  290. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  291. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  292. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  293. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  294. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
  295. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
  296. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  297. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  298. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  299. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  300. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  301. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  302. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  303. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  304. mteb/tasks/retrieval/code/code_rag.py +8 -8
  305. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  306. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  307. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  308. mteb/tasks/retrieval/eng/__init__.py +18 -4
  309. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  310. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  311. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  312. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  313. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  314. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  315. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  316. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  317. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  318. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  319. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  320. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  321. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  322. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  323. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  324. mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
  325. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  326. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  327. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  328. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  329. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
  330. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  331. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  332. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  333. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  334. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  335. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  336. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  337. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  338. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  339. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  340. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  341. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  342. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  343. mteb/tasks/retrieval/nld/__init__.py +18 -4
  344. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  345. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
  346. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
  347. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
  348. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  349. mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
  350. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  351. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  352. mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
  353. mteb/tasks/retrieval/nob/norquad.py +2 -2
  354. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  355. mteb/tasks/retrieval/rus/__init__.py +11 -2
  356. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  357. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  358. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  359. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  360. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  361. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  362. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  363. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  364. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  365. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  366. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  367. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  368. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  369. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  370. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  371. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  372. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  373. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  374. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  375. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  376. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  377. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  378. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  379. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  380. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  381. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  382. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  383. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  384. mteb/tasks/sts/__init__.py +1 -0
  385. mteb/tasks/sts/nld/__init__.py +5 -0
  386. mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
  387. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  388. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  389. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  390. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  391. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  392. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  393. mteb-2.1.19.dist-info/METADATA +253 -0
  394. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
  395. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  396. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  397. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  398. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  399. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  400. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  401. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  402. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  403. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  404. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  405. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  406. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  407. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  408. mteb-2.0.5.dist-info/METADATA +0 -455
  409. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
  410. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
  411. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
  412. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,399 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+ _LANGS = {
5
+ "french": ["fra-Latn"],
6
+ "spanish": ["spa-Latn"],
7
+ "english": ["eng-Latn"],
8
+ "german": ["deu-Latn"],
9
+ "italian": ["ita-Latn"],
10
+ "portuguese": ["por-Latn"],
11
+ }
12
+
13
+
14
class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Finance - EN" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3FinanceEnRetrieval",
        description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_finance_en_mteb_format",
            "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
        },
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Financial"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
50
+
51
+
52
class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Finance - FR" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        name="Vidore3FinanceFrRetrieval",
        description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        dataset={
            "path": "vidore/vidore_v3_finance_fr_mteb_format",
            "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
        },
        type="DocumentUnderstanding",
        category="t2i",
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        date=("2025-10-01", "2025-11-01"),
        domains=["Financial"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        # Declared explicitly, as on every other ViDoRe V3 task in this module:
        # the corpus side of this text-to-image ("t2i") task is page images.
        # NOTE(review): without this the TaskMetadata default applies, which
        # is presumably text-only — confirm against TaskMetadata.
        modalities=["text", "image"],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        is_public=True,
    )
88
+
89
+
90
class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Industrial reports" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3IndustrialRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_industrial_mteb_format",
            "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
127
+
128
+
129
class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Pharmaceutical" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3PharmaceuticalsRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
            "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Medical"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
166
+
167
+
168
class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Computer Science" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3ComputerScienceRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_computer_science_mteb_format",
            "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering", "Programming"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
205
+
206
+
207
class Vidore3HrRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "HR" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3HrRetrieval",
        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_hr_mteb_format",
            "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Social"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
244
+
245
+
246
class Vidore3EnergyRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Energy Fr" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3EnergyRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_energy_mteb_format",
            "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering", "Chemistry", "Academic"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
283
+
284
+
285
class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 "Physics" task: retrieve document pages for a text question."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3PhysicsRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "vidore/vidore_v3_physics_mteb_format",
            "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
        },
        is_public=True,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering", "Academic"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
322
+
323
+
324
class Vidore3NuclearRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 nuclear task backed by a non-public dataset (``is_public=False``)."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3NuclearRetrieval",
        description="Retrieve associated pages according to questions.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "mteb-private/Vidore3NuclearRetrieval",
            "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
        },
        is_public=False,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering", "Chemistry"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
361
+
362
+
363
class Vidore3TelecomRetrieval(AbsTaskRetrieval):
    """ViDoRe V3 telecom task backed by a non-public dataset (``is_public=False``)."""

    metadata = TaskMetadata(
        # --- identity ---
        name="Vidore3TelecomRetrieval",
        description="Retrieve associated pages according to questions.",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        # --- data source (pinned revision) ---
        dataset={
            "path": "mteb-private/Vidore3TelecomRetrieval",
            "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
        },
        is_public=False,
        # --- task shape ---
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        domains=["Engineering", "Programming"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- provenance ---
        date=("2025-10-01", "2025-11-01"),
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
day = {5},
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
journal = {Hugging Face Blog},
month = {November},
publisher = {Hugging Face},
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
year = {2025},
}
""",
    )
@@ -34,7 +34,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
34
34
  lang_corpus = lang_data.map(
35
35
  lambda x: {
36
36
  "id": "corpus-" + x["image_id"],
37
- "text": None,
38
37
  "modality": "image",
39
38
  "image": x["image"],
40
39
  },
@@ -60,7 +59,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
60
59
  "id": query_id,
61
60
  "text": caption,
62
61
  "modality": "text",
63
- "image": None,
64
62
  }
65
63
  )
66
64
  if query_id not in relevant_docs[lang][split]:
@@ -1,4 +1,4 @@
1
- from datasets import DatasetDict, load_dataset
1
+ from datasets import DatasetDict, Image, load_dataset
2
2
 
3
3
  from mteb.abstasks.retrieval import AbsTaskRetrieval
4
4
  from mteb.abstasks.task_metadata import TaskMetadata
@@ -16,7 +16,7 @@ _LANGUAGES = {
16
16
 
17
17
 
18
18
  def _load_xflickrco_data(
19
- path: str, langs: list, splits: str, revision: str | None = None
19
+ path: str, langs: list, splits: list[str], revision: str | None = None
20
20
  ):
21
21
  corpus = {lang: dict.fromkeys(splits) for lang in langs}
22
22
  queries = {lang: dict.fromkeys(splits) for lang in langs}
@@ -32,22 +32,23 @@ def _load_xflickrco_data(
32
32
  lang_corpus = lang_data.map(
33
33
  lambda x: {
34
34
  "id": "corpus-" + x["id"],
35
- "text": None,
36
35
  "modality": "image",
37
- "image": x["image"]["bytes"],
36
+ "image": x["image"],
38
37
  },
39
38
  remove_columns=["sentences"],
40
39
  )
40
+ lang_corpus = lang_corpus.cast_column("image", Image())
41
41
 
42
42
  lang_queries = lang_data.map(
43
43
  lambda x: {
44
44
  "id": "query-" + x["id"],
45
45
  "text": x["sentences"],
46
46
  "modality": "text",
47
- "image": None,
48
47
  },
49
48
  remove_columns=["sentences"],
50
49
  )
50
+ # None values
51
+ lang_queries = lang_queries.remove_columns(["image"])
51
52
 
52
53
  relevant_docs[lang][split] = {}
53
54
  for row in lang_data:
@@ -1,4 +1,4 @@
1
- from datasets import Dataset, DatasetDict, load_dataset
1
+ from datasets import Dataset, DatasetDict, Image, load_dataset
2
2
 
3
3
  from mteb.abstasks.retrieval import AbsTaskRetrieval
4
4
  from mteb.abstasks.task_metadata import TaskMetadata
@@ -61,9 +61,8 @@ def _load_xm3600_data(
61
61
  lang_corpus = lang_data.map(
62
62
  lambda x: {
63
63
  "id": "corpus-" + x["image_id"],
64
- "text": None,
65
64
  "modality": "image",
66
- "image": x["image"]["bytes"],
65
+ "image": x["image"],
67
66
  },
68
67
  remove_columns=[
69
68
  "captions",
@@ -73,6 +72,7 @@ def _load_xm3600_data(
73
72
  "image_id",
74
73
  ],
75
74
  )
75
+ lang_corpus = lang_corpus.cast_column("image", Image())
76
76
 
77
77
  corpus[lang][split] = lang_corpus
78
78
 
@@ -90,7 +90,6 @@ def _load_xm3600_data(
90
90
  "id": query_id,
91
91
  "text": caption,
92
92
  "modality": "text",
93
- "image": None,
94
93
  }
95
94
  )
96
95
  if query_id not in relevant_docs[lang][split]:
@@ -1,4 +1,5 @@
1
- from .argu_ana_nl_retrieval import ArguAnaNL
1
+ from .argu_ana_nl_retrieval import ArguAnaNL, ArguAnaNLv2
2
+ from .bbsard_nl_retrieval import BBSARDNLRetrieval
2
3
  from .climate_fevernl_retrieval import ClimateFEVERNL
3
4
  from .cqa_dupstack_android_nl_retrieval import CQADupstackAndroidNLRetrieval
4
5
  from .cqa_dupstack_english_nl_retrieval import CQADupstackEnglishNLRetrieval
@@ -13,17 +14,21 @@ from .cqa_dupstack_unix_nl_retrieval import CQADupstackUnixNLRetrieval
13
14
  from .cqa_dupstack_webmasters_nl_retrieval import CQADupstackWebmastersNLRetrieval
14
15
  from .cqa_dupstack_wordpress_nl_retrieval import CQADupstackWordpressNLRetrieval
15
16
  from .db_pedia_nl_retrieval import DBPediaNL
17
+ from .dutch_news_articles_retrieval import DutchNewsArticlesRetrieval
16
18
  from .fevernl_retrieval import FEVERNL
17
19
  from .fi_qa2018_nl_retrieval import FiQA2018NL
18
20
  from .hotpot_qanl_retrieval import HotpotQANL
21
+ from .legal_qa_nl_retrieval import LegalQANLRetrieval
19
22
  from .mmarconl_retrieval import MMMARCONL
20
- from .nf_corpus_nl_retrieval import NFCorpusNL
23
+ from .nf_corpus_nl_retrieval import NFCorpusNL, NFCorpusNLv2
21
24
  from .nqnl_retrieval import NQNL
25
+ from .open_tender_retrieval import OpenTenderRetrieval
22
26
  from .quora_nl_retrieval import QuoraNLRetrieval
23
- from .sci_fact_nl_retrieval import SciFactNL
24
- from .scidocsnl_retrieval import SCIDOCSNL
27
+ from .sci_fact_nl_retrieval import SciFactNL, SciFactNLv2
28
+ from .scidocsnl_retrieval import SCIDOCSNL, SCIDOCSNLv2
25
29
  from .touche2020_nl_retrieval import Touche2020NL
26
30
  from .treccovidnl_retrieval import TRECCOVIDNL
31
+ from .vabb_retrieval import VABBRetrieval
27
32
 
28
33
  __all__ = [
29
34
  "FEVERNL",
@@ -32,6 +37,8 @@ __all__ = [
32
37
  "SCIDOCSNL",
33
38
  "TRECCOVIDNL",
34
39
  "ArguAnaNL",
40
+ "ArguAnaNLv2",
41
+ "BBSARDNLRetrieval",
35
42
  "CQADupstackAndroidNLRetrieval",
36
43
  "CQADupstackEnglishNLRetrieval",
37
44
  "CQADupstackGamingNLRetrieval",
@@ -46,10 +53,17 @@ __all__ = [
46
53
  "CQADupstackWordpressNLRetrieval",
47
54
  "ClimateFEVERNL",
48
55
  "DBPediaNL",
56
+ "DutchNewsArticlesRetrieval",
49
57
  "FiQA2018NL",
50
58
  "HotpotQANL",
59
+ "LegalQANLRetrieval",
51
60
  "NFCorpusNL",
61
+ "NFCorpusNLv2",
62
+ "OpenTenderRetrieval",
52
63
  "QuoraNLRetrieval",
64
+ "SCIDOCSNLv2",
53
65
  "SciFactNL",
66
+ "SciFactNLv2",
54
67
  "Touche2020NL",
68
+ "VABBRetrieval",
55
69
  ]
@@ -1,33 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class ArguAnaNL(AbsTaskRetrieval):
6
- ignore_identical_ids = True
7
-
8
- metadata = TaskMetadata(
9
- name="ArguAna-NL",
10
- description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
11
- "a Dutch translation.",
12
- reference="https://huggingface.co/datasets/clips/beir-nl-arguana",
13
- dataset={
14
- "path": "clips/beir-nl-arguana",
15
- "revision": "4cd085d148fe2cac923bb7758d6ef585926170ba",
16
- },
17
- type="Retrieval",
18
- category="t2t",
19
- modalities=["text"],
20
- eval_splits=["test"],
21
- eval_langs=["nld-Latn"],
22
- main_score="ndcg_at_10",
23
- date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
24
- domains=["Written", "Non-fiction"],
25
- task_subtypes=[],
26
- license="cc-by-sa-4.0",
27
- annotations_creators="derived",
28
- dialect=[],
29
- sample_creation="machine-translated and verified", # manually checked a small subset
30
- bibtex_citation=r"""
4
+ _argu_ana_nl_metadata = dict(
5
+ reference="https://huggingface.co/datasets/clips/beir-nl-arguana",
6
+ dataset={
7
+ "path": "clips/beir-nl-arguana",
8
+ "revision": "4cd085d148fe2cac923bb7758d6ef585926170ba",
9
+ },
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
17
+ domains=["Written", "Non-fiction"],
18
+ task_subtypes=[],
19
+ license="cc-by-sa-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
31
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
32
25
  archiveprefix = {arXiv},
33
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -38,5 +31,31 @@ class ArguAnaNL(AbsTaskRetrieval):
38
31
  year = {2024},
39
32
  }
40
33
  """,
34
+ )
35
+
36
+
37
class ArguAnaNL(AbsTaskRetrieval):
    """Dutch ArguAna retrieval task; the remaining fields come from ``_argu_ana_nl_metadata``."""

    ignore_identical_ids = True

    metadata = TaskMetadata(
        # Fields shared with the other ArguAna-NL variant are unpacked first;
        # only the variant-specific ones are spelled out here.
        **_argu_ana_nl_metadata,
        name="ArguAna-NL",
        adapted_from=["ArguAna"],
        description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is a Dutch translation.",
    )
47
+
48
+
49
class ArguAnaNLv2(AbsTaskRetrieval):
    """ArguAna-NL variant that adds a Dutch query prompt on top of the shared metadata."""

    ignore_identical_ids = True

    metadata = TaskMetadata(
        # Fields shared with the other ArguAna-NL variant are unpacked first;
        # only the variant-specific ones are spelled out here.
        **_argu_ana_nl_metadata,
        name="ArguAna-NL.v2",
        adapted_from=["ArguAna-NL"],
        description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is a Dutch translation. This version adds a Dutch prompt to the dataset.",
        prompt={
            "query": "Gegeven een bewering, vind documenten die de bewering weerleggen"
        },
    )
@@ -0,0 +1,44 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
class BBSARDNLRetrieval(AbsTaskRetrieval):
    """Dutch statutory-article retrieval task over the bBSARD dataset."""

    ignore_identical_ids = True

    metadata = TaskMetadata(
        # --- identity ---
        name="bBSARDNLRetrieval",
        description=(
            "Building on the Belgian Statutory Article Retrieval Dataset (BSARD) in French, we introduce the bilingual version of this dataset, bBSARD. "
            "The dataset contains parallel Belgian statutory articles in both French and Dutch, along with legal questions from BSARD and their Dutch translation."
        ),
        reference="https://aclanthology.org/2025.regnlp-1.3.pdf",
        # --- data source (pinned revision) ---
        dataset={
            "path": "clips/mteb-nl-bbsard",
            "revision": "52027c212ba9765a3e9737c9cbf9a06ae83cbb93",
        },
        # --- task shape ---
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        task_subtypes=[],
        domains=["Legal", "Written"],
        # --- evaluation ---
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        prompt={
            "query": "Gegeven een juridische vraag, haal documenten op die kunnen helpen bij het beantwoorden van de vraag"
        },
        # --- provenance ---
        date=("2021-05-01", "2021-08-26"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="expert-annotated",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@article{lotfi2025bilingual,
author = {Lotfi, Ehsan and Banar, Nikolay and Yuzbashyan, Nerses and Daelemans, Walter},
journal = {COLING 2025},
pages = {10},
title = {Bilingual BSARD: Extending Statutory Article Retrieval to Dutch},
year = {2025},
}
""",
    )