mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (412)
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +8 -3
  3. mteb/_evaluators/any_sts_evaluator.py +14 -12
  4. mteb/_evaluators/clustering_evaluator.py +1 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  7. mteb/_evaluators/retrieval_metrics.py +0 -9
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_stratification.py +1 -1
  13. mteb/abstasks/abstask.py +6 -1
  14. mteb/abstasks/clustering.py +1 -1
  15. mteb/abstasks/dataset_card_template.md +1 -1
  16. mteb/abstasks/multilabel_classification.py +2 -2
  17. mteb/abstasks/retrieval.py +2 -1
  18. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  19. mteb/abstasks/task_metadata.py +2 -1
  20. mteb/benchmarks/_create_table.py +1 -3
  21. mteb/benchmarks/benchmark.py +18 -1
  22. mteb/benchmarks/benchmarks/__init__.py +4 -0
  23. mteb/benchmarks/benchmarks/benchmarks.py +125 -16
  24. mteb/benchmarks/get_benchmark.py +3 -1
  25. mteb/cache.py +7 -3
  26. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  27. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  28. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  29. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  30. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  31. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  32. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  33. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  34. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  35. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  36. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  37. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  38. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  39. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  40. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  41. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  42. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  43. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  44. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  54. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  55. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  56. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  57. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  58. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  59. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  60. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  61. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  63. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  66. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  67. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  68. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  69. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  71. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  72. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  73. mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
  74. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  75. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  76. mteb/evaluate.py +26 -6
  77. mteb/languages/check_language_code.py +11 -3
  78. mteb/languages/language_scripts.py +4 -0
  79. mteb/leaderboard/app.py +5 -3
  80. mteb/leaderboard/benchmark_selector.py +4 -2
  81. mteb/leaderboard/text_segments.py +1 -1
  82. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  83. mteb/models/instruct_wrapper.py +3 -0
  84. mteb/models/model_implementations/align_models.py +6 -0
  85. mteb/models/model_implementations/andersborges.py +51 -0
  86. mteb/models/model_implementations/ara_models.py +7 -0
  87. mteb/models/model_implementations/b1ade_models.py +1 -1
  88. mteb/models/model_implementations/bge_models.py +1 -3
  89. mteb/models/model_implementations/blip2_models.py +9 -0
  90. mteb/models/model_implementations/blip_models.py +19 -0
  91. mteb/models/model_implementations/bmretriever_models.py +1 -1
  92. mteb/models/model_implementations/cadet_models.py +8 -0
  93. mteb/models/model_implementations/cde_models.py +12 -0
  94. mteb/models/model_implementations/codefuse_models.py +15 -0
  95. mteb/models/model_implementations/codesage_models.py +12 -0
  96. mteb/models/model_implementations/cohere_models.py +1 -1
  97. mteb/models/model_implementations/colqwen_models.py +57 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +70 -0
  99. mteb/models/model_implementations/gme_v_models.py +2 -2
  100. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  101. mteb/models/model_implementations/inf_models.py +3 -3
  102. mteb/models/model_implementations/jasper_models.py +253 -2
  103. mteb/models/model_implementations/jina_models.py +12 -2
  104. mteb/models/model_implementations/kalm_models.py +159 -25
  105. mteb/models/model_implementations/llm2vec_models.py +1 -1
  106. mteb/models/model_implementations/misc_models.py +8 -2
  107. mteb/models/model_implementations/moco_models.py +9 -0
  108. mteb/models/model_implementations/mxbai_models.py +1 -1
  109. mteb/models/model_implementations/openclip_models.py +16 -0
  110. mteb/models/model_implementations/piccolo_models.py +6 -0
  111. mteb/models/model_implementations/rasgaard_models.py +33 -0
  112. mteb/models/model_implementations/reasonir_model.py +1 -1
  113. mteb/models/model_implementations/salesforce_models.py +1 -1
  114. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  115. mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
  116. mteb/models/model_implementations/tarka_models.py +374 -0
  117. mteb/models/model_implementations/voyage_models.py +6 -7
  118. mteb/models/model_implementations/voyage_v.py +10 -9
  119. mteb/models/model_implementations/yuan_models.py +33 -0
  120. mteb/models/search_wrappers.py +6 -5
  121. mteb/results/task_result.py +19 -17
  122. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  123. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  124. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  125. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  126. mteb/tasks/classification/ara/ajgt.py +1 -2
  127. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  128. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  130. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  131. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  132. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  133. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  134. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  135. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  136. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  137. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
  139. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  140. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  141. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  142. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  144. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  145. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  146. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  147. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  148. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  149. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  150. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  151. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  152. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  153. mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
  154. mteb/tasks/classification/eng/news_classification.py +1 -2
  155. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  156. mteb/tasks/classification/eng/patent_classification.py +1 -2
  157. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  158. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  159. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  160. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  161. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  162. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  163. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  164. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  165. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  166. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  167. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  168. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  169. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  170. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  171. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  172. mteb/tasks/classification/est/estonian_valence.py +1 -2
  173. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  174. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  175. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  176. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  177. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  178. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  179. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  180. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  181. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  182. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  183. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  184. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  185. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  186. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  187. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  188. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  189. mteb/tasks/classification/kor/klue_tc.py +1 -2
  190. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  191. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  192. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  193. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  194. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  195. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  196. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  197. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  198. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  199. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  200. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  201. mteb/tasks/classification/mya/myanmar_news.py +2 -3
  202. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  203. mteb/tasks/classification/nld/__init__.py +16 -0
  204. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  205. mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
  206. mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
  207. mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
  208. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
  209. mteb/tasks/classification/nld/iconclass_classification.py +44 -0
  210. mteb/tasks/classification/nld/open_tender_classification.py +41 -0
  211. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
  212. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  213. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  214. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  215. mteb/tasks/classification/pol/polish_classification.py +3 -6
  216. mteb/tasks/classification/ron/moroco.py +1 -2
  217. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  218. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  220. mteb/tasks/classification/rus/headline_classification.py +1 -2
  221. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  222. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  223. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  224. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  225. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  226. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  227. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  228. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  229. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  230. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  231. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  232. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  233. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  234. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  235. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  236. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  237. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  238. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  239. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  240. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  241. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  242. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  243. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  244. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  245. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  246. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  247. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  248. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  249. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  250. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  251. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  252. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  253. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  254. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  255. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  256. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  257. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  258. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  259. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  260. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  261. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  262. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  263. mteb/tasks/clustering/__init__.py +1 -0
  264. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  265. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  266. mteb/tasks/clustering/nld/__init__.py +17 -0
  267. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
  268. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
  269. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
  270. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
  271. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
  272. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
  273. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
  274. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  275. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  276. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  277. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  278. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  279. mteb/tasks/multilabel_classification/__init__.py +1 -0
  280. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  281. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  282. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  283. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  284. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
  285. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
  286. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  287. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  288. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  289. mteb/tasks/pair_classification/__init__.py +1 -0
  290. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  291. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  292. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  293. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  294. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
  295. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
  296. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  297. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  298. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  299. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  300. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  301. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  302. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  303. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  304. mteb/tasks/retrieval/code/code_rag.py +8 -8
  305. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  306. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  307. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  308. mteb/tasks/retrieval/eng/__init__.py +18 -4
  309. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  310. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  311. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  312. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  313. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  314. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  315. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  316. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  317. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  318. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  319. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  320. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  321. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  322. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  323. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  324. mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
  325. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  326. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  327. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  328. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  329. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
  330. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  331. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  332. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  333. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  334. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  335. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  336. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  337. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  338. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  339. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  340. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  341. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  342. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  343. mteb/tasks/retrieval/nld/__init__.py +18 -4
  344. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  345. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
  346. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
  347. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
  348. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  349. mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
  350. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  351. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  352. mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
  353. mteb/tasks/retrieval/nob/norquad.py +2 -2
  354. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  355. mteb/tasks/retrieval/rus/__init__.py +11 -2
  356. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  357. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  358. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  359. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  360. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  361. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  362. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  363. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  364. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  365. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  366. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  367. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  368. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  369. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  370. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  371. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  372. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  373. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  374. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  375. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  376. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  377. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  378. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  379. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  380. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  381. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  382. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  383. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  384. mteb/tasks/sts/__init__.py +1 -0
  385. mteb/tasks/sts/nld/__init__.py +5 -0
  386. mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
  387. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  388. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  389. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  390. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  391. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  392. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  393. mteb-2.1.19.dist-info/METADATA +253 -0
  394. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
  395. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  396. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  397. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  398. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  399. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  400. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  401. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  402. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  403. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  404. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  405. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  406. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  407. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  408. mteb-2.0.5.dist-info/METADATA +0 -455
  409. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
  410. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
  411. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
  412. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
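Most of the larger retrieval-task diffs that follow (dbpedia_retrieval.py, fever_retrieval.py, hotpot_qa_retrieval.py, quora_retrieval.py) apply the same refactor: the TaskMetadata fields shared by a task family are hoisted into a module-level dict and unpacked into each variant with **, so the base task, its HardNegatives variant, and the new HardNegatives.v2 variant stop duplicating roughly 25 lines of metadata each. A minimal sketch of the pattern; the names ExampleTask, org/example, and the placeholder revisions are illustrative, not identifiers from the package:

from mteb.abstasks.retrieval import AbsTaskRetrieval
from mteb.abstasks.task_metadata import TaskMetadata

# Fields identical across the task family, defined once at module level.
_example_metadata = dict(
    type="Retrieval",
    category="t2t",
    modalities=["text"],
    eval_splits=["test"],
    eval_langs=["eng-Latn"],
    main_score="ndcg_at_10",
)


class ExampleTask(AbsTaskRetrieval):
    metadata = TaskMetadata(
        name="ExampleTask",
        description="Per-task fields (name, dataset, prompt) stay inline.",
        dataset={"path": "org/example", "revision": "0000000"},  # placeholder
        prompt={"query": "Given a query, retrieve relevant documents"},
        **_example_metadata,  # shared fields unpacked into the model
    )


class ExampleTaskHardNegativesV2(AbsTaskRetrieval):
    metadata = TaskMetadata(
        name="ExampleTaskHardNegatives.v2",
        description="V2 variant; the older variant sets superseded_by to this name.",
        dataset={"path": "org/example-hn", "revision": "0000000"},  # placeholder
        adapted_from=["ExampleTask"],
        **_example_metadata,
    )

Fields that differ per variant simply stay out of the shared dict: passing the same keyword both inline and via ** would raise a TypeError at import time.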
mteb/tasks/retrieval/eng/dbpedia_retrieval.py
@@ -1,30 +1,21 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class DBPedia(AbsTaskRetrieval):
-    metadata = TaskMetadata(
-        name="DBPedia",
-        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base",
-        reference="https://github.com/iai-group/DBpedia-Entity/",
-        dataset={
-            "path": "mteb/dbpedia",
-            "revision": "c0f706b76e590d620bd6618b3ca8efdd34e2d659",
-        },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
-        domains=["Written", "Encyclopaedic"],
-        task_subtypes=[],
-        license="mit",
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_dbpedia_metadata = dict(
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
+    domains=["Written", "Encyclopaedic"],
+    task_subtypes=[],
+    license="mit",
+    annotations_creators="derived",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{Hasibi:2017:DVT,
   author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie},
   booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
@@ -36,45 +27,59 @@ class DBPedia(AbsTaskRetrieval):
   year = {2017},
 }
 """,
+)
+
+
+class DBPedia(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="DBPedia",
+        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base",
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "mteb/dbpedia",
+            "revision": "c0f706b76e590d620bd6618b3ca8efdd34e2d659",
+        },
         prompt={
             "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
         },
+        **_dbpedia_metadata,
     )
 
 
 class DBPediaHardNegatives(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="DBPediaHardNegatives",
-        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
+        description=(
+            "DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+        ),
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "mteb/DBPedia_test_top_250_only_w_correct-v2",
+            "revision": "943ec7fdfef3728b2ad1966c5b6479ff9ffd26c9",
+        },
+        superseded_by="DBPediaHardNegatives.v2",
+        adapted_from=["DBPedia"],
+        **_dbpedia_metadata,
+    )
+
+
+class DBPediaHardNegativesV2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="DBPediaHardNegatives.v2",
+        description=(
+            "DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
         reference="https://github.com/iai-group/DBpedia-Entity/",
         dataset={
             "path": "mteb/DBPedia_test_top_250_only_w_correct-v2",
             "revision": "943ec7fdfef3728b2ad1966c5b6479ff9ffd26c9",
         },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
-        domains=["Written", "Encyclopaedic"],
-        task_subtypes=[],
-        license="mit",
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@inproceedings{Hasibi:2017:DVT,
-  author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie},
-  booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
-  doi = {10.1145/3077136.3080751},
-  pages = {1265--1268},
-  publisher = {ACM},
-  series = {SIGIR '17},
-  title = {DBpedia-Entity V2: A Test Collection for Entity Search},
-  year = {2017},
-}
-""",
         adapted_from=["DBPedia"],
+        prompt={
+            "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
+        },
+        **_dbpedia_metadata,
     )
mteb/tasks/retrieval/eng/fever_retrieval.py
@@ -1,36 +1,22 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class FEVER(AbsTaskRetrieval):
-    ignore_identical_ids = True
-
-    metadata = TaskMetadata(
-        name="FEVER",
-        dataset={
-            "path": "mteb/fever",
-            "revision": "bea83ef9e8fb933d90a2f1d5515737465d613e12",
-        },
-        description=(
-            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences"
-            + " extracted from Wikipedia and subsequently verified without knowledge of the sentence they were"
-            + " derived from."
-        ),
-        reference="https://fever.ai/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-nc-sa-3.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_fever_metadata = dict(
+    reference="https://fever.ai/",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=None,
+    domains=["Encyclopaedic", "Written"],
+    task_subtypes=["Claim verification"],
+    license="cc-by-nc-sa-3.0",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{thorne-etal-2018-fever,
   address = {New Orleans, Louisiana},
   author = {Thorne, James and
@@ -50,9 +36,27 @@ Stent, Amanda},
   year = {2018},
 }
 """,
+)
+
+
+class FEVER(AbsTaskRetrieval):
+    ignore_identical_ids = True
+
+    metadata = TaskMetadata(
+        name="FEVER",
+        dataset={
+            "path": "mteb/fever",
+            "revision": "bea83ef9e8fb933d90a2f1d5515737465d613e12",
+        },
+        description=(
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from."
+        ),
         prompt={
             "query": "Given a claim, retrieve documents that support or refute the claim"
         },
+        **_fever_metadata,
     )
 
 
@@ -66,43 +70,34 @@ class FEVERHardNegatives(AbsTaskRetrieval):
             "revision": "080c9ed6267b65029207906e815d44a9240bafca",
         },
         description=(
-            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences"
-            + " extracted from Wikipedia and subsequently verified without knowledge of the sentence they were"
-            + " derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+        ),
+        adapted_from=["FEVER"],
+        superseded_by="FEVERHardNegatives.v2",
+        **_fever_metadata,
+    )
+
+
+class FEVERHardNegativesV2(AbsTaskRetrieval):
+    ignore_identical_ids = True
+
+    metadata = TaskMetadata(
+        name="FEVERHardNegatives.v2",
+        dataset={
+            "path": "mteb/FEVER_test_top_250_only_w_correct-v2",
+            "revision": "080c9ed6267b65029207906e815d44a9240bafca",
+        },
+        description=(
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
         ),
-        reference="https://fever.ai/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-nc-sa-3.0",
-        annotations_creators="human-annotated",
-        dialect=None,
-        sample_creation=None,
-        bibtex_citation=r"""
-@inproceedings{thorne-etal-2018-fever,
-  address = {New Orleans, Louisiana},
-  author = {Thorne, James and
-Vlachos, Andreas and
-Christodoulopoulos, Christos and
-Mittal, Arpit},
-  booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
-  doi = {10.18653/v1/N18-1074},
-  editor = {Walker, Marilyn and
-Ji, Heng and
-Stent, Amanda},
-  month = jun,
-  pages = {809--819},
-  publisher = {Association for Computational Linguistics},
-  title = {{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification},
-  url = {https://aclanthology.org/N18-1074},
-  year = {2018},
-}
-""",
         adapted_from=["FEVER"],
+        prompt={
+            "query": "Given a claim, retrieve documents that support or refute the claim"
+        },
+        **_fever_metadata,
     )
mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py
@@ -24,9 +24,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     shared_corpus = shared_corpus.map(
         lambda x: {
             "id": "corpus-" + str(x["id"]),
-            # "text": x["text"],
             "modality": "text",
-            "image": None,
         },
         remove_columns=[
             "split",
@@ -40,9 +38,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         queries[split] = split_dataset.map(
             lambda x: {
                 "id": "query-" + str(x["id"]),
-                "text": None,
                 "modality": "image",
-                # "image": x["image"],
             },
             remove_columns=[
                 "split",
mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py
@@ -24,9 +24,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     shared_corpus = shared_corpus.map(
         lambda x: {
             "id": "corpus-" + str(x["id"]),
-            "text": None,
             "modality": "image",
-            # "image": None,
         },
         remove_columns=[
             "split",
@@ -40,9 +38,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         queries[split] = split_dataset.map(
            lambda x: {
                 "id": "query-" + str(x["id"]),
-                # "text": None,
                 "modality": "text",
-                "image": None,
             },
             remove_columns=[
                 "split",
mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py
@@ -1,33 +1,22 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class HotpotQA(AbsTaskRetrieval):
-    metadata = TaskMetadata(
-        name="HotpotQA",
-        dataset={
-            "path": "mteb/hotpotqa",
-            "revision": "ab518f4d6fcca38d87c25209f94beba119d02014",
-        },
-        description=(
-            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong"
-            + " supervision for supporting facts to enable more explainable question answering systems."
-        ),
-        reference="https://hotpotqa.github.io/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
-        domains=["Web", "Written"],
-        task_subtypes=["Question answering"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_hotpot_qa_metadata = dict(
+    reference="https://hotpotqa.github.io/",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
+    domains=["Web", "Written"],
+    task_subtypes=["Question answering"],
+    license="cc-by-sa-4.0",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{yang-etal-2018-hotpotqa,
   address = {Brussels, Belgium},
   author = {Yang, Zhilin and
@@ -51,9 +40,24 @@ Tsujii, Jun{'}ichi},
   year = {2018},
 }
 """,
+)
+
+
+class HotpotQA(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="HotpotQA",
+        dataset={
+            "path": "mteb/hotpotqa",
+            "revision": "ab518f4d6fcca38d87c25209f94beba119d02014",
+        },
+        description=(
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems."
+        ),
         prompt={
             "query": "Given a multi-hop question, retrieve documents that can help answer the question"
         },
+        **_hotpot_qa_metadata,
     )
 
 
@@ -65,46 +69,32 @@ class HotpotQAHardNegatives(AbsTaskRetrieval):
             "revision": "617612fa63afcb60e3b134bed8b7216a99707c37",
         },
         description=(
-            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong"
-            + " supervision for supporting facts to enable more explainable question answering systems. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
         ),
-        reference="https://hotpotqa.github.io/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
-        domains=["Web", "Written"],
-        task_subtypes=["Question answering"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@inproceedings{yang-etal-2018-hotpotqa,
-  address = {Brussels, Belgium},
-  author = {Yang, Zhilin and
-Qi, Peng and
-Zhang, Saizheng and
-Bengio, Yoshua and
-Cohen, William and
-Salakhutdinov, Ruslan and
-Manning, Christopher D.},
-  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
-  doi = {10.18653/v1/D18-1259},
-  editor = {Riloff, Ellen and
-Chiang, David and
-Hockenmaier, Julia and
-Tsujii, Jun{'}ichi},
-  month = oct # {-} # nov,
-  pages = {2369--2380},
-  publisher = {Association for Computational Linguistics},
-  title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering},
-  url = {https://aclanthology.org/D18-1259},
-  year = {2018},
-}
-""",
         adapted_from=["HotpotQA"],
+        superseded_by="HotpotQAHardNegatives.v2",
+        **_hotpot_qa_metadata,
+    )
+
+
+class HotpotQAHardNegativesV2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="HotpotQAHardNegatives.v2",
+        dataset={
+            "path": "mteb/HotpotQA_test_top_250_only_w_correct-v2",
+            "revision": "617612fa63afcb60e3b134bed8b7216a99707c37",
+        },
+        description=(
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
+        adapted_from=["HotpotQA"],
+        prompt={
+            "query": "Given a multi-hop question, retrieve documents that can help answer the question"
+        },
+        **_hotpot_qa_metadata,
     )
mteb/tasks/retrieval/eng/legal_summarization_retrieval.py
@@ -5,7 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LegalSummarization(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LegalSummarization",
-        description="The dataset consistes of 439 pairs of contracts and their summarizations from https://tldrlegal.com and https://tosdr.org/.",
+        description="The dataset consists of 439 pairs of contracts and their summarizations from https://tldrlegal.com and https://tosdr.org/.",
         reference="https://github.com/lauramanor/legal_summarization",
         dataset={
             "path": "mteb/legal_summarization",
mteb/tasks/retrieval/eng/lit_search_retrieval.py
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LitSearchRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LitSearchRetrieval",
-        description="""
-        The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
-        Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
-        search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
-        generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
-        recently published papers, manually written by their authors. All LitSearch questions were manually examined or
-        edited by experts to ensure high quality.
-        """,
+        description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
         reference="https://github.com/princeton-nlp/LitSearch",
         dataset={
             "path": "princeton-nlp/LitSearch",
mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py
@@ -20,7 +20,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
             "id": f"corpus-{split_name}-{idx}",
             "text": x["text_corrected"],
             "modality": "text",
-            "image": None,
         }
 
     split_datasets = {}
@@ -56,9 +55,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         queries[split] = split_dataset.map(
             lambda x, idx: {
                 "id": f"query-{split}-{idx}",
-                "text": None,
                 "modality": "image",
-                # "image": None,
             },
             with_indices=True,
             remove_columns=[
mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py
@@ -18,7 +18,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     def map_function(split_name):
         return lambda x, idx: {
             "id": f"corpus-{split_name}-{idx}",
-            "text": None,
             "modality": "image",
         }
 
@@ -56,7 +55,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
                 "id": f"query-{split}-{idx}",
                 "text": x["text_corrected"],
                 "modality": "text",
-                "image": None,
             },
             with_indices=True,
             remove_columns=[
mteb/tasks/retrieval/eng/oven_it2t_retrieval.py
@@ -12,7 +12,7 @@ class OVENIT2TRetrieval(AbsTaskRetrieval):
             "revision": "2192074af29422bc1dc41cf07936f198b8c69bd0",
         },
         type="Any2AnyRetrieval",
-        category="it2i",
+        category="it2t",
         eval_splits=["test"],
         eval_langs=["eng-Latn"],
         main_score="ndcg_at_10",
mteb/tasks/retrieval/eng/quora_retrieval.py
@@ -1,6 +1,32 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
+_quora_metadata = dict(
+    reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=None,
+    domains=["Written", "Web", "Blog"],
+    task_subtypes=["Question answering"],
+    license="not specified",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
+@misc{quora-question-pairs,
+  author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
+  publisher = {Kaggle},
+  title = {Quora Question Pairs},
+  url = {https://kaggle.com/competitions/quora-question-pairs},
+  year = {2017},
+}
+""",
+)
+
 
 class QuoraRetrieval(AbsTaskRetrieval):
     ignore_identical_ids = True
@@ -15,32 +41,10 @@ class QuoraRetrieval(AbsTaskRetrieval):
             "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a"
             + " question, find other (duplicate) questions."
         ),
-        reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=["Written", "Web", "Blog"],
-        task_subtypes=["Question answering"],
-        license="not specified",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@misc{quora-question-pairs,
-  author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
-  publisher = {Kaggle},
-  title = {Quora Question Pairs},
-  url = {https://kaggle.com/competitions/quora-question-pairs},
-  year = {2017},
-}
-""",
         prompt={
             "query": "Given a question, retrieve questions that are semantically equivalent to the given question"
         },
+        **_quora_metadata,
     )
 
 
@@ -57,28 +61,29 @@ class QuoraRetrievalHardNegatives(AbsTaskRetrieval):
             "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a"
             + " question, find other (duplicate) questions. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
         ),
-        reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=None,
-        task_subtypes=None,
-        license=None,
-        annotations_creators=None,
-        dialect=None,
-        sample_creation=None,
-        bibtex_citation=r"""
-@misc{quora-question-pairs,
-  author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
-  publisher = {Kaggle},
-  title = {Quora Question Pairs},
-  url = {https://kaggle.com/competitions/quora-question-pairs},
-  year = {2017},
-}
-""",
         adapted_from=["QuoraRetrieval"],
+        superseded_by="QuoraRetrievalHardNegatives.v2",
+        **_quora_metadata,
+    )
+
+
+class QuoraRetrievalHardNegativesV2(AbsTaskRetrieval):
+    ignore_identical_ids = True
+
+    metadata = TaskMetadata(
+        name="QuoraRetrievalHardNegatives.v2",
+        dataset={
+            "path": "mteb/QuoraRetrieval_test_top_250_only_w_correct-v2",
+            "revision": "907a33577e9506221d3ba20f5a851b7c3f8dc6d3",
+        },
+        description=(
+            "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a "
+            "question, find other (duplicate) questions. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
+        adapted_from=["QuoraRetrieval"],
+        prompt={
+            "query": "Given a question, retrieve questions that are semantically equivalent to the given question"
+        },
+        **_quora_metadata,
     )
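One practical consequence of the superseded_by wiring above: the original hard-negatives tasks remain registered and loadable, but now point at their prompt-corrected .v2 replacements. A quick way to inspect this, assuming a standard mteb >= 2.1.19 install and its public get_task accessor (the commented values follow from the metadata in this diff, not from running the snippet):

import mteb

# The old task still loads, but advertises its replacement.
old_task = mteb.get_task("DBPediaHardNegatives")
print(old_task.metadata.superseded_by)  # "DBPediaHardNegatives.v2"

# The .v2 variant carries the task-specific query prompt instead of the retrieval default.
new_task = mteb.get_task("DBPediaHardNegatives.v2")
print(new_task.metadata.prompt)  # {"query": "Given a query, retrieve relevant entity descriptions from DBPedia"}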