mteb 2.1.6__py3-none-any.whl → 2.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. mteb/_create_dataloaders.py +6 -3
  2. mteb/_evaluators/any_sts_evaluator.py +14 -12
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  6. mteb/_evaluators/sklearn_evaluator.py +15 -28
  7. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  8. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  9. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  10. mteb/abstasks/clustering.py +1 -1
  11. mteb/abstasks/multilabel_classification.py +2 -2
  12. mteb/abstasks/task_metadata.py +1 -0
  13. mteb/benchmarks/benchmark.py +9 -0
  14. mteb/benchmarks/benchmarks/__init__.py +2 -0
  15. mteb/benchmarks/benchmarks/benchmarks.py +40 -1
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  22. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  23. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  24. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  25. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  26. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  27. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  28. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  29. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  30. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  31. mteb/models/model_implementations/align_models.py +6 -0
  32. mteb/models/model_implementations/ara_models.py +7 -0
  33. mteb/models/model_implementations/blip2_models.py +9 -0
  34. mteb/models/model_implementations/blip_models.py +19 -0
  35. mteb/models/model_implementations/cadet_models.py +8 -0
  36. mteb/models/model_implementations/cde_models.py +12 -0
  37. mteb/models/model_implementations/codefuse_models.py +15 -0
  38. mteb/models/model_implementations/codesage_models.py +12 -0
  39. mteb/models/model_implementations/misc_models.py +6 -0
  40. mteb/models/model_implementations/moco_models.py +9 -0
  41. mteb/models/model_implementations/openclip_models.py +16 -0
  42. mteb/models/model_implementations/piccolo_models.py +6 -0
  43. mteb/models/model_implementations/rasgaard_models.py +7 -1
  44. mteb/models/model_implementations/tarka_models.py +317 -0
  45. mteb/models/search_wrappers.py +5 -5
  46. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  47. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  48. mteb/tasks/classification/ara/ajgt.py +1 -2
  49. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  50. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  51. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  52. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  53. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  54. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  55. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  56. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  57. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  58. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  59. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  60. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  61. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  62. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  63. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  64. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  65. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  66. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  67. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  68. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  69. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  70. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  71. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  72. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  73. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  74. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  75. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  76. mteb/tasks/classification/eng/news_classification.py +1 -2
  77. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  78. mteb/tasks/classification/eng/patent_classification.py +1 -2
  79. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  80. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  81. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  82. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  83. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  84. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  85. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  86. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  87. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  88. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  89. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  90. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  91. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  92. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  93. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  94. mteb/tasks/classification/est/estonian_valence.py +1 -2
  95. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  96. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  97. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  98. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  99. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  100. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  101. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  102. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  103. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  104. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  105. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  106. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  107. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  108. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  109. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  110. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  111. mteb/tasks/classification/kor/klue_tc.py +1 -2
  112. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  113. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  114. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  115. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  116. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  117. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  118. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  119. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  120. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  121. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  122. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  123. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  124. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  125. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  126. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  127. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  128. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  129. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  130. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  131. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  132. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  133. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  134. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  135. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  136. mteb/tasks/classification/pol/polish_classification.py +3 -6
  137. mteb/tasks/classification/ron/moroco.py +1 -2
  138. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  139. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  140. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  141. mteb/tasks/classification/rus/headline_classification.py +1 -2
  142. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  143. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  144. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  145. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  146. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  147. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  148. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  149. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  150. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  151. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  152. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  153. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  154. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  155. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  156. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  157. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  158. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  159. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  160. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  161. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  162. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  163. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  164. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  165. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  166. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  167. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  168. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  169. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  170. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  171. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  172. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  173. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  174. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  175. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  176. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  177. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  178. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  179. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  180. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  181. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  182. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  183. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  184. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  185. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  186. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  187. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  188. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  189. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  190. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  191. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  192. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  193. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  194. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  195. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  196. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  197. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  198. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  199. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  200. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  201. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  202. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  203. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  204. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  205. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  206. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  207. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  208. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  209. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  210. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  211. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  212. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  213. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  214. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  215. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  216. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  217. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  218. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  219. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  220. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  221. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  222. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  223. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  224. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  225. mteb/tasks/retrieval/nld/__init__.py +8 -4
  226. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  227. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  228. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  229. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  230. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  231. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  232. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  233. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  234. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  235. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  236. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  237. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  238. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  239. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  240. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  241. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  242. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  243. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  244. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  245. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  246. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  247. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  248. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  249. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  250. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  251. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  252. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  253. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  254. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  255. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  256. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  257. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  258. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  259. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  260. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  261. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  262. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  263. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  264. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  265. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  266. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  267. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/METADATA +1 -1
  268. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/RECORD +272 -257
  269. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/WHEEL +0 -0
  270. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/entry_points.txt +0 -0
  271. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/licenses/LICENSE +0 -0
  272. {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/top_level.txt +0 -0
@@ -7,8 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SwedishPatentCPCSubclassClassification(AbsTaskMultilabelClassification):
     metadata = TaskMetadata(
         name="SwedishPatentCPCSubclassClassification",
-        description="""This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system. Each document can have multiple labels, making this a multi-label classification task with significant implications for patent retrieval and prior art search.
-        The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.""",
+        description="This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system. Each document can have multiple labels, making this a multi-label classification task with significant implications for patent retrieval and prior art search. The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.",
         reference="https://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-368254",
         type="MultilabelClassification",
         category="t2t",
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class TalemaaderPC(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="TalemaaderPC",
-        description="""\
-        The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish.
-        The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions.
-        For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared.
-        The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.
-        """,
+        description="\\ The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish. The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions. For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared. The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.",
         reference="https://sprogteknologi.dk/dataset/1000-talemader-evalueringsdatasaet",
         dataset={
             "path": "mteb/talemaader_pc",
@@ -50,15 +50,7 @@ _DATASET_COLUMN_MAP = [
 class LegalBenchPC(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="LegalBenchPC",
-        description="""This LegalBench pair classification task is a combination of the following datasets:
-
-        - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement.
-        - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation.
-        - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc.
-        - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above.
-        - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”).
-        - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.
-        """,
+        description="This LegalBench pair classification task is a combination of the following datasets: - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement. - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation. - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc. - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above. - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”). - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/LegalBenchPC",
@@ -33,4 +33,7 @@ class SICKNLPairClassification(AbsTaskPairClassification):
   year = {2021},
 }
 """,
+        prompt={
+            "query": "Zoek tekst die semantisch vergelijkbaar is met de gegeven tekst."
+        },
     )
@@ -38,4 +38,7 @@ class XLWICNLPairClassification(AbsTaskPairClassification):
   year = {2020},
 }
 """,
+        prompt={
+            "query": "Zoek tekst die semantisch vergelijkbaar is met de gegeven tekst."
+        },
     )
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="SprintDuplicateQuestions-VN",
-        description="""A translated dataset from Duplicate questions from the Sprint community.
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Duplicate questions from the Sprint community. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.aclweb.org/anthology/D18-1131/",
         dataset={
             "path": "GreenNode/sprintduplicatequestions-pairclassification-vn",
@@ -9,11 +9,7 @@ class TwitterSemEval2015PCVN(AbsTaskPairClassification):
             "path": "GreenNode/twittersemeval2015-pairclassification-vn",
             "revision": "9215a3c954078fd15c2bbecca914477d53944de1",
         },
-        description="""A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop.
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://alt.qcri.org/semeval2015/task1/",
         category="t2c",
         type="PairClassification",
@@ -9,11 +9,7 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
             "path": "GreenNode/twitterurlcorpus-pairclassification-vn",
             "revision": "6e6a40aaade2129f70432f2156a6d24b63d72be3",
         },
-        description="""A translated dataset from Paraphrase-Pairs of Tweets.
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Paraphrase-Pairs of Tweets. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://languagenet.github.io/",
         category="t2c",
         type="PairClassification",
@@ -5,9 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class RuSciBenchCitedCountRegression(AbsTaskRegression):
     metadata = TaskMetadata(
         name="RuSciBenchCitedCountRegression",
-        description="""Predicts the number of times a scientific article has been cited by other papers.
-        The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic
-        library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
+        description="Predicts the number of times a scientific article has been cited by other papers. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         dataset={
             "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
@@ -51,9 +49,7 @@ class RuSciBenchCitedCountRegression(AbsTaskRegression):
 class RuSciBenchYearPublRegression(AbsTaskRegression):
     metadata = TaskMetadata(
         name="RuSciBenchYearPublRegression",
-        description="""Predicts the publication year of a scientific article. The prediction is based on the
-        article's title and abstract. The data is sourced from the Russian electronic library of scientific
-        publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
+        description="Predicts the publication year of a scientific article. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         dataset={
             "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
@@ -78,8 +78,7 @@ _CITATION = r"""
 class XGlueWPRReranking(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="XGlueWPRReranking",
-        description="""XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models
-        with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.""",
+        description="XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.",
         reference="https://github.com/microsoft/XGLUE",
         dataset={
             "path": "mteb/XGlueWPRReranking",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class AskUbuntuDupQuestionsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="AskUbuntuDupQuestions-VN",
-        description="""A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://github.com/taolei87/askubuntu",
         dataset={
             "path": "mteb/AskUbuntuDupQuestions-VN",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SciDocsRerankingVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="SciDocsRR-VN",
-        description="""A translated dataset from Ranking of related scientific papers based on their title.
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Ranking of related scientific papers based on their title. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://allenai.org/data/scidocs",
         dataset={
             "path": "mteb/SciDocsRR-VN",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class StackOverflowDupQuestionsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="StackOverflowDupQuestions-VN",
-        description="""A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf",
         dataset={
             "path": "mteb/StackOverflowDupQuestions-VN",
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LitSearchRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LitSearchRetrieval",
-        description="""
-        The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
-        Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
-        search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
-        generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
-        recently published papers, manually written by their authors. All LitSearch questions were manually examined or
-        edited by experts to ensure high quality.
-        """,
+        description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
         reference="https://github.com/princeton-nlp/LitSearch",
         dataset={
             "path": "princeton-nlp/LitSearch",
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):

     metadata = TaskMetadata(
         name="JaCWIRRetrieval",
-        description="""JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of
-        5000 question texts and approximately 500k web page titles and web page introductions or summaries
-        (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
-        and that data is used as a positive example for the question text.""",
+        description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
         reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
         dataset={
             "path": "mteb/JaCWIRRetrieval",
@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
     Vidore2ESGReportsHLRetrieval,
     Vidore2ESGReportsRetrieval,
 )
+from .vidore3_bench_retrieval import (
+    Vidore3ComputerScienceRetrieval,
+    Vidore3EnergyRetrieval,
+    Vidore3FinanceEnRetrieval,
+    Vidore3FinanceFrRetrieval,
+    Vidore3HrRetrieval,
+    Vidore3IndustrialRetrieval,
+    Vidore3NuclearRetrieval,
+    Vidore3PharmaceuticalsRetrieval,
+    Vidore3PhysicsRetrieval,
+    Vidore3TelecomRetrieval,
+)
 from .web_faq_retrieval import WebFAQRetrieval
 from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
 from .wit_t2i_retrieval import WITT2IRetrieval
@@ -161,6 +173,16 @@ __all__ = [
     "Vidore2ESGReportsHLRetrieval",
     "Vidore2ESGReportsRetrieval",
     "Vidore2EconomicsReportsRetrieval",
+    "Vidore3ComputerScienceRetrieval",
+    "Vidore3EnergyRetrieval",
+    "Vidore3FinanceEnRetrieval",
+    "Vidore3FinanceFrRetrieval",
+    "Vidore3HrRetrieval",
+    "Vidore3IndustrialRetrieval",
+    "Vidore3NuclearRetrieval",
+    "Vidore3PharmaceuticalsRetrieval",
+    "Vidore3PhysicsRetrieval",
+    "Vidore3TelecomRetrieval",
     "WITT2IRetrieval",
     "WebFAQRetrieval",
     "WikipediaRetrievalMultilingual",
@@ -34,8 +34,7 @@ _EVAL_LANGS = {
 class MKQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MKQARetrieval",
-        description="""Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset.
-        For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.""",
+        description="Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset. For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.",
         reference="https://github.com/apple/ml-mkqa",
         dataset={
             "path": "mteb/MKQARetrieval",
@@ -75,10 +75,7 @@ _EVAL_LANGS = extend_lang_pairs()
 class MLQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MLQARetrieval",
-        description="""MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance.
-        MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic,
-        German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between
-        4 different languages on average.""",
+        description="MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between 4 different languages on average.",
         reference="https://huggingface.co/datasets/mlqa",
         dataset={
             "path": "mteb/MLQARetrieval",
@@ -21,8 +21,7 @@ _LANGUAGES = {
 class MultiLongDocRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MultiLongDocRetrieval",
-        description="""Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024).
-        It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.""",
+        description="Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024). It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.",
         reference="https://arxiv.org/abs/2402.03216",  # also: https://huggingface.co/datasets/Shitao/MLDR
         dataset={
             "path": "mteb/MultiLongDocRetrieval",
@@ -68,11 +68,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cite_retrieval",
             "revision": "6cb447d02f41b8b775d5d9df7faf472f44d2f1db",
         },
-        description="""This task is focused on Direct Citation Prediction for scientific papers from eLibrary,
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve papers that are directly cited by it from a larger corpus of papers.
-        The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers,
-        and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.""",
+        description="This task is focused on Direct Citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve papers that are directly cited by it from a larger corpus of papers. The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers, and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",
@@ -130,13 +126,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cocite_retrieval",
             "revision": "a5da47a245275669d2b6ddf8f96c5338dd2428b4",
         },
-        description="""This task focuses on Co-citation Prediction for scientific papers from eLibrary,
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited
-        if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task,
-        this task employs a retrieval setup: for a given query paper, all other papers in the corpus that
-        are not co-cited with it are considered negative examples. The task is available for both Russian
-        and English scientific texts.""",
+        description="This task focuses on Co-citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task, this task employs a retrieval setup: for a given query paper, all other papers in the corpus that are not co-cited with it are considered negative examples. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",