mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (412)
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +8 -3
  3. mteb/_evaluators/any_sts_evaluator.py +14 -12
  4. mteb/_evaluators/clustering_evaluator.py +1 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  7. mteb/_evaluators/retrieval_metrics.py +0 -9
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_stratification.py +1 -1
  13. mteb/abstasks/abstask.py +6 -1
  14. mteb/abstasks/clustering.py +1 -1
  15. mteb/abstasks/dataset_card_template.md +1 -1
  16. mteb/abstasks/multilabel_classification.py +2 -2
  17. mteb/abstasks/retrieval.py +2 -1
  18. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  19. mteb/abstasks/task_metadata.py +2 -1
  20. mteb/benchmarks/_create_table.py +1 -3
  21. mteb/benchmarks/benchmark.py +18 -1
  22. mteb/benchmarks/benchmarks/__init__.py +4 -0
  23. mteb/benchmarks/benchmarks/benchmarks.py +125 -16
  24. mteb/benchmarks/get_benchmark.py +3 -1
  25. mteb/cache.py +7 -3
  26. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  27. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  28. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  29. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  30. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  31. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  32. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  33. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  34. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  35. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  36. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  37. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  38. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  39. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  40. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  41. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  42. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  43. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  44. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  54. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  55. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  56. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  57. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  58. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  59. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  60. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  61. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  63. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  66. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  67. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  68. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  69. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  71. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  72. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  73. mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
  74. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  75. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  76. mteb/evaluate.py +26 -6
  77. mteb/languages/check_language_code.py +11 -3
  78. mteb/languages/language_scripts.py +4 -0
  79. mteb/leaderboard/app.py +5 -3
  80. mteb/leaderboard/benchmark_selector.py +4 -2
  81. mteb/leaderboard/text_segments.py +1 -1
  82. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  83. mteb/models/instruct_wrapper.py +3 -0
  84. mteb/models/model_implementations/align_models.py +6 -0
  85. mteb/models/model_implementations/andersborges.py +51 -0
  86. mteb/models/model_implementations/ara_models.py +7 -0
  87. mteb/models/model_implementations/b1ade_models.py +1 -1
  88. mteb/models/model_implementations/bge_models.py +1 -3
  89. mteb/models/model_implementations/blip2_models.py +9 -0
  90. mteb/models/model_implementations/blip_models.py +19 -0
  91. mteb/models/model_implementations/bmretriever_models.py +1 -1
  92. mteb/models/model_implementations/cadet_models.py +8 -0
  93. mteb/models/model_implementations/cde_models.py +12 -0
  94. mteb/models/model_implementations/codefuse_models.py +15 -0
  95. mteb/models/model_implementations/codesage_models.py +12 -0
  96. mteb/models/model_implementations/cohere_models.py +1 -1
  97. mteb/models/model_implementations/colqwen_models.py +57 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +70 -0
  99. mteb/models/model_implementations/gme_v_models.py +2 -2
  100. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  101. mteb/models/model_implementations/inf_models.py +3 -3
  102. mteb/models/model_implementations/jasper_models.py +253 -2
  103. mteb/models/model_implementations/jina_models.py +12 -2
  104. mteb/models/model_implementations/kalm_models.py +159 -25
  105. mteb/models/model_implementations/llm2vec_models.py +1 -1
  106. mteb/models/model_implementations/misc_models.py +8 -2
  107. mteb/models/model_implementations/moco_models.py +9 -0
  108. mteb/models/model_implementations/mxbai_models.py +1 -1
  109. mteb/models/model_implementations/openclip_models.py +16 -0
  110. mteb/models/model_implementations/piccolo_models.py +6 -0
  111. mteb/models/model_implementations/rasgaard_models.py +33 -0
  112. mteb/models/model_implementations/reasonir_model.py +1 -1
  113. mteb/models/model_implementations/salesforce_models.py +1 -1
  114. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  115. mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
  116. mteb/models/model_implementations/tarka_models.py +374 -0
  117. mteb/models/model_implementations/voyage_models.py +6 -7
  118. mteb/models/model_implementations/voyage_v.py +10 -9
  119. mteb/models/model_implementations/yuan_models.py +33 -0
  120. mteb/models/search_wrappers.py +6 -5
  121. mteb/results/task_result.py +19 -17
  122. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  123. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  124. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  125. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  126. mteb/tasks/classification/ara/ajgt.py +1 -2
  127. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  128. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  130. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  131. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  132. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  133. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  134. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  135. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  136. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  137. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
  139. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  140. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  141. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  142. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  144. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  145. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  146. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  147. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  148. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  149. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  150. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  151. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  152. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  153. mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
  154. mteb/tasks/classification/eng/news_classification.py +1 -2
  155. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  156. mteb/tasks/classification/eng/patent_classification.py +1 -2
  157. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  158. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  159. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  160. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  161. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  162. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  163. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  164. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  165. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  166. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  167. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  168. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  169. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  170. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  171. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  172. mteb/tasks/classification/est/estonian_valence.py +1 -2
  173. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  174. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  175. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  176. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  177. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  178. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  179. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  180. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  181. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  182. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  183. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  184. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  185. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  186. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  187. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  188. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  189. mteb/tasks/classification/kor/klue_tc.py +1 -2
  190. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  191. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  192. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  193. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  194. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  195. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  196. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  197. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  198. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  199. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  200. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  201. mteb/tasks/classification/mya/myanmar_news.py +2 -3
  202. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  203. mteb/tasks/classification/nld/__init__.py +16 -0
  204. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  205. mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
  206. mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
  207. mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
  208. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
  209. mteb/tasks/classification/nld/iconclass_classification.py +44 -0
  210. mteb/tasks/classification/nld/open_tender_classification.py +41 -0
  211. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
  212. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  213. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  214. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  215. mteb/tasks/classification/pol/polish_classification.py +3 -6
  216. mteb/tasks/classification/ron/moroco.py +1 -2
  217. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  218. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  220. mteb/tasks/classification/rus/headline_classification.py +1 -2
  221. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  222. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  223. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  224. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  225. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  226. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  227. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  228. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  229. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  230. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  231. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  232. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  233. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  234. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  235. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  236. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  237. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  238. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  239. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  240. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  241. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  242. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  243. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  244. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  245. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  246. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  247. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  248. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  249. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  250. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  251. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  252. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  253. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  254. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  255. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  256. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  257. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  258. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  259. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  260. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  261. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  262. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  263. mteb/tasks/clustering/__init__.py +1 -0
  264. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  265. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  266. mteb/tasks/clustering/nld/__init__.py +17 -0
  267. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
  268. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
  269. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
  270. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
  271. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
  272. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
  273. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
  274. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  275. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  276. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  277. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  278. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  279. mteb/tasks/multilabel_classification/__init__.py +1 -0
  280. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  281. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  282. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  283. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  284. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
  285. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
  286. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  287. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  288. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  289. mteb/tasks/pair_classification/__init__.py +1 -0
  290. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  291. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  292. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  293. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  294. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
  295. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
  296. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  297. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  298. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  299. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  300. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  301. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  302. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  303. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  304. mteb/tasks/retrieval/code/code_rag.py +8 -8
  305. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  306. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  307. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  308. mteb/tasks/retrieval/eng/__init__.py +18 -4
  309. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  310. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  311. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  312. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  313. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  314. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  315. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  316. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  317. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  318. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  319. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  320. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  321. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  322. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  323. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  324. mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
  325. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  326. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  327. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  328. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  329. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
  330. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  331. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  332. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  333. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  334. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  335. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  336. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  337. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  338. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  339. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  340. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  341. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  342. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  343. mteb/tasks/retrieval/nld/__init__.py +18 -4
  344. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  345. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
  346. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
  347. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
  348. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  349. mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
  350. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  351. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  352. mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
  353. mteb/tasks/retrieval/nob/norquad.py +2 -2
  354. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  355. mteb/tasks/retrieval/rus/__init__.py +11 -2
  356. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  357. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  358. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  359. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  360. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  361. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  362. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  363. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  364. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  365. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  366. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  367. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  368. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  369. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  370. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  371. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  372. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  373. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  374. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  375. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  376. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  377. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  378. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  379. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  380. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  381. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  382. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  383. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  384. mteb/tasks/sts/__init__.py +1 -0
  385. mteb/tasks/sts/nld/__init__.py +5 -0
  386. mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
  387. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  388. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  389. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  390. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  391. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  392. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  393. mteb-2.1.19.dist-info/METADATA +253 -0
  394. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
  395. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  396. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  397. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  398. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  399. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  400. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  401. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  402. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  403. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  404. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  405. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  406. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  407. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  408. mteb-2.0.5.dist-info/METADATA +0 -455
  409. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
  410. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
  411. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
  412. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
@@ -156,16 +156,15 @@ class VoyageModel(AbsEncoder):
156
156
  and len(batch) < batch_size
157
157
  and batch_tokens < self._max_tokens_per_batch
158
158
  ):
159
- n_tokens = len(
160
- self._client.tokenize([sentences[index]], model=self._model_name)[0]
161
- )
159
+ txt = sentences[index] if len(sentences[index]) > 0 else " "
160
+ n_tokens = len(self._client.tokenize([txt], model=self._model_name)[0])
162
161
  if (
163
162
  batch_tokens + n_tokens > self._max_tokens_per_batch
164
163
  and len(batch) > 0
165
164
  ):
166
165
  break
167
166
  batch_tokens += n_tokens
168
- batch.append(sentences[index])
167
+ batch.append(txt)
169
168
  index += 1
170
169
 
171
170
  embeddings.extend(
@@ -249,7 +248,7 @@ voyage_3_5 = ModelMeta(
249
248
  n_parameters=None,
250
249
  memory_usage_mb=None,
251
250
  license=None,
252
- reference="https://docs.voyageai.com/docs/embeddings",
251
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
253
252
  similarity_fn_name="cosine",
254
253
  framework=["API"],
255
254
  use_instructions=True,
@@ -274,7 +273,7 @@ voyage_3_5_int8 = ModelMeta(
274
273
  n_parameters=None,
275
274
  memory_usage_mb=None,
276
275
  license=None,
277
- reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
276
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
278
277
  similarity_fn_name="cosine",
279
278
  framework=["API"],
280
279
  use_instructions=True,
@@ -300,7 +299,7 @@ voyage_3_5_binary = ModelMeta(
300
299
  n_parameters=None,
301
300
  memory_usage_mb=None,
302
301
  license=None,
303
- reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
302
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
304
303
  similarity_fn_name="cosine",
305
304
  framework=["API"],
306
305
  use_instructions=True,
@@ -51,7 +51,13 @@ def _downsample_image(
51
51
  def voyage_v_loader(model_name, **kwargs):
52
52
  requires_package(
53
53
  voyage_v_loader,
54
- "voyageai and tenacity",
54
+ "voyageai",
55
+ model_name,
56
+ "pip install 'mteb[voyage_v]'",
57
+ )
58
+ requires_package(
59
+ voyage_v_loader,
60
+ "tenacity",
55
61
  model_name,
56
62
  "pip install 'mteb[voyage_v]'",
57
63
  )
@@ -65,11 +71,9 @@ def voyage_v_loader(model_name, **kwargs):
65
71
  **kwargs: Any,
66
72
  ):
67
73
  requires_image_dependencies()
68
- from torchvision import transforms
69
74
 
70
75
  self.model_name = model_name.split("/")[-1]
71
76
  self.vo = voyageai.Client()
72
- self.tensor_to_image = transforms.Compose([transforms.PILToTensor()])
73
77
 
74
78
  @retry(
75
79
  stop=stop_after_attempt(6), # Stop after 6 attempts
@@ -126,10 +130,7 @@ def voyage_v_loader(model_name, **kwargs):
126
130
  for batch in tqdm(
127
131
  images, disable=not show_progress_bar, desc="Image Encoding"
128
132
  ):
129
- batch_images = [
130
- [_downsample_image(self.tensor_to_image(image))]
131
- for image in batch["image"]
132
- ]
133
+ batch_images = [[_downsample_image(image)] for image in batch["image"]]
133
134
  embeddings = self._multimodal_embed(
134
135
  batch_images, model=self.model_name, input_type=input_type
135
136
  ).embeddings
@@ -148,6 +149,7 @@ def voyage_v_loader(model_name, **kwargs):
148
149
  show_progress_bar: bool = True,
149
150
  **kwargs: Any,
150
151
  ) -> Array:
152
+ input_type = "document" # default
151
153
  if prompt_type is not None:
152
154
  if prompt_type == PromptType.document:
153
155
  input_type = "document"
@@ -163,8 +165,7 @@ def voyage_v_loader(model_name, **kwargs):
163
165
  inputs, disable=not show_progress_bar, desc="Interleaved Encoding"
164
166
  ):
165
167
  batch_images = [
166
- _downsample_image(self.tensor_to_image(image))
167
- for image in batch["image"]
168
+ _downsample_image(image) for image in batch["image"]
168
169
  ]
169
170
  batch_texts = batch["text"]
170
171
  interleaved_inputs = [
@@ -0,0 +1,33 @@
1
+ from mteb.models import ModelMeta, sentence_transformers_loader
2
+
3
+ yuan_emb_zh_datasets = {
4
+ "CMedQAv2-reranking",
5
+ "DuRetrieval",
6
+ "MMarcoReranking",
7
+ "T2Reranking",
8
+ "T2Retrieval",
9
+ }
10
+
11
+ # not in mteb
12
+ # "Multi-CPR":"http://github.com/Alibaba-NLP/Multi-CPR",
13
+
14
+ yuan_embedding_2_zh = ModelMeta(
15
+ name="IEITYuan/Yuan-embedding-2.0-zh",
16
+ loader=sentence_transformers_loader,
17
+ languages=["zho-Hans"],
18
+ open_weights=True,
19
+ revision="b5ebcace6f4fc6e5a4d1852557eb2dc2d1040cee",
20
+ release_date="2025-11-24",
21
+ n_parameters=326000000,
22
+ memory_usage_mb=1242,
23
+ embed_dim=1792,
24
+ license="apache-2.0",
25
+ max_tokens=512,
26
+ reference="https://huggingface.co/IEITYuan/Yuan-embedding-2.0-zh",
27
+ similarity_fn_name="cosine",
28
+ framework=["Sentence Transformers", "PyTorch"],
29
+ use_instructions=False,
30
+ public_training_code=None,
31
+ public_training_data=None,
32
+ training_datasets=yuan_emb_zh_datasets,
33
+ )
@@ -90,7 +90,7 @@ class SearchEncoderWrapper:
90
90
  queries,
91
91
  task_metadata,
92
92
  prompt_type=PromptType.query,
93
- batch_size=encode_kwargs.get("batch_size", 32),
93
+ **encode_kwargs,
94
94
  )
95
95
 
96
96
  query_embeddings = self.model.encode(
@@ -165,7 +165,7 @@ class SearchEncoderWrapper:
165
165
  sub_corpus,
166
166
  task_metadata,
167
167
  prompt_type=PromptType.document,
168
- batch_size=encode_kwargs.get("batch_size", 32),
168
+ **encode_kwargs,
169
169
  ),
170
170
  task_metadata=task_metadata,
171
171
  hf_split=hf_split,
@@ -191,6 +191,7 @@ class SearchEncoderWrapper:
191
191
  cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
192
192
  cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
193
193
 
194
+ sub_corpus_ids = list(sub_corpus_ids)
194
195
  for query_itr in range(len(query_embeddings)):
195
196
  query_id = query_idx_to_id[query_itr]
196
197
  for sub_corpus_id, score in zip(
@@ -230,7 +231,7 @@ class SearchEncoderWrapper:
230
231
  self.task_corpus,
231
232
  task_metadata,
232
233
  prompt_type=PromptType.document,
233
- batch_size=encode_kwargs.get("batch_size", 32),
234
+ **encode_kwargs,
234
235
  ),
235
236
  task_metadata=task_metadata,
236
237
  hf_split=hf_split,
@@ -407,13 +408,13 @@ class SearchCrossEncoderWrapper:
407
408
  Dataset.from_list(total_queries),
408
409
  task_metadata,
409
410
  prompt_type=PromptType.document,
410
- batch_size=encode_kwargs.get("batch_size", 32),
411
+ **encode_kwargs,
411
412
  )
412
413
  corpus_loader = create_dataloader(
413
414
  Dataset.from_list(total_docs),
414
415
  task_metadata,
415
416
  prompt_type=PromptType.document,
416
- batch_size=encode_kwargs.get("batch_size", 32),
417
+ **encode_kwargs,
417
418
  )
418
419
  predictions = self.model.predict(
419
420
  inputs1=queries_loader,
@@ -32,7 +32,7 @@ from mteb.types import (
32
32
  logger = logging.getLogger(__name__)
33
33
 
34
34
 
35
- class Criterias(HelpfulStrEnum):
35
+ class Criteria(HelpfulStrEnum):
36
36
  """Enum for criteria to check when merging TaskResult objects."""
37
37
 
38
38
  MTEB_VERSION = "mteb_version"
@@ -671,7 +671,7 @@ class TaskResult(BaseModel):
671
671
  def is_mergeable(
672
672
  self,
673
673
  result: TaskResult | AbsTask,
674
- criteria: list[str] | list[Criterias] = [
674
+ criteria: list[str] | list[Criteria] = [
675
675
  "mteb_version",
676
676
  "dataset_revision",
677
677
  ],
@@ -688,9 +688,7 @@ class TaskResult(BaseModel):
688
688
  Returns:
689
689
  True if the TaskResult object can be merged with the other object, False otherwise.
690
690
  """
691
- criteria = [
692
- Criterias.from_str(c) if isinstance(c, str) else c for c in criteria
693
- ]
691
+ criteria = [Criteria.from_str(c) if isinstance(c, str) else c for c in criteria]
694
692
  if isinstance(result, TaskResult):
695
693
  name = result.task_name
696
694
  revision = result.dataset_revision
@@ -700,27 +698,31 @@ class TaskResult(BaseModel):
700
698
  name = result.metadata.name
701
699
  revision = result.metadata.revision
702
700
  else:
701
+ msg = "result must be a TaskResult or AbsTask object"
702
+ if raise_error:
703
+ raise ValueError(msg)
704
+ logger.debug(msg)
703
705
  return False
704
706
 
705
707
  if self.task_name != name:
708
+ msg = f"Cannot merge TaskResult objects as they are derived from different tasks ({self.task_name} and {name})"
706
709
  if raise_error:
707
- raise ValueError(
708
- f"Cannot merge TaskResult objects as they are derived from different tasks ({self.task_name} and {name})"
709
- )
710
+ raise ValueError(msg)
711
+ logger.debug(msg)
710
712
  return False
711
713
 
712
- if Criterias.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
714
+ if Criteria.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
715
+ msg = f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} (loaded) and {mteb_version} (current))"
713
716
  if raise_error:
714
- raise ValueError(
715
- f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} and {mteb_version})"
716
- )
717
+ raise ValueError(msg)
718
+ logger.debug(msg)
717
719
  return False
718
720
 
719
- if Criterias.DATASET_REVISION in criteria and self.dataset_revision != revision:
721
+ if Criteria.DATASET_REVISION in criteria and self.dataset_revision != revision:
722
+ msg = f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
720
723
  if raise_error:
721
- raise ValueError(
722
- f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
723
- )
724
+ raise ValueError(msg)
725
+ logger.debug(msg)
724
726
  return False
725
727
 
726
728
  return True
@@ -728,7 +730,7 @@ class TaskResult(BaseModel):
728
730
  def merge(
729
731
  self,
730
732
  new_results: TaskResult,
731
- criteria: list[str] | list[Criterias] = [
733
+ criteria: list[str] | list[Criteria] = [
732
734
  "mteb_version",
733
735
  "dataset_revision",
734
736
  ],
@@ -23,7 +23,7 @@ class BUCCBitextMining(AbsTaskBitextMining):
23
23
  "path": "mteb/BUCC",
24
24
  "revision": "414572247440f0ccacf7eb0bb70a31533a0e5443",
25
25
  },
26
- description="BUCC bitext mining dataset",
26
+ description="BUCC bitext mining dataset train split.",
27
27
  reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
28
28
  type="BitextMining",
29
29
  category="t2t",
@@ -71,7 +71,9 @@ Rapp, Reinhard},
71
71
 
72
72
  sentence1 = data["sentence1"][0]
73
73
  sentence2 = data["sentence2"][0]
74
- sentence1 = [sentence1[i] for (i, j) in gold]
74
+ sentence1 = [
75
+ sentence1[i] for (i, j) in gold
76
+ ] # keep only sentences in gold. The 2nd value is meant for sentence2 but not used here. This is fixed in BUCC.v2.
75
77
  logger.info(f"Lang {lang} num gold {len(gold)}")
76
78
  logger.info(f"Lang {lang} num sentence1 {len(sentence1)}")
77
79
  logger.info(f"Lang {lang} num sentence2 {len(sentence2)}")
@@ -20,7 +20,7 @@ class BUCCBitextMiningFast(AbsTaskBitextMining):
20
20
  "path": "mteb/bucc-bitext-mining",
21
21
  "revision": "1739dc11ffe9b7bfccd7f3d585aeb4c544fc6677",
22
22
  },
23
- description="BUCC bitext mining dataset",
23
+ description="BUCC bitext mining dataset train split, gold set only.",
24
24
  reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
25
25
  type="BitextMining",
26
26
  category="t2t",
@@ -10,11 +10,7 @@ class RuSciBenchBitextMining(AbsTaskBitextMining):
10
10
  "path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
11
11
  "revision": "e5840033c5cf2573932db027ac8001fe0a7eb6fa",
12
12
  },
13
- description="""This task focuses on finding translations of scientific articles.
14
- The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications.
15
- Russian authors often provide English translations for their abstracts and titles,
16
- and the data consists of these paired titles and abstracts. The task evaluates a model's ability
17
- to match an article's Russian title and abstract to its English counterpart, or vice versa.""",
13
+ description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa.",
18
14
  reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
19
15
  type="BitextMining",
20
16
  category="t2c",
@@ -198,9 +198,7 @@ _SPLITS = ["default"]
198
198
  class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
199
199
  metadata = TaskMetadata(
200
200
  name="WebFAQBitextMiningQuestions",
201
- description="""The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages.
202
- A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA.
203
- The dataset is sourced from FAQ pages on the web.""",
201
+ description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA. The dataset is sourced from FAQ pages on the web.',
204
202
  reference="https://huggingface.co/PaDaS-Lab",
205
203
  dataset={
206
204
  "path": "PaDaS-Lab/webfaq-bitexts",
@@ -254,9 +252,7 @@ The dataset is sourced from FAQ pages on the web.""",
254
252
  class WebFAQBitextMiningQAs(AbsTaskBitextMining):
255
253
  metadata = TaskMetadata(
256
254
  name="WebFAQBitextMiningQAs",
257
- description="""The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages.
258
- A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer.
259
- The dataset is sourced from FAQ pages on the web.""",
255
+ description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer. The dataset is sourced from FAQ pages on the web.',
260
256
  reference="https://huggingface.co/PaDaS-Lab",
261
257
  dataset={
262
258
  "path": "PaDaS-Lab/webfaq-bitexts",
@@ -45,8 +45,7 @@ class AJGTV2(AbsTaskClassification):
45
45
  "path": "mteb/ajgt",
46
46
  "revision": "0a3dea7301ee0c051891f04d32f3e8577a9eae36",
47
47
  },
48
- description="""Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets (900 for training and 900 for testing) annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
48
+ description="Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets (900 for training and 900 for testing) annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
50
49
  reference="https://link.springer.com/chapter/10.1007/978-3-319-60042-0_66/",
51
50
  type="Classification",
52
51
  category="t2c",
@@ -45,8 +45,7 @@ class HotelReviewSentimentClassificationV2(AbsTaskClassification):
45
45
  "path": "mteb/HotelReviewSentimentClassification",
46
46
  "revision": "f5e6a24acbed4182114ffdf46747090b3f51e836",
47
47
  },
48
- description="""HARD is a dataset of Arabic hotel reviews collected from the Booking.com website.
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
48
+ description="HARD is a dataset of Arabic hotel reviews collected from the Booking.com website. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
50
49
  reference="https://link.springer.com/chapter/10.1007/978-3-319-67056-0_3",
51
50
  type="Classification",
52
51
  category="t2c",
@@ -41,8 +41,7 @@ class OnlineStoreReviewSentimentClassificationV2(AbsTaskClassification):
41
41
  "path": "mteb/online_store_review_sentiment",
42
42
  "revision": "de0e8eed65adf1cbc58f8743a5f5c5df556de4c4",
43
43
  },
44
- description="""This dataset contains Arabic reviews of products from the SHEIN online store.
45
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
44
+ description="This dataset contains Arabic reviews of products from the SHEIN online store. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
46
45
  reference="https://huggingface.co/datasets/Ruqiya/Arabic_Reviews_of_SHEIN",
47
46
  type="Classification",
48
47
  category="t2c",
@@ -52,8 +52,7 @@ class RestaurantReviewSentimentClassificationV2(AbsTaskClassification):
52
52
  "path": "mteb/restaurant_review_sentiment",
53
53
  "revision": "5d28c1e8fb393173a849696ed178b90a6f78754a",
54
54
  },
55
- description="""Dataset of 8156 restaurant reviews from qaym.com in Arabic for sentiment analysis
56
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
55
+ description="Dataset of 8156 restaurant reviews from qaym.com in Arabic for sentiment analysis This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
57
56
  reference="https://link.springer.com/chapter/10.1007/978-3-319-18117-2_2",
58
57
  type="Classification",
59
58
  category="t2c",
@@ -45,8 +45,7 @@ class TweetEmotionClassificationV2(AbsTaskClassification):
45
45
  "path": "mteb/TweetEmotionClassification",
46
46
  "revision": "930d65840c089406ceed5241b1a9ba7294e5eeae",
47
47
  },
48
- description="""A dataset of 10,012 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets.
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
48
+ description="A dataset of 10,012 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
50
49
  reference="https://link.springer.com/chapter/10.1007/978-3-319-77116-8_8",
51
50
  type="Classification",
52
51
  category="t2c",
@@ -62,8 +62,7 @@ class TweetSarcasmClassificationV2(AbsTaskClassification):
62
62
  "path": "mteb/tweet_sarcasm",
63
63
  "revision": "3a20898e2ea3303844e907d55f7a815a7644150d",
64
64
  },
65
- description="""Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets.
66
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
65
+ description="Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
67
66
  reference="https://aclanthology.org/2020.osact-1.5/",
68
67
  type="Classification",
69
68
  category="t2c",
@@ -55,8 +55,7 @@ Islam, Tanvir},
55
55
  class BengaliDocumentClassificationV2(AbsTaskClassification):
56
56
  metadata = TaskMetadata(
57
57
  name="BengaliDocumentClassification.v2",
58
- description="""Dataset for News Classification, categorized with 13 domains.
59
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
58
+ description="Dataset for News Classification, categorized with 13 domains. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
60
59
  reference="https://aclanthology.org/2023.eacl-main.4",
61
60
  dataset={
62
61
  "path": "mteb/bengali_document",
@@ -45,8 +45,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
45
45
  class BengaliHateSpeechClassificationV2(AbsTaskClassification):
46
46
  metadata = TaskMetadata(
47
47
  name="BengaliHateSpeechClassification.v2",
48
- description="""The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text.
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
48
+ description="The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
50
49
  reference="https://huggingface.co/datasets/bn_hate_speech",
51
50
  dataset={
52
51
  "path": "mteb/bengali_hate_speech",
@@ -45,8 +45,7 @@ class BengaliSentimentAnalysis(AbsTaskClassification):
45
45
  class BengaliSentimentAnalysisV2(AbsTaskClassification):
46
46
  metadata = TaskMetadata(
47
47
  name="BengaliSentimentAnalysis.v2",
48
- description="""dataset contains 2854 Negative reviews and 7238 Positive reviews collected and manually annotated from Youtube Bengali drama.
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
48
+ description="dataset contains 2854 Negative reviews and 7238 Positive reviews collected and manually annotated from Youtube Bengali drama. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
50
49
  reference="https://data.mendeley.com/datasets/p6zc7krs37/4",
51
50
  dataset={
52
51
  "path": "mteb/bengali_sentiment_analysis",
@@ -51,8 +51,7 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
51
51
  class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
52
52
  metadata = TaskMetadata(
53
53
  name="CSFDCZMovieReviewSentimentClassification.v2",
54
- description="""The dataset contains 30k user reviews from csfd.cz in Czech.
55
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
54
+ description="The dataset contains 30k user reviews from csfd.cz in Czech. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
56
55
  reference="https://arxiv.org/abs/2304.01922",
57
56
  dataset={
58
57
  "path": "mteb/csfdcz_movie_review_sentiment",
@@ -58,8 +58,7 @@ Montoyo, Andres},
58
58
  class CzechProductReviewSentimentClassificationV2(AbsTaskClassification):
59
59
  metadata = TaskMetadata(
60
60
  name="CzechProductReviewSentimentClassification.v2",
61
- description="""User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative)
62
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
61
+ description="User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative) This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
63
62
  reference="https://aclanthology.org/W13-1609/",
64
63
  dataset={
65
64
  "path": "mteb/czech_product_review_sentiment",
@@ -55,8 +55,7 @@ Montoyo, Andres},
55
55
  class CzechSoMeSentimentClassificationV2(AbsTaskClassification):
56
56
  metadata = TaskMetadata(
57
57
  name="CzechSoMeSentimentClassification.v2",
58
- description="""User comments on Facebook
59
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
58
+ description="User comments on Facebook This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
60
59
  reference="https://aclanthology.org/W13-1609/",
61
60
  dataset={
62
61
  "path": "mteb/czech_so_me_sentiment",
@@ -9,7 +9,7 @@ class AngryTweetsClassification(AbsTaskClassification):
9
9
  "path": "DDSC/angry-tweets",
10
10
  "revision": "20b0e6081892e78179356fada741b7afa381443d",
11
11
  },
12
- description="A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets",
12
+ description="A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets",
13
13
  reference="https://aclanthology.org/2021.nodalida-main.53/",
14
14
  type="Classification",
15
15
  category="t2c",
@@ -47,8 +47,7 @@ class AngryTweetsClassificationV2(AbsTaskClassification):
47
47
  "path": "mteb/angry_tweets",
48
48
  "revision": "b9475fb66a13befda4fa9871cd92343bb2c0eb77",
49
49
  },
50
- description="""A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets
51
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
50
+ description="A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
52
51
  reference="https://aclanthology.org/2021.nodalida-main.53/",
53
52
  type="Classification",
54
53
  category="t2c",
@@ -49,8 +49,7 @@ class DanishPoliticalCommentsClassificationV2(AbsTaskClassification):
49
49
  "path": "mteb/danish_political_comments",
50
50
  "revision": "476a9e7327aba70ad3e97a169d7310b86be9b245",
51
51
  },
52
- description="""A dataset of Danish political comments rated for sentiment
53
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
52
+ description="A dataset of Danish political comments rated for sentiment This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
54
53
  reference="https://huggingface.co/datasets/danish_political_comments",
55
54
  type="Classification",
56
55
  category="t2c",
@@ -69,8 +69,7 @@ class DdiscoCohesionClassificationV2(AbsTaskClassification):
69
69
  "path": "mteb/ddisco_cohesion",
70
70
  "revision": "b5a05bdecdfc6efc14eebc8f7a86e0986edaf5ff",
71
71
  },
72
- description="""A Danish Discourse dataset with values for coherence and source (Wikipedia or Reddit)
73
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
72
+ description="A Danish Discourse dataset with values for coherence and source (Wikipedia or Reddit) This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
74
73
  reference="https://aclanthology.org/2022.lrec-1.260/",
75
74
  type="Classification",
76
75
  category="t2c",
@@ -76,8 +76,7 @@ class DKHateClassificationV2(AbsTaskClassification):
76
76
  "path": "mteb/dk_hate",
77
77
  "revision": "0468ff11393992d8347cf4282fb706fe970608d4",
78
78
  },
79
- description="""Danish Tweets annotated for Hate Speech either being Offensive or not
80
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
79
+ description="Danish Tweets annotated for Hate Speech either being Offensive or not This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
81
80
  reference="https://aclanthology.org/2020.lrec-1.430/",
82
81
  type="Classification",
83
82
  category="t2c",
@@ -56,8 +56,7 @@ Zesch, Torsten},
56
56
  class GermanPoliticiansTwitterSentimentClassificationV2(AbsTaskClassification):
57
57
  metadata = TaskMetadata(
58
58
  name="GermanPoliticiansTwitterSentimentClassification.v2",
59
- description="""GermanPoliticiansTwitterSentiment is a dataset of German tweets categorized with their sentiment (3 classes).
60
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
59
+ description="GermanPoliticiansTwitterSentiment is a dataset of German tweets categorized with their sentiment (3 classes). This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
61
60
  reference="https://aclanthology.org/2022.konvens-1.9",
62
61
  dataset={
63
62
  "path": "mteb/german_politicians_twitter_sentiment",
@@ -43,8 +43,7 @@ class TenKGnadClassification(AbsTaskClassification):
43
43
  class TenKGnadClassificationV2(AbsTaskClassification):
44
44
  metadata = TaskMetadata(
45
45
  name="TenKGnadClassification.v2",
46
- description="""10k German News Articles Dataset (10kGNAD) contains news articles from the online Austrian newspaper website DER Standard with their topic classification (9 classes).
47
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
46
+ description="10k German News Articles Dataset (10kGNAD) contains news articles from the online Austrian newspaper website DER Standard with their topic classification (9 classes). This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
48
47
  reference="https://tblock.github.io/10kGNAD/",
49
48
  dataset={
50
49
  "path": "mteb/ten_k_gnad",
@@ -44,8 +44,7 @@ class AmazonPolarityClassification(AbsTaskClassification):
44
44
  class AmazonPolarityClassificationV2(AbsTaskClassification):
45
45
  metadata = TaskMetadata(
46
46
  name="AmazonPolarityClassification.v2",
47
- description="""Amazon Polarity Classification Dataset.
48
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
47
+ description="Amazon Polarity Classification Dataset. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
49
48
  reference="https://huggingface.co/datasets/amazon_polarity",
50
49
  dataset={
51
50
  "path": "mteb/amazon_polarity",
@@ -43,8 +43,7 @@ class ArxivClassification(AbsTaskClassification):
43
43
  class ArxivClassificationV2(AbsTaskClassification):
44
44
  metadata = TaskMetadata(
45
45
  name="ArxivClassification.v2",
46
- description="""Classification Dataset of Arxiv Papers
47
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
46
+ description="Classification Dataset of Arxiv Papers This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
48
47
  dataset={
49
48
  "path": "mteb/arxiv",
50
49
  "revision": "202e10e9a5d37a5068397b48184d0728346a7b4a",
@@ -61,8 +61,7 @@ Shah, Rushin},
61
61
  class Banking77ClassificationV2(AbsTaskClassification):
62
62
  metadata = TaskMetadata(
63
63
  name="Banking77Classification.v2",
64
- description="""Dataset composed of online banking queries annotated with their corresponding intents.
65
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
64
+ description="Dataset composed of online banking queries annotated with their corresponding intents. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
66
65
  reference="https://arxiv.org/abs/2003.04807",
67
66
  dataset={
68
67
  "path": "mteb/banking77",
@@ -50,8 +50,7 @@ class DBpediaClassification(AbsTaskClassification):
50
50
  class DBpediaClassificationV2(AbsTaskClassification):
51
51
  metadata = TaskMetadata(
52
52
  name="DBpediaClassification.v2",
53
- description="""DBpedia14 is a dataset of English texts from Wikipedia articles, categorized into 14 non-overlapping classes based on their DBpedia ontology.
54
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
53
+ description="DBpedia14 is a dataset of English texts from Wikipedia articles, categorized into 14 non-overlapping classes based on their DBpedia ontology. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
55
54
  reference="https://arxiv.org/abs/1509.01626",
56
55
  dataset={
57
56
  "path": "mteb/d_bpedia",
@@ -59,8 +59,7 @@ Tsujii, Jun{'}ichi},
59
59
  class EmotionClassificationV2(AbsTaskClassification):
60
60
  metadata = TaskMetadata(
61
61
  name="EmotionClassification.v2",
62
- description="""Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.
63
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
62
+ description="Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
64
63
  reference="https://www.aclweb.org/anthology/D18-1404",
65
64
  dataset={
66
65
  "path": "mteb/emotion",