mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. mteb/__init__.py +4 -0
  2. mteb/_create_dataloaders.py +6 -3
  3. mteb/_evaluators/any_sts_evaluator.py +21 -12
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +1 -1
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +30 -38
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_data_filter/__init__.py +0 -0
  13. mteb/abstasks/_data_filter/filters.py +125 -0
  14. mteb/abstasks/_data_filter/task_pipelines.py +102 -0
  15. mteb/abstasks/_statistics_calculation.py +6 -2
  16. mteb/abstasks/classification.py +0 -2
  17. mteb/abstasks/clustering.py +1 -1
  18. mteb/abstasks/clustering_legacy.py +3 -0
  19. mteb/abstasks/multilabel_classification.py +10 -3
  20. mteb/abstasks/pair_classification.py +8 -1
  21. mteb/abstasks/sts.py +7 -0
  22. mteb/abstasks/task_metadata.py +1 -0
  23. mteb/benchmarks/_create_table.py +84 -37
  24. mteb/benchmarks/benchmark.py +74 -15
  25. mteb/benchmarks/benchmarks/__init__.py +8 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +259 -15
  27. mteb/benchmarks/get_benchmark.py +2 -0
  28. mteb/cache.py +47 -10
  29. mteb/deprecated_evaluator.py +8 -13
  30. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  31. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  32. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  39. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  40. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  41. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  42. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  43. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  44. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  45. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  46. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  47. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  48. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  49. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  50. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  51. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  53. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  54. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  56. mteb/evaluate.py +65 -45
  57. mteb/leaderboard/app.py +268 -133
  58. mteb/leaderboard/benchmark_selector.py +14 -5
  59. mteb/leaderboard/figures.py +13 -15
  60. mteb/leaderboard/table.py +82 -17
  61. mteb/models/__init__.py +4 -1
  62. mteb/models/abs_encoder.py +21 -17
  63. mteb/models/cache_wrappers/__init__.py +2 -1
  64. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  65. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  66. mteb/models/get_model_meta.py +3 -114
  67. mteb/models/instruct_wrapper.py +5 -1
  68. mteb/models/model_implementations/align_models.py +7 -0
  69. mteb/models/model_implementations/amazon_models.py +1 -0
  70. mteb/models/model_implementations/andersborges.py +65 -0
  71. mteb/models/model_implementations/ara_models.py +8 -0
  72. mteb/models/model_implementations/arctic_models.py +8 -0
  73. mteb/models/model_implementations/b1ade_models.py +1 -0
  74. mteb/models/model_implementations/bedrock_models.py +4 -0
  75. mteb/models/model_implementations/bge_models.py +60 -0
  76. mteb/models/model_implementations/bica_model.py +35 -0
  77. mteb/models/model_implementations/blip2_models.py +11 -0
  78. mteb/models/model_implementations/blip_models.py +27 -0
  79. mteb/models/model_implementations/bm25.py +1 -0
  80. mteb/models/model_implementations/bmretriever_models.py +4 -0
  81. mteb/models/model_implementations/cadet_models.py +9 -0
  82. mteb/models/model_implementations/cde_models.py +14 -0
  83. mteb/models/model_implementations/clip_models.py +3 -0
  84. mteb/models/model_implementations/clips_models.py +100 -0
  85. mteb/models/model_implementations/codefuse_models.py +162 -0
  86. mteb/models/model_implementations/codesage_models.py +15 -0
  87. mteb/models/model_implementations/cohere_models.py +8 -1
  88. mteb/models/model_implementations/cohere_v.py +5 -0
  89. mteb/models/model_implementations/colpali_models.py +14 -6
  90. mteb/models/model_implementations/colqwen_models.py +271 -1
  91. mteb/models/model_implementations/colsmol_models.py +2 -0
  92. mteb/models/model_implementations/conan_models.py +1 -0
  93. mteb/models/model_implementations/dino_models.py +171 -0
  94. mteb/models/model_implementations/e5_instruct.py +4 -0
  95. mteb/models/model_implementations/e5_models.py +12 -101
  96. mteb/models/model_implementations/e5_v.py +1 -0
  97. mteb/models/model_implementations/eagerworks_models.py +164 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  99. mteb/models/model_implementations/en_code_retriever.py +1 -0
  100. mteb/models/model_implementations/euler_models.py +32 -0
  101. mteb/models/model_implementations/evaclip_models.py +4 -0
  102. mteb/models/model_implementations/fa_models.py +58 -0
  103. mteb/models/model_implementations/facebookai.py +193 -0
  104. mteb/models/model_implementations/geogpt_models.py +1 -0
  105. mteb/models/model_implementations/gme_v_models.py +11 -5
  106. mteb/models/model_implementations/google_models.py +16 -5
  107. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
  108. mteb/models/model_implementations/gritlm_models.py +2 -0
  109. mteb/models/model_implementations/gte_models.py +78 -0
  110. mteb/models/model_implementations/hinvec_models.py +1 -0
  111. mteb/models/model_implementations/human.py +1 -0
  112. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  113. mteb/models/model_implementations/inf_models.py +2 -0
  114. mteb/models/model_implementations/jasper_models.py +255 -2
  115. mteb/models/model_implementations/jina_clip.py +1 -0
  116. mteb/models/model_implementations/jina_models.py +209 -5
  117. mteb/models/model_implementations/kalm_models.py +203 -25
  118. mteb/models/model_implementations/kblab.py +31 -0
  119. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  120. mteb/models/model_implementations/kfst.py +25 -0
  121. mteb/models/model_implementations/kowshik24_models.py +32 -0
  122. mteb/models/model_implementations/lens_models.py +2 -0
  123. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  124. mteb/models/model_implementations/linq_models.py +3 -2
  125. mteb/models/model_implementations/listconranker.py +1 -1
  126. mteb/models/model_implementations/llm2clip_models.py +3 -0
  127. mteb/models/model_implementations/llm2vec_models.py +8 -0
  128. mteb/models/model_implementations/mcinext_models.py +3 -0
  129. mteb/models/model_implementations/mdbr_models.py +2 -0
  130. mteb/models/model_implementations/misc_models.py +362 -0
  131. mteb/models/model_implementations/mme5_models.py +1 -0
  132. mteb/models/model_implementations/moco_models.py +11 -0
  133. mteb/models/model_implementations/mod_models.py +191 -0
  134. mteb/models/model_implementations/model2vec_models.py +13 -0
  135. mteb/models/model_implementations/moka_models.py +3 -0
  136. mteb/models/model_implementations/mxbai_models.py +9 -0
  137. mteb/models/model_implementations/nbailab.py +70 -0
  138. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  139. mteb/models/model_implementations/nomic_models.py +156 -4
  140. mteb/models/model_implementations/nomic_models_vision.py +7 -2
  141. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
  142. mteb/models/model_implementations/nvidia_models.py +4 -1
  143. mteb/models/model_implementations/octen_models.py +195 -0
  144. mteb/models/model_implementations/openai_models.py +20 -16
  145. mteb/models/model_implementations/openclip_models.py +24 -0
  146. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  147. mteb/models/model_implementations/ops_moa_models.py +4 -2
  148. mteb/models/model_implementations/pawan_models.py +39 -0
  149. mteb/models/model_implementations/piccolo_models.py +8 -0
  150. mteb/models/model_implementations/promptriever_models.py +8 -4
  151. mteb/models/model_implementations/pylate_models.py +37 -4
  152. mteb/models/model_implementations/qodo_models.py +2 -0
  153. mteb/models/model_implementations/qtack_models.py +1 -0
  154. mteb/models/model_implementations/qwen3_models.py +6 -3
  155. mteb/models/model_implementations/qzhou_models.py +3 -1
  156. mteb/models/model_implementations/random_baseline.py +16 -21
  157. mteb/models/model_implementations/rasgaard_models.py +34 -0
  158. mteb/models/model_implementations/reasonir_model.py +1 -0
  159. mteb/models/model_implementations/repllama_models.py +2 -0
  160. mteb/models/model_implementations/rerankers_custom.py +3 -3
  161. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  162. mteb/models/model_implementations/richinfoai_models.py +1 -0
  163. mteb/models/model_implementations/ru_sentence_models.py +51 -0
  164. mteb/models/model_implementations/ruri_models.py +322 -0
  165. mteb/models/model_implementations/salesforce_models.py +3 -0
  166. mteb/models/model_implementations/samilpwc_models.py +1 -0
  167. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  168. mteb/models/model_implementations/searchmap_models.py +1 -0
  169. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  170. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  171. mteb/models/model_implementations/seed_models.py +1 -0
  172. mteb/models/model_implementations/sentence_transformers_models.py +57 -0
  173. mteb/models/model_implementations/shuu_model.py +32 -31
  174. mteb/models/model_implementations/siglip_models.py +10 -0
  175. mteb/models/model_implementations/sonar_models.py +1 -0
  176. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  177. mteb/models/model_implementations/stella_models.py +6 -0
  178. mteb/models/model_implementations/tarka_models.py +376 -0
  179. mteb/models/model_implementations/ua_sentence_models.py +10 -0
  180. mteb/models/model_implementations/uae_models.py +1 -0
  181. mteb/models/model_implementations/vdr_models.py +2 -0
  182. mteb/models/model_implementations/vi_vn_models.py +39 -0
  183. mteb/models/model_implementations/vista_models.py +2 -0
  184. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  185. mteb/models/model_implementations/voyage_models.py +15 -0
  186. mteb/models/model_implementations/voyage_v.py +8 -2
  187. mteb/models/model_implementations/xyz_models.py +1 -0
  188. mteb/models/model_implementations/youtu_models.py +1 -0
  189. mteb/models/model_implementations/yuan_models.py +34 -0
  190. mteb/models/model_implementations/yuan_models_en.py +58 -0
  191. mteb/models/model_meta.py +442 -22
  192. mteb/models/search_encoder_index/__init__.py +7 -0
  193. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  194. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  195. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
  196. mteb/models/search_wrappers.py +165 -48
  197. mteb/models/sentence_transformer_wrapper.py +2 -7
  198. mteb/results/benchmark_results.py +88 -47
  199. mteb/results/model_result.py +11 -4
  200. mteb/results/task_result.py +37 -19
  201. mteb/similarity_functions.py +49 -0
  202. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  203. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  204. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  205. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  206. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  207. mteb/tasks/classification/ara/ajgt.py +1 -2
  208. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  209. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  210. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  211. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  212. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  213. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  214. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  215. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  216. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  217. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  218. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  220. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  221. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  222. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  223. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  224. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  225. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  226. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  227. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  228. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  229. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  230. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  231. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  232. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  233. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  234. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  235. mteb/tasks/classification/eng/news_classification.py +1 -2
  236. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  237. mteb/tasks/classification/eng/patent_classification.py +1 -2
  238. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  239. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  240. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  241. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  242. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  243. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  244. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  245. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  246. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  247. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  248. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  249. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  250. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  251. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  252. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  253. mteb/tasks/classification/est/estonian_valence.py +1 -2
  254. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  255. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  257. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  260. mteb/tasks/classification/heb/__init__.py +6 -1
  261. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  262. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  263. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  264. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  265. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  266. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  267. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  268. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  269. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  270. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  271. mteb/tasks/classification/kor/klue_tc.py +1 -2
  272. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  274. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  275. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  276. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  277. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  278. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  279. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  280. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  281. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  282. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  283. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  284. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  285. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  286. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  287. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  288. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  289. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  290. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  291. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  292. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  293. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  294. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  295. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  296. mteb/tasks/classification/pol/polish_classification.py +3 -6
  297. mteb/tasks/classification/ron/moroco.py +1 -2
  298. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  299. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  300. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  301. mteb/tasks/classification/rus/headline_classification.py +1 -2
  302. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  303. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  304. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  305. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  306. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  307. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  308. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  309. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  310. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  311. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  312. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  313. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  314. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  315. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  316. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  317. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  318. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  319. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  320. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  321. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  322. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  323. mteb/tasks/classification/tur/__init__.py +4 -0
  324. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  325. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  326. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  327. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  328. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  329. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  330. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  331. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  332. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  333. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  334. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  335. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  336. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  337. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  338. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  339. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  340. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  341. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  342. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  343. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  344. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  345. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  346. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  347. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  348. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  349. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  350. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  351. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  352. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  353. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  354. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  355. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  356. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  357. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  358. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  359. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  360. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  361. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  362. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  363. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  364. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  365. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  366. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  367. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  368. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  369. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  370. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  371. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  372. mteb/tasks/pair_classification/rus/terra.py +51 -25
  373. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  374. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  375. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  376. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  377. mteb/tasks/reranking/jpn/__init__.py +9 -1
  378. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  379. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  380. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  381. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  382. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  383. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  384. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  385. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  386. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  387. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  388. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  389. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  390. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  391. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  392. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  393. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  394. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  395. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  396. mteb/tasks/retrieval/kor/__init__.py +2 -1
  397. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  398. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  399. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  400. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  401. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  402. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  403. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  404. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  405. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  406. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  407. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  408. mteb/tasks/retrieval/nld/__init__.py +8 -4
  409. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  410. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  411. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  412. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  413. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  414. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  415. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  416. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  417. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  418. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  419. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  420. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  421. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  422. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  423. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  424. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  425. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  426. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  427. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  428. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  429. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  430. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  431. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  432. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  433. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  434. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  435. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  436. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  437. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  438. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  439. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  440. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  441. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  442. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  443. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  444. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  445. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  446. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  447. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  448. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  449. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  450. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  451. mteb/types/_encoder_io.py +7 -2
  452. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
  453. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
  454. mteb/models/model_implementations/nb_sbert.py +0 -25
  455. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
  456. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
  457. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
  458. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py
@@ -0,0 +1,70 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class MultiLongDocReranking(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="MultiLongDocReranking",
+         description=(
+             "Reranking version of MultiLongDocRetrieval (MLDR). MLDR is a Multilingual Long-Document "
+             "Retrieval dataset built on Wikipedia, Wudao and mC4, covering 13 typologically diverse languages. "
+             "Specifically, we sample lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose "
+             "paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. "
+             "The generated question and the sampled article constitute a new text pair to the dataset."
+         ),
+         reference="https://huggingface.co/datasets/Shitao/MLDR",
+         dataset={
+             "path": "mteb/MultiLongDocReranking",
+             "revision": "ad09ce14c17bce6edae151b7f6ef12e15d91dbf3",
+         },
+         type="Reranking",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs={
+             "ar": ["ara-Arab"],
+             "de": ["deu-Latn"],
+             "en": ["eng-Latn"],
+             "es": ["spa-Latn"],
+             "fr": ["fra-Latn"],
+             "hi": ["hin-Deva"],
+             "it": ["ita-Latn"],
+             "ja": ["jpn-Jpan"],
+             "ko": ["kor-Kore"],
+             "pt": ["por-Latn"],
+             "ru": ["rus-Cyrl"],
+             "th": ["tha-Thai"],
+             "zh": ["zho-Hans"],
+         },
+         main_score="ndcg_at_10",
+         date=(
+             "2000-01-01",
+             "2024-12-31",
+         ),  # Not found in the paper, guessed using the paper's publication date and constituent datasets
+         domains=[
+             "Encyclopaedic",
+             "Written",
+             "Web",
+             "Non-fiction",
+             "Fiction",
+         ],  # narrativeqa, wikipedia, wudao, mC4
+         task_subtypes=[],
+         license="mit",
+         annotations_creators="LM-generated",  # gpt-3.5
+         dialect=[],
+         sample_creation="found",
+         bibtex_citation=r"""
+ @misc{bge-m3,
+   archiveprefix = {arXiv},
+   author = {Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+   eprint = {2402.03216},
+   primaryclass = {cs.CL},
+   title = {BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+   year = {2024},
+ }
+ """,
+         prompt={
+             "query": "Given a question, rerank long documents based on their relevance to answer the question"
+         },
+         adapted_from=["MultiLongDocRetrieval"],
+     )
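
Once a task module like this is merged, the task is addressable by the name declared in its TaskMetadata through mteb's top-level helpers. A minimal sketch, assuming mteb's public get_task accessor (the call pattern below is inferred from the library's general API, not part of this diff):

    import mteb

    # Resolve the newly added task by its TaskMetadata name.
    task = mteb.get_task("MultiLongDocReranking")
    print(task.metadata.type)        # "Reranking"
    print(task.metadata.main_score)  # "ndcg_at_10"
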
mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py
@@ -78,8 +78,7 @@ _CITATION = r"""
  class XGlueWPRReranking(AbsTaskRetrieval):
      metadata = TaskMetadata(
          name="XGlueWPRReranking",
-         description="""XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models
- with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.""",
+         description="XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.",
          reference="https://github.com/microsoft/XGLUE",
          dataset={
              "path": "mteb/XGlueWPRReranking",
mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class AskUbuntuDupQuestionsVN(AbsTaskRetrieval):
      metadata = TaskMetadata(
          name="AskUbuntuDupQuestions-VN",
-         description="""A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+         description="A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
          reference="https://github.com/taolei87/askubuntu",
          dataset={
              "path": "mteb/AskUbuntuDupQuestions-VN",
mteb/tasks/reranking/vie/sci_docs_reranking_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class SciDocsRerankingVN(AbsTaskRetrieval):
      metadata = TaskMetadata(
          name="SciDocsRR-VN",
-         description="""A translated dataset from Ranking of related scientific papers based on their title.
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+         description="A translated dataset from Ranking of related scientific papers based on their title. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
          reference="https://allenai.org/data/scidocs",
          dataset={
              "path": "mteb/SciDocsRR-VN",
mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class StackOverflowDupQuestionsVN(AbsTaskRetrieval):
      metadata = TaskMetadata(
          name="StackOverflowDupQuestions-VN",
-         description="""A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
- - Applies advanced embedding models to filter the translations.
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+         description="A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
          reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf",
          dataset={
              "path": "mteb/StackOverflowDupQuestions-VN",
mteb/tasks/retrieval/code/fresh_stack_retrieval.py
@@ -25,11 +25,14 @@ class FreshStackRetrieval(AbsTaskRetrieval):
          dialect=[],
          sample_creation="found",
          bibtex_citation=r"""
- @article{freshstack2023,
-   author = {FreshStack Authors},
-   journal = {arXiv preprint arXiv:2301.12345},
-   title = {FreshStack: A Multi-language Code Generation and Retrieval Benchmark},
-   year = {2023},
+ @misc{thakur2025freshstackbuildingrealisticbenchmarks,
+   archiveprefix = {arXiv},
+   author = {Nandan Thakur and Jimmy Lin and Sam Havens and Michael Carbin and Omar Khattab and Andrew Drozdov},
+   eprint = {2504.13128},
+   primaryclass = {cs.IR},
+   title = {FreshStack: Building Realistic Benchmarks for Evaluating Retrieval on Technical Documents},
+   url = {https://arxiv.org/abs/2504.13128},
+   year = {2025},
  }
  """,
      )
mteb/tasks/retrieval/eng/lit_search_retrieval.py
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
  class LitSearchRetrieval(AbsTaskRetrieval):
      metadata = TaskMetadata(
          name="LitSearchRetrieval",
-         description="""
- The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
- Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
- search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
- generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
- recently published papers, manually written by their authors. All LitSearch questions were manually examined or
- edited by experts to ensure high quality.
- """,
+         description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
          reference="https://github.com/princeton-nlp/LitSearch",
          dataset={
              "path": "princeton-nlp/LitSearch",
mteb/tasks/retrieval/eng/vidore_bench_retrieval.py
@@ -351,6 +351,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
  }
  """,
          prompt={"query": "Find a screenshot that relevant to the user's question."},
+         adapted_from=["VidoreDocVQARetrieval"],
      )

      def load_data(self) -> None:
@@ -394,6 +395,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
  }
  """,
          prompt={"query": "Find a screenshot that relevant to the user's question."},
+         adapted_from=["VidoreDocVQARetrieval"],
      )

      def load_data(self) -> None:
@@ -437,6 +439,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
  }
  """,
          prompt={"query": "Find a screenshot that relevant to the user's question."},
+         adapted_from=["VidoreDocVQARetrieval"],
      )

      def load_data(self) -> None:
@@ -480,6 +483,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
  }
  """,
          prompt={"query": "Find a screenshot that relevant to the user's question."},
+         adapted_from=["VidoreDocVQARetrieval"],
      )

      def load_data(self) -> None:
mteb/tasks/retrieval/jpn/__init__.py
@@ -1,8 +1,12 @@
  from .ja_cwir_retrieval import JaCWIRRetrieval
+ from .ja_cwir_retrieval_lite import JaCWIRRetrievalLite
  from .ja_gov_faqs_retrieval import JaGovFaqsRetrieval
  from .ja_qu_ad_retrieval import JaQuADRetrieval
  from .japanese_legal1_retrieval import JapaneseLegal1Retrieval
  from .jaqket_retrieval import JaqketRetrieval
+ from .jaqket_retrieval_lite import JaqketRetrievalLite
+ from .miracl_ja_retrieval_lite import MIRACLJaRetrievalLite
+ from .mr_tydi_ja_retrieval_lite import MrTyDiJaRetrievalLite
  from .nlp_journal_abs_article_retrieval import (
      NLPJournalAbsArticleRetrieval,
      NLPJournalAbsArticleRetrievalV2,
@@ -22,10 +26,14 @@ from .nlp_journal_title_intro_retrieval import (

  __all__ = [
      "JaCWIRRetrieval",
+     "JaCWIRRetrievalLite",
      "JaGovFaqsRetrieval",
      "JaQuADRetrieval",
      "JapaneseLegal1Retrieval",
      "JaqketRetrieval",
+     "JaqketRetrievalLite",
+     "MIRACLJaRetrievalLite",
+     "MrTyDiJaRetrievalLite",
      "NLPJournalAbsArticleRetrieval",
      "NLPJournalAbsArticleRetrievalV2",
      "NLPJournalAbsIntroRetrieval",
mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):

      metadata = TaskMetadata(
          name="JaCWIRRetrieval",
-         description="""JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of
- 5000 question texts and approximately 500k web page titles and web page introductions or summaries
- (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
- and that data is used as a positive example for the question text.""",
+         description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
          reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
          dataset={
              "path": "mteb/JaCWIRRetrieval",
mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py
@@ -0,0 +1,47 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class JaCWIRRetrievalLite(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="JaCWIRRetrievalLite",
+         dataset={
+             "path": "mteb/JaCWIRRetrievalLite",
+             "revision": "79472b360242cf2692e24a6d9999ef50d350d672",
+         },
+         description=(
+             "JaCWIR (Japanese Casual Web IR) is a dataset consisting of questions and webpage meta descriptions "
+             "collected from Hatena Bookmark. This is the lightweight version with a reduced corpus "
+             "(302,638 documents) constructed using hard negatives from 5 high-performance models."
+         ),
+         reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
+         type="Retrieval",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs=["jpn-Jpan"],
+         main_score="ndcg_at_10",
+         date=("2020-01-01", "2025-01-01"),
+         domains=["Web", "Written"],
+         task_subtypes=["Article retrieval"],
+         license="not specified",
+         annotations_creators="derived",
+         dialect=[],
+         sample_creation="found",
+         adapted_from=["JaCWIRRetrieval"],
+         bibtex_citation=r"""
+ @misc{jmteb_lite,
+   author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
+ and Kawahara, Daisuke},
+   howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
+   title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
+   year = {2025},
+ }
+
+ @misc{yuichi-tateno-2024-jacwir,
+   author = {Yuichi Tateno},
+   title = {JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット},
+   url = {https://huggingface.co/datasets/hotchpotch/JaCWIR},
+ }
+ """,
+     )
mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py
@@ -0,0 +1,50 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class JaqketRetrievalLite(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="JaqketRetrievalLite",
+         dataset={
+             "path": "mteb/JaqketRetrievalLite",
+             "revision": "860965fbb6526dd8edff12627dacf07c8f5a54f3",
+         },
+         description=(
+             "JAQKET (JApanese Questions on Knowledge of EnTities) is a QA dataset created based on quiz questions. "
+             "This is the lightweight version with a reduced corpus (65,802 documents) constructed using "
+             "hard negatives from 5 high-performance models."
+         ),
+         reference="https://github.com/kumapo/JAQKET-dataset",
+         type="Retrieval",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs=["jpn-Jpan"],
+         main_score="ndcg_at_10",
+         date=("2023-10-09", "2025-01-01"),
+         domains=["Encyclopaedic", "Non-fiction", "Written"],
+         task_subtypes=["Question answering"],
+         license="cc-by-sa-4.0",
+         annotations_creators="human-annotated",
+         dialect=[],
+         sample_creation="found",
+         adapted_from=["JaqketRetrieval"],
+         bibtex_citation=r"""
+ @misc{jmteb_lite,
+   author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
+ and Kawahara, Daisuke},
+   howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
+   title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
+   year = {2025},
+ }
+
+ @inproceedings{Kurihara_nlp2020,
+   author = {鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也},
+   booktitle = {言語処理学会第26回年次大会},
+   note = {in Japanese},
+   title = {JAQKET: クイズを題材にした日本語 QA データセットの構築},
+   url = {https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf},
+   year = {2020},
+ }
+ """,
+     )
mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py
@@ -0,0 +1,52 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class MIRACLJaRetrievalLite(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="MIRACLJaRetrievalLite",
+         dataset={
+             "path": "mteb/MIRACLJaRetrievalLite",
+             "revision": "575c225da29d1f5fec01082afa56f35df0f44295",
+         },
+         description=(
+             "MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual "
+             "retrieval dataset. This is the lightweight Japanese version with a reduced corpus (105,064 documents) "
+             "constructed using hard negatives from 5 high-performance models."
+         ),
+         reference="https://project-miracl.github.io/",
+         type="Retrieval",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs=["jpn-Jpan"],
+         main_score="ndcg_at_10",
+         date=("2022-06-01", "2025-01-01"),
+         domains=["Encyclopaedic", "Written"],
+         task_subtypes=[],
+         license="apache-2.0",
+         annotations_creators="expert-annotated",
+         dialect=[],
+         sample_creation="created",
+         adapted_from=["MIRACLRetrieval"],
+         bibtex_citation=r"""
+ @article{10.1162/tacl_a_00595,
+   author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David
+ and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy},
+   doi = {10.1162/tacl_a_00595},
+   journal = {Transactions of the Association for Computational Linguistics},
+   pages = {1114-1131},
+   title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}},
+   volume = {11},
+   year = {2023},
+ }
+
+ @misc{jmteb_lite,
+   author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
+ and Kawahara, Daisuke},
+   howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
+   title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
+   year = {2025},
+ }
+ """,
+     )
mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py
@@ -0,0 +1,48 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class MrTyDiJaRetrievalLite(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="MrTyDiJaRetrievalLite",
+         dataset={
+             "path": "mteb/MrTyDiJaRetrievalLite",
+             "revision": "b87e6ff25f4e32d1c97498a539ea8aad5fde3cb1",
+         },
+         description=(
+             "Mr.TyDi is a multilingual benchmark dataset built on TyDi for document retrieval tasks. "
+             "This is the lightweight Japanese version with a reduced corpus (93,382 documents) constructed using "
+             "hard negatives from 5 high-performance models."
+         ),
+         reference="https://huggingface.co/datasets/castorini/mr-tydi",
+         type="Retrieval",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs=["jpn-Jpan"],
+         main_score="ndcg_at_10",
+         date=("2021-01-01", "2025-01-01"),
+         domains=["Encyclopaedic", "Non-fiction", "Written"],
+         task_subtypes=["Question answering"],
+         license="apache-2.0",
+         annotations_creators="human-annotated",
+         dialect=[],
+         sample_creation="found",
+         adapted_from=["MrTidyRetrieval"],
+         bibtex_citation=r"""
+ @misc{jmteb_lite,
+   author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
+ and Kawahara, Daisuke},
+   howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
+   title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
+   year = {2025},
+ }
+
+ @article{mrtydi,
+   author = {Xinyu Zhang and Xueguang Ma and Peng Shi and Jimmy Lin},
+   journal = {arXiv:2108.08787},
+   title = {{Mr. TyDi}: A Multi-lingual Benchmark for Dense Retrieval},
+   year = {2021},
+ }
+ """,
+     )
mteb/tasks/retrieval/kat/georgian_faq_retrieval.py
@@ -46,10 +46,17 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
              split=_EVAL_SPLIT,
              revision=self.metadata.dataset["revision"],
          )
-         question_ids = {
-             question: _id for _id, question in enumerate(set(data["question"]))
-         }
-         answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))}
+
+         question_ids = {}
+         answer_ids = {}
+
+         for row in data:
+             question = row["question"]
+             answer = row["answer"]
+             if question not in question_ids:
+                 question_ids[question] = len(question_ids)
+             if answer not in answer_ids:
+                 answer_ids[answer] = len(answer_ids)

          for row in data:
              question = row["question"]
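
The rewrite above replaces enumerate(set(...)) with first-seen insertion order. Since Python salts string hashes per process (PYTHONHASHSEED), iterating a set of strings can yield a different order, and therefore different document IDs, on every run; the loop form is deterministic for a given dataset. The same fix appears in BelebeleRetrieval at the end of this diff. A standalone sketch of the difference (illustrative only, not mteb code):

    texts = ["a", "b", "a", "c"]

    # Old pattern: set iteration order is not stable across processes,
    # so the ID assigned to each text can change between runs.
    unstable_ids = {text: _id for _id, text in enumerate(set(texts))}

    # New pattern: IDs follow first occurrence in the data,
    # identical on every run for the same input.
    stable_ids = {}
    for text in texts:
        if text not in stable_ids:
            stable_ids[text] = len(stable_ids)

    assert stable_ids == {"a": 0, "b": 1, "c": 2}
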
mteb/tasks/retrieval/kor/__init__.py
@@ -1,4 +1,5 @@
  from .auto_rag_retrieval import AutoRAGRetrieval
  from .ko_strategy_qa import KoStrategyQA
+ from .squad_kor_v1_retrieval import SQuADKorV1Retrieval

- __all__ = ["AutoRAGRetrieval", "KoStrategyQA"]
+ __all__ = ["AutoRAGRetrieval", "KoStrategyQA", "SQuADKorV1Retrieval"]
mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py
@@ -0,0 +1,47 @@
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
+ from mteb.abstasks.task_metadata import TaskMetadata
+
+
+ class SQuADKorV1Retrieval(AbsTaskRetrieval):
+     metadata = TaskMetadata(
+         name="SQuADKorV1Retrieval",
+         description="Korean translation of SQuAD v1.0 dataset for retrieval task, based on Korean Wikipedia articles.",
+         reference="https://huggingface.co/datasets/yjoonjang/squad_kor_v1",
+         dataset={
+             "path": "yjoonjang/squad_kor_v1",
+             "revision": "2b4ee1f3b143a04792da93a3df21933c5fe9eed3",
+         },
+         type="Retrieval",
+         category="t2t",
+         modalities=["text"],
+         eval_splits=["test"],
+         eval_langs=["kor-Hang"],
+         main_score="ndcg_at_10",
+         date=("2018-01-01", "2019-12-31"),
+         domains=["Encyclopaedic", "Written"],
+         task_subtypes=["Question answering"],
+         license="cc-by-sa-4.0",
+         annotations_creators="derived",
+         dialect=[],
+         sample_creation="found",
+         bibtex_citation=r"""
+ @inproceedings{rajpurkar-etal-2016-squad,
+   address = {Austin, Texas},
+   author = {Rajpurkar, Pranav and
+ Zhang, Jian and
+ Lopyrev, Konstantin and
+ Liang, Percy},
+   booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
+   doi = {10.18653/v1/D16-1264},
+   editor = {Su, Jian and
+ Duh, Kevin and
+ Carreras, Xavier},
+   month = nov,
+   pages = {2383--2392},
+   publisher = {Association for Computational Linguistics},
+   title = {{SQ}u{AD}: 100,000+ Questions for Machine Comprehension of Text},
+   url = {https://aclanthology.org/D16-1264},
+   year = {2016},
+ }
+ """,
+     )
mteb/tasks/retrieval/multilingual/__init__.py
@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
      Vidore2ESGReportsHLRetrieval,
      Vidore2ESGReportsRetrieval,
  )
+ from .vidore3_bench_retrieval import (
+     Vidore3ComputerScienceRetrieval,
+     Vidore3EnergyRetrieval,
+     Vidore3FinanceEnRetrieval,
+     Vidore3FinanceFrRetrieval,
+     Vidore3HrRetrieval,
+     Vidore3IndustrialRetrieval,
+     Vidore3NuclearRetrieval,
+     Vidore3PharmaceuticalsRetrieval,
+     Vidore3PhysicsRetrieval,
+     Vidore3TelecomRetrieval,
+ )
  from .web_faq_retrieval import WebFAQRetrieval
  from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
  from .wit_t2i_retrieval import WITT2IRetrieval
@@ -161,6 +173,16 @@ __all__ = [
      "Vidore2ESGReportsHLRetrieval",
      "Vidore2ESGReportsRetrieval",
      "Vidore2EconomicsReportsRetrieval",
+     "Vidore3ComputerScienceRetrieval",
+     "Vidore3EnergyRetrieval",
+     "Vidore3FinanceEnRetrieval",
+     "Vidore3FinanceFrRetrieval",
+     "Vidore3HrRetrieval",
+     "Vidore3IndustrialRetrieval",
+     "Vidore3NuclearRetrieval",
+     "Vidore3PharmaceuticalsRetrieval",
+     "Vidore3PhysicsRetrieval",
+     "Vidore3TelecomRetrieval",
      "WITT2IRetrieval",
      "WebFAQRetrieval",
      "WikipediaRetrievalMultilingual",
mteb/tasks/retrieval/multilingual/belebele_retrieval.py
@@ -230,10 +230,11 @@ class BelebeleRetrieval(AbsTaskRetrieval):
          ds_corpus = self.dataset[lang_corpus]
          ds_question = self.dataset[lang_question]

-         question_ids = {
-             question: _id
-             for _id, question in enumerate(set(ds_question["question"]))
-         }
+         question_ids = {}
+         for row in ds_question:
+             question = row["question"]
+             if question not in question_ids:
+                 question_ids[question] = len(question_ids)

          link_to_context_id = {}
          context_idx = 0