mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class SciFactNL(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="SciFact-NL",
8
- dataset={
9
- "path": "clips/beir-nl-scifact",
10
- "revision": "856d8dfc294b138856bbf3042450e3782321e44e",
11
- },
12
- description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature containing scientific paper abstracts.",
13
- reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
14
- type="Retrieval",
15
- category="t2t",
16
- modalities=["text"],
17
- eval_splits=["test"],
18
- eval_langs=["nld-Latn"],
19
- main_score="ndcg_at_10",
20
- date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
21
- domains=["Academic", "Medical", "Written"],
22
- task_subtypes=[],
23
- license="cc-by-4.0",
24
- annotations_creators="derived",
25
- dialect=[],
26
- sample_creation="machine-translated and verified", # manually checked a small subset
27
- bibtex_citation=r"""
4
+ _sci_fact_nl_metadata = dict(
5
+ dataset={
6
+ "path": "clips/beir-nl-scifact",
7
+ "revision": "856d8dfc294b138856bbf3042450e3782321e44e",
8
+ },
9
+ reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
17
+ domains=["Academic", "Medical", "Written"],
18
+ task_subtypes=[],
19
+ license="cc-by-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
28
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
29
25
  archiveprefix = {arXiv},
30
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -35,5 +31,27 @@ class SciFactNL(AbsTaskRetrieval):
35
31
  year = {2024},
36
32
  }
37
33
  """,
34
+ )
35
+
36
+
37
+ class SciFactNL(AbsTaskRetrieval):
38
+ metadata = TaskMetadata(
39
+ name="SciFact-NL",
40
+ description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
41
+ "containing scientific paper abstracts.",
38
42
  adapted_from=["SciFact"],
43
+ **_sci_fact_nl_metadata,
44
+ )
45
+
46
+
47
+ class SciFactNLv2(AbsTaskRetrieval):
48
+ metadata = TaskMetadata(
49
+ name="SciFact-NL.v2",
50
+ description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
51
+ "containing scientific paper abstracts. This version adds a Dutch prompt to the dataset.",
52
+ adapted_from=["SciFact-NL"],
53
+ prompt={
54
+ "query": "Given a scientific claim, retrieve documents that support or refute the claim"
55
+ },
56
+ **_sci_fact_nl_metadata,
39
57
  )
@@ -1,33 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class SCIDOCSNL(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="SCIDOCS-NL",
8
- dataset={
9
- "path": "clips/beir-nl-scidocs",
10
- "revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
11
- },
12
- description=(
13
- "SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation"
14
- + " prediction, to document classification and recommendation. SciDocs-NL is a Dutch translation."
15
- ),
16
- reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
17
- type="Retrieval",
18
- category="t2t",
19
- modalities=["text"],
20
- eval_splits=["test"],
21
- eval_langs=["nld-Latn"],
22
- main_score="ndcg_at_10",
23
- date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
24
- domains=["Academic", "Written", "Non-fiction"],
25
- task_subtypes=[],
26
- license="cc-by-sa-4.0",
27
- annotations_creators="derived",
28
- dialect=[],
29
- sample_creation="machine-translated and verified", # manually checked a small subset
30
- bibtex_citation=r"""
4
+ _scidocsnl_metadata = dict(
5
+ dataset={
6
+ "path": "clips/beir-nl-scidocs",
7
+ "revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
8
+ },
9
+ reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
17
+ domains=["Academic", "Written", "Non-fiction"],
18
+ task_subtypes=[],
19
+ license="cc-by-sa-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
31
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
32
25
  archiveprefix = {arXiv},
33
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -38,5 +31,29 @@ class SCIDOCSNL(AbsTaskRetrieval):
38
31
  year = {2024},
39
32
  }
40
33
  """,
34
+ )
35
+
36
+
37
+ class SCIDOCSNL(AbsTaskRetrieval):
38
+ metadata = TaskMetadata(
39
+ name="SCIDOCS-NL",
40
+ description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
41
+ "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
42
+ "translation.",
41
43
  adapted_from=["SCIDOCS"],
44
+ **_scidocsnl_metadata,
45
+ )
46
+
47
+
48
+ class SCIDOCSNLv2(AbsTaskRetrieval):
49
+ metadata = TaskMetadata(
50
+ name="SCIDOCS-NL.v2",
51
+ description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
52
+ "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
53
+ "translation. This version adds a Dutch prompt to the dataset.",
54
+ adapted_from=["SCIDOCS-NL"],
55
+ **_scidocsnl_metadata,
56
+ prompt={
57
+ "query": "Gegeven de titel van een wetenschappelijk artikel, haal de abstracts op van artikelen die door het gegeven artikel worden geciteerd"
58
+ },
42
59
  )
@@ -38,4 +38,7 @@ class VABBRetrieval(AbsTaskRetrieval):
38
38
  year = {2024},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Gegeven een titel, haal de wetenschappelijke abstract op die het beste bij de titel past"
43
+ },
41
44
  )
@@ -54,7 +54,7 @@ Fishel, Mark},
54
54
  """Load dataset from HuggingFace hub"""
55
55
  if self.data_loaded:
56
56
  return
57
- self.dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
57
+ self.dataset = datasets.load_dataset(**self.metadata.dataset)
58
58
  self.dataset_transform()
59
59
  self.data_loaded = True
60
60
 
@@ -71,7 +71,7 @@ Fishel, Mark},
71
71
  text2id = {}
72
72
 
73
73
  for split in self.dataset:
74
- ds: datasets.Dataset = self.dataset[split] # type: ignore
74
+ ds: datasets.Dataset = self.dataset[split]
75
75
  ds = ds.shuffle(seed=42)
76
76
  max_samples = min(1024, len(ds))
77
77
  ds = ds.select(
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
41
41
  """Load dataset from HuggingFace hub"""
42
42
  if self.data_loaded:
43
43
  return
44
- self.dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
44
+ self.dataset = datasets.load_dataset(**self.metadata.dataset)
45
45
  self.dataset_transform()
46
46
  self.data_loaded = True
47
47
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
58
58
  text2id = {}
59
59
 
60
60
  for split in self.dataset:
61
- ds: datasets.Dataset = self.dataset[split] # type: ignore
61
+ ds: datasets.Dataset = self.dataset[split]
62
62
  ds = ds.shuffle(seed=42)
63
63
 
64
64
  self.queries[split] = {}
@@ -7,13 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class SlovakSumRetrieval(AbsTaskRetrieval):
8
8
  metadata = TaskMetadata(
9
9
  name="SlovakSumRetrieval",
10
- description="""
11
- SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand
12
- news articles with titles and short abstracts obtained from multiple Slovak newspapers.
13
-
14
- Originally intended as a summarization task, but since no human annotations were provided
15
- here reformulated to a retrieval task.
16
- """,
10
+ description="SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. Originally intended as a summarization task, but since no human annotations were provided here reformulated to a retrieval task.",
17
11
  reference="https://huggingface.co/datasets/NaiveNeuron/slovaksum",
18
12
  dataset={
19
13
  "path": "NaiveNeuron/slovaksum",
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
59
59
  text2id = {}
60
60
 
61
61
  for split in self.metadata.eval_splits:
62
- ds: datasets.Dataset = self.dataset[split] # type: ignore
62
+ ds: datasets.Dataset = self.dataset[split]
63
63
  ds = ds.shuffle(seed=42)
64
64
  max_samples = min(1024, len(ds))
65
65
  ds = ds.select(
@@ -1,5 +1,5 @@
1
1
  from .argu_ana_vn_retrieval import ArguAnaVN
2
- from .climate_fevervn_retrieval import ClimateFEVERVN
2
+ from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
3
3
  from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
4
4
  from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
5
5
  from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
10
10
  from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
11
11
  from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
12
12
  from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
13
- from .db_pedia_vn_retrieval import DBPediaVN
14
- from .fevervn_retrieval import FEVERVN
13
+ from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
14
+ from .fevervn_retrieval import FEVERVN, NanoFEVERVN
15
15
  from .fi_qa2018_vn_retrieval import FiQA2018VN
16
16
  from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
17
- from .hotpot_qavn_retrieval import HotpotQAVN
18
- from .msmarcovn_retrieval import MSMARCOVN
17
+ from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
18
+ from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
19
19
  from .nf_corpus_vn_retrieval import NFCorpusVN
20
- from .nqvn_retrieval import NQVN
20
+ from .nqvn_retrieval import NQVN, NanoNQVN
21
21
  from .quora_vn_retrieval import QuoraVN
22
22
  from .sci_fact_vn_retrieval import SciFactVN
23
23
  from .scidocsvn_retrieval import SCIDOCSVN
24
24
  from .touche2020_vn_retrieval import Touche2020VN
25
25
  from .treccovidvn_retrieval import TRECCOVIDVN
26
+ from .tvpl_retrieval import TVPLRetrieval
26
27
  from .vie_qu_ad_retrieval import VieQuADRetrieval
27
28
  from .zac_legal_text_retrieval import ZacLegalTextRetrieval
28
29
 
@@ -49,8 +50,15 @@ __all__ = [
49
50
  "GreenNodeTableMarkdownRetrieval",
50
51
  "HotpotQAVN",
51
52
  "NFCorpusVN",
53
+ "NanoClimateFEVERVN",
54
+ "NanoDBPediaVN",
55
+ "NanoFEVERVN",
56
+ "NanoHotpotQAVN",
57
+ "NanoMSMARCOVN",
58
+ "NanoNQVN",
52
59
  "QuoraVN",
53
60
  "SciFactVN",
61
+ "TVPLRetrieval",
54
62
  "Touche2020VN",
55
63
  "VieQuADRetrieval",
56
64
  "ZacLegalTextRetrieval",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class ArguAnaVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="ArguAna-VN",
8
- description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://argumentation.bplaced.net/arguana/data",
14
10
  dataset={
15
11
  "path": "GreenNode/arguana-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class ClimateFEVERVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="ClimateFEVER-VN",
8
- description="""A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
14
10
  dataset={
15
11
  "path": "GreenNode/climate-fever-vn",
@@ -40,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
40
36
  """,
41
37
  adapted_from=["ClimateFEVER"],
42
38
  )
39
+
40
+
41
+ class NanoClimateFEVERVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoClimateFEVER-VN",
44
+ description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
45
+ reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
46
+ dataset={
47
+ "path": "GreenNode/nano-climate-fever-vn",
48
+ "revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
49
+ },
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Encyclopaedic", "Written"],
61
+ task_subtypes=["Claim verification"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
75
+ },
76
+ adapted_from=["ClimateFEVER-VN"],
77
+ )
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackAndroidVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackAndroid-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-android-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackGisVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackGis-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-gis-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackMathematicaVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackMathematica-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-mathematica-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackPhysicsVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackPhysics-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-physics-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackProgrammersRetrievalVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackProgrammers-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-programmers-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackStatsVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackStats-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-stats-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackTexVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackTex-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-tex-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackUnixVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackUnix-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-unix-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackWebmastersVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackWebmasters-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-webmasters-vn",
@@ -9,11 +9,7 @@ class CQADupstackWordpressVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/cqadupstack-wordpress-vn",
10
10
  "revision": "2230f80e1baf42aa005731ca86577621c566fcd7",
11
11
  },
12
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class DBPediaVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="DBPedia-VN",
8
- description="""A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://github.com/iai-group/DBpedia-Entity/",
14
10
  dataset={
15
11
  "path": "GreenNode/dbpedia-vn",
@@ -40,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
40
36
  """,
41
37
  adapted_from=["DBPedia"],
42
38
  )
39
+
40
+
41
+ class NanoDBPediaVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoDBPedia-VN",
44
+ description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
45
+ reference="https://github.com/iai-group/DBpedia-Entity/",
46
+ dataset={
47
+ "path": "GreenNode/nano-dbpedia-vn",
48
+ "revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
49
+ },
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Written", "Encyclopaedic"],
61
+ task_subtypes=[],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
75
+ },
76
+ adapted_from=["DBPedia-VN"],
77
+ )
@@ -9,13 +9,7 @@ class FEVERVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/fever-vn",
10
10
  "revision": "a543dd8b98aed3603110c01d26db05ba39b87d49",
11
11
  },
12
- description="""A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences
13
- extracted from Wikipedia and subsequently verified without knowledge of the sentence they were
14
- derived from.
15
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
16
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
17
- - Applies advanced embedding models to filter the translations.
18
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
19
13
  reference="https://fever.ai/",
20
14
  type="Retrieval",
21
15
  category="t2t",
@@ -42,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
42
36
  """,
43
37
  adapted_from=["FEVER"],
44
38
  )
39
+
40
+
41
+ class NanoFEVERVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoFEVER-VN",
44
+ dataset={
45
+ "path": "GreenNode/nano-fever-vn",
46
+ "revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
47
+ },
48
+ description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
49
+ reference="https://fever.ai/",
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Encyclopaedic", "Written"],
61
+ task_subtypes=["Claim verification"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a claim, retrieve documents that support or refute the claim"
75
+ },
76
+ adapted_from=["FEVER-VN"],
77
+ )